Python API Reference

Auto-generated from source code docstrings.

Command-line interface for chess-self-coach.

Entry point for the CLI. Dispatches to subcommands: setup, train, update, syzygy.

`main(argv=None)`

Main CLI entry point.

Parameters:

Name	Type	Description	Default
`argv`	`list[str] \| None`	Command-line arguments (defaults to sys.argv[1:]).	`None`

Source code in src/chess_self_coach/cli.py

def main(argv: list[str] | None = None) -> None:
    """Main CLI entry point.

    Builds the argument parser, then dispatches on the chosen subcommand
    (``setup``, ``update``, ``syzygy``, ``train``). With no subcommand the
    server is launched directly. For ``train`` the flags are checked in a
    fixed priority order: ``--derive``, ``--refresh-explanations``,
    ``--prepare``, ``--serve``, ``--stats``; with none of them a short usage
    reminder is printed.

    Handled failures are reported on stderr and terminate the process with
    exit status 1.

    Args:
        argv: Command-line arguments (defaults to sys.argv[1:]).
    """
    parser = argparse.ArgumentParser(
        prog="chess-self-coach",
        description="Learn from your chess mistakes: Stockfish analysis + spaced repetition training.",
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # --- setup ---
    subparsers.add_parser(
        "setup",
        help="Interactive setup: verify Stockfish, configure game platforms",
    )

    # --- update ---
    subparsers.add_parser(
        "update",
        help="Update chess-self-coach to the latest version",
    )

    # --- syzygy ---
    p_syzygy = subparsers.add_parser(
        "syzygy",
        help="Manage Syzygy endgame tablebases",
    )
    p_syzygy.add_argument(
        "action",
        choices=["download", "status"],
        help="download: fetch 3-5 piece tables (~1 GB). status: show installed tables.",
    )

    # --- train ---
    p_train = subparsers.add_parser(
        "train",
        help="Training mode: extract mistakes from games and drill with spaced repetition",
    )
    p_train.add_argument(
        "--prepare",
        action="store_true",
        help="Analyze games and export training_data.json",
    )
    p_train.add_argument(
        "--serve",
        action="store_true",
        help="Open the training PWA in the browser",
    )
    p_train.add_argument(
        "--stats",
        action="store_true",
        help="Show training progress statistics",
    )
    p_train.add_argument(
        "--derive",
        action="store_true",
        help="Re-derive training_data.json from analysis_data.json (no Stockfish needed)",
    )
    p_train.add_argument(
        "--games",
        type=int,
        default=10,
        help="Maximum games to analyze (default: 10)",
    )
    # NOTE(review): --depth is parsed but args.depth is never read in this
    # function — confirm whether it should feed AnalysisSettings.
    p_train.add_argument(
        "--depth",
        type=int,
        default=18,
        help="Stockfish analysis depth (default: 18)",
    )
    p_train.add_argument(
        "--threads",
        type=int,
        default=None,
        help="Stockfish threads (default: auto = CPU count - 1)",
    )
    p_train.add_argument(
        "--hash",
        type=int,
        default=None,
        dest="hash_mb",
        help="Stockfish hash table size in MB (default: 1024)",
    )
    p_train.add_argument(
        "--reanalyze-all",
        action="store_true",
        dest="reanalyze_all",
        help="Re-analyze all games (skip only those with identical settings)",
    )
    p_train.add_argument(
        "--engine",
        type=str,
        default=None,
        help="Path to the Stockfish binary (overrides config.json)",
    )
    p_train.add_argument(
        "--refresh-explanations",
        action="store_true",
        dest="refresh_explanations",
        help="[Dev] Regenerate explanations without re-running Stockfish",
    )
    # NOTE(review): --fresh is parsed but args.fresh is never read in this
    # function — confirm whether the flag is still wired up elsewhere.
    p_train.add_argument(
        "--fresh",
        action="store_true",
        help="[Dev] Discard existing training data and start from scratch",
    )

    args = parser.parse_args(argv)

    # No subcommand: behave like the old `train --serve`.
    if args.command is None:
        _launch_server()
        return

    if args.command == "setup":
        _setup()

    elif args.command == "update":
        from chess_self_coach.updater import update

        update()

    elif args.command == "syzygy":
        from chess_self_coach.syzygy import (
            _DEFAULT_DIR as _SYZYGY_DEFAULT,
            download_syzygy,
            syzygy_status,
        )

        if args.action == "download":
            default = str(_SYZYGY_DEFAULT)
            # An empty answer keeps the default installation directory.
            custom = input(f"  Installation directory [{default}]: ").strip()
            target = Path(custom) if custom else _SYZYGY_DEFAULT

            try:
                path = download_syzygy(target_dir=target)
                print(f"  ✓ Syzygy tables downloaded to {path}")
            # CLI boundary: any download failure becomes a one-line error.
            # (FileNotFoundError is already covered by Exception.)
            except Exception as e:
                print(f"  ❌ {e}", file=sys.stderr)
                sys.exit(1)
        elif args.action == "status":
            from chess_self_coach.config import ConfigError, error_exit, load_config

            try:
                config = load_config()
            except ConfigError as e:
                error_exit(str(e), hint=e.hint)
            status = syzygy_status(config)
            if status["found"]:
                print(f"  Path: {status['path']}")
                print(f"  WDL files: {status['wdl_count']}")
                print(f"  DTZ files: {status['dtz_count']}")
                print(f"  Total size: {status['total_size_mb']} MB")
            else:
                print("  No Syzygy tables found.")
                print("  Download with: chess-self-coach syzygy download")

    elif args.command == "train":
        if args.derive:
            from chess_self_coach.training_data import generate_training_data

            try:
                generate_training_data()
            except (FileNotFoundError, RuntimeError) as e:
                print(f"  {e}", file=sys.stderr)
                sys.exit(1)
        elif args.refresh_explanations:
            from chess_self_coach.trainer import refresh_explanations

            refresh_explanations()
        elif args.prepare:
            from chess_self_coach.analysis import AnalysisSettings, analyze_games
            from chess_self_coach.opening_explorer import ExplorerAPIError

            # Build settings from config, with CLI overrides
            from chess_self_coach.config import ConfigError, error_exit, load_config

            try:
                config = load_config()
            except ConfigError as e:
                error_exit(str(e), hint=e.hint)
            settings = AnalysisSettings.from_config(config)
            if args.threads is not None:
                settings.threads = args.threads
            if args.hash_mb is not None:
                settings.hash_mb = args.hash_mb

            try:
                analyze_games(
                    max_games=args.games,
                    reanalyze_all=args.reanalyze_all,
                    settings=settings,
                    engine_path=args.engine,
                )
            except (FileNotFoundError, RuntimeError, ExplorerAPIError) as e:
                print(f"  {e}", file=sys.stderr)
                sys.exit(1)

            # Follow-up passes run only after analyze_games returned normally.
            from chess_self_coach.tactics import run_tactical_analysis

            run_tactical_analysis()

            from chess_self_coach.classifier import run_classification

            run_classification()
        elif args.serve:
            print("  Tip: you can now just run `chess-self-coach` directly.\n")
            _launch_server()
        elif args.stats:
            from chess_self_coach.trainer import print_stats

            print_stats()
        else:
            print("Usage: chess-self-coach train [--prepare|--derive|--serve|--stats]")
            print("Run 'chess-self-coach train -h' for details.")

Configuration loading for chess-self-coach.

Loads config.json (Stockfish path, player usernames) and .env (Lichess token). Every error produces a clear message with the exact command to fix it.

`ConfigError`

Bases: Exception

Raised when config.json is missing, invalid, or unreadable.

Attributes:

Name	Type	Description
`hint`		Optional fix suggestion for the user.

Source code in src/chess_self_coach/config.py

class ConfigError(Exception):
    """Raised when config.json is missing, invalid, or unreadable.

    Attributes:
        hint: Optional fix suggestion for the user.
    """

    def __init__(self, message: str, hint: str | None = None) -> None:
        super().__init__(message)
        self.hint = hint

`analysis_data_path()`

Return the path to analysis_data.json.

Returns:

Type	Description
`Path`	Path to data/analysis_data.json.

Source code in src/chess_self_coach/config.py

def analysis_data_path() -> Path:
    """Locate analysis_data.json inside the data directory.

    Returns:
        The data directory joined with ANALYSIS_DATA_FILE.
    """
    base = data_dir()
    return base / ANALYSIS_DATA_FILE

`check_stockfish_version(sf_path, expected=None)`

Check the Stockfish version and warn if it doesn't match expected.

Parameters:

Name	Type	Description	Default
`sf_path`	`Path`	Path to the Stockfish binary.	required
`expected`	`str \| None`	Expected version string (e.g. "Stockfish 18").	`None`

Returns:

Type	Description
`str`	The detected version string.

Source code in src/chess_self_coach/config.py

def check_stockfish_version(sf_path: Path, expected: str | None = None) -> str:
    """Check the Stockfish version and warn if it doesn't match expected.

    Args:
        sf_path: Path to the Stockfish binary.
        expected: Expected version string (e.g. "Stockfish 18").

    Returns:
        The detected version string.
    """
    try:
        result = subprocess.run(
            [str(sf_path)],
            input="uci\nquit\n",
            capture_output=True,
            text=True,
            timeout=5,
        )
        for line in result.stdout.splitlines():
            if line.startswith("id name "):
                version = line[len("id name ") :]
                if expected and expected not in version:
                    print(
                        f"  ⚠ Warning: Expected {expected}, found {version}",
                        file=sys.stderr,
                    )
                return version
    except (subprocess.TimeoutExpired, OSError) as e:
        print(f"  ⚠ Warning: Could not check Stockfish version: {e}", file=sys.stderr)

    return "unknown"

`classifications_data_path()`

Return the path to classifications_data.json.

Returns:

Type	Description
`Path`	Path to data/classifications_data.json.

Source code in src/chess_self_coach/config.py

def classifications_data_path() -> Path:
    """Locate classifications_data.json inside the data directory.

    Returns:
        The data directory joined with CLASSIFICATIONS_DATA_FILE.
    """
    base = data_dir()
    return base / CLASSIFICATIONS_DATA_FILE

`config_path()`

Return the path to config.json.

Returns:

Type	Description
`Path`	Path to data/config.json.

Source code in src/chess_self_coach/config.py

def config_path() -> Path:
    """Locate config.json inside the data directory.

    Returns:
        The data directory joined with CONFIG_FILE.
    """
    base = data_dir()
    return base / CONFIG_FILE

`data_dir()`

Return the data directory path.

Returns:

Type	Description
`Path`	Path to the data/ directory in the project root.

Source code in src/chess_self_coach/config.py

def data_dir() -> Path:
    """Locate the data directory under the project root.

    Returns:
        The project root (from _find_project_root) joined with DATA_DIR.
    """
    project_root = _find_project_root()
    return project_root / DATA_DIR

`error_exit(message, hint=None, debug_cmd=None)`

Print a formatted error and exit.

Parameters:

Name	Type	Description	Default
`message`	`str`	What went wrong.	required
`hint`	`str \| None`	How to fix it.	`None`
`debug_cmd`	`str \| None`	A shell command the user can run to debug.	`None`

Source code in src/chess_self_coach/config.py

def error_exit(message: str, hint: str | None = None, debug_cmd: str | None = None) -> NoReturn:
    """Print a formatted error and exit.

    Args:
        message: What went wrong.
        hint: How to fix it.
        debug_cmd: A shell command the user can run to debug.
    """
    print(f"\n❌ {message}", file=sys.stderr)
    if hint:
        print(f"\n  How to fix:\n  {hint}", file=sys.stderr)
    if debug_cmd:
        print(f"\n  To debug manually:\n    {debug_cmd}", file=sys.stderr)
    print(file=sys.stderr)
    sys.exit(1)

`fetched_games_path()`

Return the path to fetched_games.json.

Returns:

Type	Description
`Path`	Path to data/fetched_games.json.

Source code in src/chess_self_coach/config.py

def fetched_games_path() -> Path:
    """Locate fetched_games.json inside the data directory.

    Returns:
        The data directory joined with FETCHED_GAMES_FILE.
    """
    base = data_dir()
    return base / FETCHED_GAMES_FILE

`find_stockfish(config=None)`

Find a working Stockfish binary.

Search order: config.json path (then its optional fallback path) → default search locations → `stockfish` on $PATH.

Parameters:

Name	Type	Description	Default
`config`	`dict[str, Any] \| None`	Optional loaded config dict.	`None`

Returns:

Type	Description
`Path`	Path to the Stockfish binary.

Raises:

Type	Description
`SystemExit`	If no Stockfish binary is found.

Source code in src/chess_self_coach/config.py

def find_stockfish(config: dict[str, Any] | None = None) -> Path:
    """Locate a Stockfish binary.

    Search order: config.json ``stockfish.path`` (unless empty or "auto") →
    config.json ``stockfish.fallback_path`` → the default search paths in
    ``_SF_SEARCH_PATHS`` → ``stockfish`` on $PATH. The first candidate that is
    an existing file wins; the binary is not executed here.

    Args:
        config: Optional loaded config dict.

    Returns:
        Path to the Stockfish binary.

    Raises:
        SystemExit: If no Stockfish binary is found.
    """
    candidates: list[Path] = []

    # From config
    if config:
        sf_config = config.get("stockfish", {})
        path = sf_config.get("path", "")
        if path and path != "auto":
            candidates.append(Path(path))
        if fallback := sf_config.get("fallback_path"):
            candidates.append(Path(fallback))

    # Default search paths
    candidates.extend(_SF_SEARCH_PATHS)

    # $PATH lookup
    sf_in_path = shutil.which("stockfish")
    if sf_in_path:
        candidates.append(Path(sf_in_path))

    # Test each candidate; remember the rejected ones for the error report.
    tested: list[str] = []
    for candidate in candidates:
        # is_file() is already False for paths that do not exist.
        if candidate.is_file():
            return candidate
        tested.append(f"  - {candidate} ({'exists' if candidate.exists() else 'not found'})")

    error_exit(
        "Stockfish not found.",
        hint=(
            "Paths tested:\n"
            + "\n".join(tested)
            + "\n\n  To fix:\n"
            "  - Install Stockfish: sudo apt install stockfish\n"
            # The CLI has no `analyze` subcommand; --engine belongs to `train`.
            "  - Or specify the path: chess-self-coach train --prepare --engine /path/to/stockfish"
        ),
    )

`load_config()`

Load config.json from the data directory.

Returns:

Type	Description
`dict[str, Any]`	Parsed config dictionary.

Raises:

Type	Description
`ConfigError`	If config.json is missing or invalid.

Source code in src/chess_self_coach/config.py

def load_config() -> dict[str, Any]:
    """Load config.json from the data directory.

    The file is read as UTF-8 regardless of the platform's locale encoding.

    Returns:
        Parsed config dictionary.

    Raises:
        ConfigError: If config.json is missing, is not valid JSON, or is not
            valid UTF-8. The error's ``hint`` says how to fix it.
    """
    cfg = config_path()

    if not cfg.exists():
        # Migration hint: detect old location at project root
        root = _find_project_root()
        old_path = root / CONFIG_FILE
        if old_path.exists():
            raise ConfigError(
                "config.json found at old location (project root).",
                hint=f"Move it to the data directory:\n"
                f"  mkdir -p {root / DATA_DIR}\n"
                f"  mv {old_path} {cfg}",
            )
        raise ConfigError(
            "config.json not found.",
            hint=f"Run 'chess-self-coach setup' to create it,\n"
            f"  or copy {root / DATA_DIR / CONFIG_EXAMPLE_FILE} to {cfg}",
        )

    try:
        # Explicit encoding: the locale default (e.g. cp1252 on Windows)
        # would mis-decode non-ASCII values such as usernames.
        with open(cfg, encoding="utf-8") as f:
            return json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # Undecodable bytes get the same user-facing error as a syntax error
        # instead of escaping as a raw UnicodeDecodeError.
        raise ConfigError(
            f"config.json is not valid JSON: {e}",
            hint=f"Check the syntax in {cfg}",
        ) from e

`load_lichess_token(required=True)`

Load the Lichess API token from .env or environment.

Parameters:

Name	Type	Description	Default
`required`	`bool`	If True, exit on missing token. If False, return None.	`True`

Returns:

Type	Description
`str \| None`	The API token string, or None if not found and not required.

Raises:

Type	Description
`SystemExit`	If required=True and no token is found or it looks invalid.

Source code in src/chess_self_coach/config.py

def load_lichess_token(required: bool = True) -> str | None:
    """Read the Lichess API token from the environment (after loading .env).

    The project's .env file is loaded first when it exists; the token is then
    taken from the LICHESS_API_TOKEN environment variable, stripped of
    surrounding whitespace, and accepted only if it starts with ``lip_``.

    Args:
        required: If True, exit on a missing or malformed token. If False,
            return None in those cases.

    Returns:
        The API token string, or None if no acceptable token was found and
        it is not required.

    Raises:
        SystemExit: If required=True and no token is found or it looks invalid.
    """
    env_path = _find_project_root() / ENV_FILE

    # Load .env if it exists
    if env_path.exists():
        load_dotenv(env_path)

    token = os.environ.get("LICHESS_API_TOKEN", "").strip()

    # Happy path first: an empty string never has the expected prefix.
    if token.startswith("lip_"):
        return token

    if not required:
        return None

    if not token:
        error_exit(
            "Lichess API token not found.",
            hint=(
                "1. Create a token at: https://lichess.org/account/oauth/token/create\n"
                "  2. Save it:\n"
                f'     echo "LICHESS_API_TOKEN=lip_your_token_here" > {env_path}'
            ),
            debug_cmd='curl -H "Authorization: Bearer lip_your_token" https://lichess.org/api/account',
        )

    error_exit(
        f"Lichess token looks invalid (expected 'lip_...' prefix, got '{token[:8]}...').",
        hint="Regenerate your token at https://lichess.org/account/oauth/token/create",
    )

`save_config(config)`

Write config back to config.json atomically.

Parameters:

Name	Type	Description	Default
`config`	`dict[str, Any]`	The config dictionary to save.	required

Source code in src/chess_self_coach/config.py

def save_config(config: dict[str, Any]) -> None:
    """Persist *config* to config.json via atomic_write_json.

    The parent directory is created first if it does not exist yet, and a
    confirmation line with the destination path is printed afterwards.

    Args:
        config: The config dictionary to save.
    """
    from chess_self_coach.io import atomic_write_json

    destination = config_path()
    destination.parent.mkdir(parents=True, exist_ok=True)
    atomic_write_json(destination, config, pretty=True)
    print(f"  Config saved to {destination}")

`tactics_data_path()`

Return the path to tactics_data.json.

Returns:

Type	Description
`Path`	Path to data/tactics_data.json.

Source code in src/chess_self_coach/config.py

def tactics_data_path() -> Path:
    """Locate tactics_data.json inside the data directory.

    Returns:
        The data directory joined with TACTICS_DATA_FILE.
    """
    base = data_dir()
    return base / TACTICS_DATA_FILE

`training_data_path()`

Return the path to training_data.json.

Returns:

Type	Description
`Path`	Path to data/training_data.json.

Source code in src/chess_self_coach/config.py

def training_data_path() -> Path:
    """Locate training_data.json inside the data directory.

    Returns:
        The data directory joined with TRAINING_DATA_FILE.
    """
    base = data_dir()
    return base / TRAINING_DATA_FILE

Phase 1: collect raw per-move data from Stockfish, tablebase, and opening explorer.

Stores all evaluation data in analysis_data.json with maximum granularity. Phase 2 (training_data.py) annotates and filters this data into training_data.json.

`AnalysisInterrupted`

Bases: Exception

Raised when analysis is cancelled via the interrupt signal.

Source code in src/chess_self_coach/analysis.py

class AnalysisInterrupted(Exception):
    """Raised when analysis is cancelled via the interrupt signal.

    A plain Exception subclass: it adds no attributes or behavior of its own.
    """

`AnalysisSettings` `dataclass`

Engine and analysis configuration for full game analysis.

Attributes:

Name	Type	Description
`threads`	`int`	Number of Stockfish threads. 0 means auto (cpu_count - 1).
`hash_mb`	`int`	Stockfish hash table size in megabytes.
`limits`	`dict[str, dict[str, float \| int]]`	Depth/time limits per piece-count bracket.

Source code in src/chess_self_coach/analysis.py

@dataclass
class AnalysisSettings:
    """Engine and analysis configuration for full game analysis.

    Attributes:
        threads: Number of Stockfish threads. 0 means auto (cpu_count - 1).
        hash_mb: Stockfish hash table size in megabytes.
        limits: Depth/time limits per piece-count bracket. The default is a
            private copy of ANALYSIS_LIMITS (outer and per-bracket dicts), so
            editing an instance never alters the module-level defaults.
    """

    threads: int = 0
    hash_mb: int = 1024
    limits: dict[str, dict[str, float | int]] = field(
        # Copy both levels: dict(ANALYSIS_LIMITS) alone would share the
        # per-bracket dicts with the module constant.
        default_factory=lambda: {
            bracket: dict(bounds) for bracket, bounds in ANALYSIS_LIMITS.items()
        }
    )

    @classmethod
    def from_config(cls, config: dict) -> AnalysisSettings:
        """Build settings from a config dict (from config.json).

        Args:
            config: Full config dict. Reads the 'analysis_engine' key.

        Returns:
            AnalysisSettings with values from config, defaults for missing keys.
            A missing or null 'limits' entry falls back to a private copy of
            ANALYSIS_LIMITS.
        """
        section = config.get("analysis_engine", {})
        threads_raw = section.get("threads", "auto")
        # "auto" and 0 both mean "let resolved_threads decide".
        threads = 0 if threads_raw in ("auto", 0) else int(threads_raw)

        limits = section.get("limits")
        if limits is None:
            # Build the default only when needed, and never hand out the
            # module-level per-bracket dicts themselves.
            limits = {
                bracket: dict(bounds) for bracket, bounds in ANALYSIS_LIMITS.items()
            }

        return cls(
            threads=threads,
            hash_mb=int(section.get("hash_mb", 1024)),
            limits=limits,
        )

    @property
    def resolved_threads(self) -> int:
        """Actual thread count (resolves 0/auto to cpu_count - 1).

        Any non-positive ``threads`` value is delegated to worker_count().
        """
        return self.threads if self.threads > 0 else worker_count()

    def to_dict(self) -> dict:
        """Serialize to a dict suitable for JSON storage.

        Returns:
            Dict with threads (resolved to actual count), hash_mb, limits.
            ``limits`` is the instance's own dict, not a copy.
        """
        return {
            "threads": self.resolved_threads,
            "hash_mb": self.hash_mb,
            "limits": self.limits,
        }

`resolved_threads` `property`

Actual thread count (resolves 0/auto to cpu_count - 1).

`from_config(config)` `classmethod`

Build settings from a config dict (from config.json).

Parameters:

Name	Type	Description	Default
`config`	`dict`	Full config dict. Reads the 'analysis_engine' key.	required

Returns:

Type	Description
`AnalysisSettings`	AnalysisSettings with values from config, defaults for missing keys.

Source code in src/chess_self_coach/analysis.py

@classmethod
def from_config(cls, config: dict) -> AnalysisSettings:
    """Build settings from a config dict (from config.json).

    Args:
        config: Full config dict. Reads the 'analysis_engine' key.

    Returns:
        AnalysisSettings with values from config, defaults for missing keys.
        A missing or null 'limits' entry falls back to a private copy of
        ANALYSIS_LIMITS.
    """
    section = config.get("analysis_engine", {})
    threads_raw = section.get("threads", "auto")
    # "auto" and 0 both mean "resolve the thread count later".
    threads = 0 if threads_raw in ("auto", 0) else int(threads_raw)

    limits = section.get("limits")
    if limits is None:
        # Copy both levels so the instance never shares the per-bracket
        # dicts with the module-level ANALYSIS_LIMITS constant.
        limits = {
            bracket: dict(bounds) for bracket, bounds in ANALYSIS_LIMITS.items()
        }

    return cls(
        threads=threads,
        hash_mb=int(section.get("hash_mb", 1024)),
        limits=limits,
    )

`to_dict()`

Serialize to a dict suitable for JSON storage.

Returns:

Type	Description
`dict`	Dict with threads (resolved to actual count), hash_mb, limits.

Source code in src/chess_self_coach/analysis.py

def to_dict(self) -> dict:
    """Produce a JSON-friendly snapshot of the settings.

    Returns:
        Dict with threads (resolved to actual count), hash_mb, limits.
    """
    snapshot: dict = {}
    snapshot["threads"] = self.resolved_threads
    snapshot["hash_mb"] = self.hash_mb
    snapshot["limits"] = self.limits
    return snapshot

`analyze_games(*, game_ids=None, max_games=10, reanalyze_all=False, settings=None, engine_path=None, on_progress=None, on_game_done=None, cancel=None)`

Fetch games, analyze with Stockfish + APIs, write analysis_data.json.

Phase 1 orchestrator: sequential analysis with one multi-threaded Stockfish. Caller is responsible for invoking generate_training_data() (Phase 2) afterwards.

Parameters:

Name	Type	Description	Default
`game_ids`	`list[str] \| None`	Specific game IDs to analyze from the cache. When set, skips the fetch phase and reads from fetched_games.json. When None or empty, fetches from APIs (original behavior).	`None`
`max_games`	`int`	Maximum total games in the dataset (default: 10).	`10`
`reanalyze_all`	`bool`	If True, re-analyze games that were already analyzed, skipping only those whose analysis settings are identical.	`False`
`settings`	`AnalysisSettings \| None`	Override analysis settings. None = load from config.	`None`
`engine_path`	`str \| None`	Override path to Stockfish binary.	`None`
`on_progress`	`Callable[[dict], None] \| None`	Optional callback for structured progress events.	`None`
`cancel`	`Event \| None`	Threading event for cancellation.	`None`

Source code in src/chess_self_coach/analysis.py

def analyze_games(
    *,
    game_ids: list[str] | None = None,
    max_games: int = 10,
    reanalyze_all: bool = False,
    settings: AnalysisSettings | None = None,
    engine_path: str | None = None,
    on_progress: Callable[[dict], None] | None = None,
    on_game_done: Callable[[str, dict], None] | None = None,
    cancel: threading.Event | None = None,
) -> None:
    """Fetch games, analyze with Stockfish + APIs, write analysis_data.json.

    Phase 1 orchestrator: sequential analysis with one multi-threaded Stockfish.
    Caller is responsible for invoking generate_training_data() (Phase 2) afterwards.

    Args:
        game_ids: Specific game IDs to analyze from the cache. When set,
            skips the fetch phase and reads from fetched_games.json.
            When None or empty, fetches from APIs (original behavior).
        max_games: Maximum total games in the dataset (default: 10).
        reanalyze_all: If True, re-analyze games (skip only same-settings).
        settings: Override analysis settings. None = load from config.
        engine_path: Override path to Stockfish binary.
        on_progress: Optional callback for structured progress events.
        cancel: Threading event for cancellation.
    """
    from chess_self_coach.config import (
        analysis_data_path,
        check_stockfish_version,
        find_stockfish,
        load_config,
        load_lichess_token,
    )
    from chess_self_coach.importer import fetch_chesscom_games, fetch_lichess_games

    def _emit(event: dict) -> None:
        if on_progress:
            on_progress(event)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%H:%M:%S",
    )

    config = load_config()
    players = config.get("players", {})
    lichess_user = players.get("lichess", "")
    chesscom_user = players.get("chesscom")

    if not lichess_user and not chesscom_user:
        raise RuntimeError(
            "No player configured. Run 'chess-self-coach setup' to set your Lichess and/or chess.com username."
        )

    # Load settings
    if settings is None:
        settings = AnalysisSettings.from_config(config)
    # Find Stockfish
    if engine_path:
        sf_path = Path(engine_path)
        if not sf_path.exists():
            raise FileNotFoundError(f"Engine not found: {sf_path}")
    else:
        sf_path = find_stockfish(config)
        expected = config.get("stockfish", {}).get("expected_version")
        version = check_stockfish_version(sf_path, expected)
        print(f"  Using {version} at {sf_path}")
        _emit({"phase": "init", "message": f"Using {version}"})

    # Load Lichess token for Opening Explorer
    lichess_token = load_lichess_token(required=False)

    analysis_path = analysis_data_path()

    # Load existing analysis data
    existing_data = load_analysis_data(analysis_path)
    existing_games = existing_data.get("games", {})

    # --- Load games: from cache (game_ids) or from APIs (fetch) ---
    new_games: list[tuple[chess.pgn.Game, str, chess.Color]] = []

    if game_ids:
        # Load specific games from cache (no API fetch needed)
        from chess_self_coach.game_cache import get_cached_game, load_game_cache

        print(f"\n  Loading {len(game_ids)} game(s) from cache...")
        _emit({"phase": "fetch", "message": "Loading from cache...", "percent": 5})

        cache = load_game_cache()
        cached_games = cache.get("games", {})

        for gid in game_ids:
            if gid in existing_games and not reanalyze_all:
                print(f"  Skipped (already analyzed): {gid}")
                continue

            entry = cached_games.get(gid)
            if entry is None:
                print(f"  Warning: game not in cache, skipping: {gid}")
                continue

            game = get_cached_game(gid)
            if game is None:
                continue

            player_color_str = entry.get("player_color", "white")
            player_color = chess.WHITE if player_color_str == "white" else chess.BLACK
            new_games.append((game, gid, player_color))

        _emit(
            {
                "phase": "fetch",
                "message": f"{len(new_games)} game(s) to analyze",
                "percent": 10,
            }
        )
    else:
        # Original behavior: fetch from APIs
        print("\n  Fetching games...")
        _emit({"phase": "fetch", "message": "Fetching games...", "percent": 5})
        all_games: list[chess.pgn.Game] = []

        if lichess_user:
            all_games.extend(fetch_lichess_games(lichess_user, max_games))
        if chesscom_user:
            all_games.extend(fetch_chesscom_games(chesscom_user, max_games))

        if not all_games:
            print("  No games found.")
            _emit({"phase": "done", "message": "No games found.", "percent": 100})
            return

        # Filter games
        reanalyzed = 0
        skipped = 0
        for game in all_games:
            game_id = game.headers.get("Link", game.headers.get("Site", ""))
            if game_id == "?":
                game_id = ""

            white = game.headers.get("White", "?")
            black = game.headers.get("Black", "?")
            if white == "?" and black == "?":
                continue

            player_color = _determine_player_color(game, lichess_user, chesscom_user)
            if player_color is None:
                continue

            is_reanalysis = False
            if game_id and game_id in existing_games:
                if not reanalyze_all:
                    skipped += 1
                    continue
                is_reanalysis = True

            new_games.append((game, game_id, player_color))
            if is_reanalysis:
                reanalyzed += 1

        if skipped:
            print(f"  Skipped {skipped} already-analyzed game(s)")

        new_games.sort(
            key=lambda t: t[0].headers.get("Date", "0000.00.00"),
            reverse=True,
        )
        cap = max(0, max_games - len(existing_games)) + reanalyzed
        new_games = new_games[:cap]

        _emit(
            {
                "phase": "fetch",
                "message": f"Found {len(all_games)} game(s) ({len(new_games)} to analyze)",
                "percent": 10,
            }
        )

    if not new_games:
        print("  No new games to analyze.")
        _emit({"phase": "done", "message": "No new games.", "percent": 100})
        return

    # Open Stockfish (one instance, multi-threaded)
    threads = settings.resolved_threads
    hash_mb = settings.hash_mb
    print(
        f"\n  Analyzing {len(new_games)} game(s) with Stockfish ({threads} threads, {hash_mb}MB hash)..."
    )
    print("  This may take several minutes...\n")

    engine = chess.engine.SimpleEngine.popen_uci(str(sf_path))
    engine.configure({"Threads": threads, "Hash": hash_mb})

    # Syzygy endgame tablebases
    from chess_self_coach.syzygy import find_syzygy

    syzygy_path = find_syzygy(config)
    if not syzygy_path:
        engine.quit()
        raise RuntimeError(
            "Syzygy endgame tablebases (3-5 pieces) not found.\n"
            "  Install with: chess-self-coach syzygy download"
        )
    engine.configure({"SyzygyPath": str(syzygy_path)})
    _log.info("Syzygy tablebases: %s", syzygy_path)

    try:
        wall_start = _time.time()
        done_count = 0
        total_tasks = len(new_games)
        _emit({"phase": "analyze", "message": f"Analyzing 0/{total_tasks}", "percent": 15, "current": 0, "total": total_tasks})

        for game, game_id, player_color in new_games:
            done_count += 1
            white = game.headers.get("White", "?")
            black = game.headers.get("Black", "?")
            label = f"{white} vs {black}"

            # Re-analysis: pass existing move data to preserve API results
            prev_moves = None
            if reanalyze_all and game_id and game_id in existing_games:
                prev_moves = existing_games[game_id].get("moves")

            def _on_wait(attempt: int, delay: float) -> None:
                msg = f"API rate limit, retry #{attempt} in {delay:.0f}s ({label})"
                print(f"  ⏳ {msg}")
                _emit({"phase": "analyze", "message": msg, "waiting": True})

            start = _time.time()
            try:
                game_data = collect_game_data(
                    game,
                    engine,
                    player_color,
                    settings,
                    lichess_token,
                    game_id=game_id,
                    existing_moves=prev_moves,
                    on_wait=_on_wait,
                )
            except Exception as exc:
                _emit({"phase": "analyze", "message": str(exc), "error": True})
                print(f"  [{done_count}/{total_tasks}] Error analyzing {label}: {exc}")
                continue

            elapsed = _time.time() - start

            # Store analysis duration for ETA estimation
            game_data["analysis_duration_s"] = round(elapsed, 1)

            # Per-game summary
            _moves = game_data["moves"]
            _opening = [m for m in _moves if m.get("in_opening")]
            _other = [m for m in _moves if not m.get("in_opening")]
            _log.info(
                "Game %d/%d: %s — %d moves in %.1fs",
                done_count,
                total_tasks,
                label,
                len(_moves),
                elapsed,
            )
            if _opening:
                _op_ms = sum(
                    m["timing_ms"]["eval_before"] + m["timing_ms"]["eval_after"]
                    for m in _opening
                )
                _log.info("  Opening: %d moves in %.1fs", len(_opening), _op_ms / 1000)
            if _other:
                _ot_ms = sum(
                    m["timing_ms"]["eval_before"] + m["timing_ms"]["eval_after"]
                    for m in _other
                )
                _src_counts: dict[str, int] = {}
                for m in _other:
                    s = m["eval_source"]
                    _src_counts[s] = _src_counts.get(s, 0) + 1
                _src_str = ", ".join(f"{k}: {v}" for k, v in _src_counts.items())
                _log.info(
                    "  Non-opening: %d moves (%s) in %.1fs",
                    len(_other),
                    _src_str,
                    _ot_ms / 1000,
                )

            # Store in analysis data
            store_id = game_id or f"unknown_{done_count}"
            existing_data.setdefault("games", {})[store_id] = game_data
            existing_data["player"] = {
                "lichess": lichess_user,
                "chesscom": chesscom_user or "",
            }

            # Atomic write after each game (crash-safe)
            save_analysis_data(existing_data, analysis_path)

            # Run downstream phases so the game is usable in the UI
            if on_game_done:
                on_game_done(store_id, game_data)

            # Progress
            move_count = len(game_data["moves"])
            wall_elapsed = _time.time() - wall_start
            avg_per_game = wall_elapsed / done_count
            remaining = avg_per_game * (total_tasks - done_count)
            eta_min, eta_sec = divmod(int(remaining), 60)
            eta_str = f"{eta_min}m{eta_sec:02d}s" if eta_min else f"{eta_sec}s"

            print(
                f"  [{done_count}/{total_tasks}] {label}... "
                f"{move_count} moves ({elapsed:.1f}s) — ETA {eta_str}"
            )
            pct = 15 + int(75 * done_count / total_tasks)
            _emit(
                {
                    "phase": "analyze",
                    "message": f"Analyzing {done_count}/{total_tasks}: {label}",
                    "percent": pct,
                    "current": done_count,
                    "total": total_tasks,
                }
            )

            # Check cancel
            if cancel and cancel.is_set():
                raise AnalysisInterrupted(
                    f"Interrupted. Saved {done_count}/{total_tasks} games."
                )
    finally:
        engine.quit()

    total_games = len(existing_data.get("games", {}))
    print(f"\n  Analysis data saved: {analysis_path}")
    print(f"  Total games analyzed: {total_games}")
    _emit(
        {
            "phase": "done",
            "message": f"Analysis complete. {total_games} games.",
            "percent": 100,
        }
    )

`collect_game_data(game, engine, player_color, settings, lichess_token=None, game_id='', existing_moves=None, on_wait=None)`

Collect full per-move analysis data for one game (Phase 1).

Source hierarchy per move:

1. Tablebase (priority if ≤7 pieces — perfect information)
2. Masters opening explorer (in_opening=True, cp_loss=0)
3. Cloud eval (depth 50-70, cp_loss computed)
4. Stockfish (fallback, always re-run on re-analysis)

When existing_moves is provided (re-analysis), API data (masters, cloud eval, tablebase) is preserved and only breakpoints are re-tested. Stockfish positions are always re-run regardless.

Parameters:

Name	Type	Description	Default
`game`	`Game`	Parsed PGN game.	required
`engine`	`SimpleEngine`	Running Stockfish engine (already configured with threads/hash).	required
`player_color`	`Color`	Which color the player was.	required
`settings`	`AnalysisSettings`	Analysis settings (for limits and storage).	required
`lichess_token`	`str \| None`	Lichess API token for Opening Explorer. None to skip.	`None`
`game_id`	`str`	Unique game identifier. Passed to engine.analyse() so python-chess sends ucinewgame between different games (hash table reset).	`''`
`existing_moves`	`list[dict[str, Any]] \| None`	Previous per-move data from a prior analysis. When set, preserves API data and only re-tests breakpoints + re-runs Stockfish.	`None`
`on_wait`	`Callable[[int, float], None] \| None`	Optional callback(attempt, delay_seconds) called when an API request is retrying after a transient error (429, 5xx).	`None`

Returns:

Type	Description
`dict[str, Any]`	Dict with game headers, settings, and moves[] array ready for storage in analysis_data.json.

Source code in src/chess_self_coach/analysis.py

def collect_game_data(
    game: chess.pgn.Game,
    engine: chess.engine.SimpleEngine,
    player_color: chess.Color,
    settings: AnalysisSettings,
    lichess_token: str | None = None,
    game_id: str = "",
    existing_moves: list[dict[str, Any]] | None = None,
    on_wait: Callable[[int, float], None] | None = None,
) -> dict[str, Any]:
    """Collect full per-move analysis data for one game (Phase 1).

    Source hierarchy per move:
      1. Tablebase (priority if ≤7 pieces — perfect information)
      2. Masters opening explorer (in_opening=True, cp_loss=0)
      3. Cloud eval (depth 50-70, cp_loss computed)
      4. Stockfish (fallback, always re-run on re-analysis)

    When *existing_moves* is provided (re-analysis), API data (masters,
    cloud eval, tablebase) is preserved and only breakpoints are re-tested.
    Stockfish positions are always re-run regardless.

    Only the main line is walked (``variations[0]`` at every node); side
    variations in the PGN are ignored. The evaluation computed for the
    position *after* a move is cached and reused as the evaluation *before*
    the next move, so most positions are evaluated once.

    Args:
        game: Parsed PGN game.
        engine: Running Stockfish engine (already configured with threads/hash).
        player_color: Which color the player was.
        settings: Analysis settings (for limits and storage).
        lichess_token: Lichess API token for Opening Explorer. None to skip.
        game_id: Unique game identifier. Passed to engine.analyse() so python-chess
            sends ucinewgame between different games (hash table reset).
        existing_moves: Previous per-move data from a prior analysis. When set,
            preserves API data and only re-tests breakpoints + re-runs Stockfish.
        on_wait: Optional callback(attempt, delay_seconds) called when an API
            request is retrying after a transient error (429, 5xx).

    Returns:
        Dict with game headers, settings, and moves[] array ready for
        storage in analysis_data.json. The ``headers.link`` value (and the
        derived ``headers.source``) comes from the PGN ``Link``/``Site``
        headers, not from the *game_id* argument.
    """
    limits = settings.limits
    moves_data: list[dict] = []

    # Collect all (fen, move_uci) pairs for opening explorer batch query
    fens_and_moves: list[tuple[str, str]] = []
    node = game
    while node.variations:
        board = node.board()
        next_node = node.variations[0]
        fens_and_moves.append((board.fen(), next_node.move.uci()))
        node = next_node

    # Query Masters Opening Explorer (stops at departure)
    # Without a token every entry stays None, so no move is flagged in_opening.
    explorer_results: list[dict | None] = [None] * len(fens_and_moves)
    if lichess_token:
        from chess_self_coach.opening_explorer import query_opening_sequence

        # On re-analysis, hand the previously stored explorer payloads back
        # to the query helper (one entry per ply, None where absent).
        existing_explorer = (
            [m.get("opening_explorer") for m in existing_moves]
            if existing_moves
            else None
        )
        explorer_results = query_opening_sequence(
            fens_and_moves, lichess_token, existing_results=existing_explorer
        )

    # Walk through the game and collect eval data for each move
    node = game
    ply = 0
    # Cache: eval_before for current position (reused as eval_after of previous move)
    cached_eval: dict[str, Any] | None = None
    cached_tb: dict[str, Any] | None = None
    cached_mpv: dict[str, Any] | None = None
    # Set once a fresh cloud lookup misses after the opening; from then on
    # every remaining move above the tablebase threshold goes to Stockfish.
    cloud_departed = False
    prev_player_clock: float | None = None
    prev_opponent_clock: float | None = None

    while node.variations:
        board = node.board()
        next_node = node.variations[0]
        actual_move = next_node.move
        piece_count = len(board.piece_map())
        side = "white" if board.turn == chess.WHITE else "black"

        # --- Board enrichments ---
        board_after = board.copy()
        board_after.push(actual_move)
        is_check = board_after.is_check()
        is_capture = board.is_capture(actual_move)
        is_castling = board.is_castling(actual_move)
        is_en_passant = board.is_en_passant(actual_move)
        is_promotion = actual_move.promotion is not None
        promoted_to = None
        # The second half of this condition repeats is_promotion; it is
        # presumably there only to narrow the Optional for type checkers.
        if is_promotion and actual_move.promotion is not None:
            promoted_to = chess.piece_symbol(actual_move.promotion)

        # --- Clock data ---
        # player_clock is read from the node of the move being played, i.e.
        # it belongs to whoever is moving at this ply; opponent_clock is read
        # from the following move's node.
        # NOTE(review): the names are therefore relative to the side to move,
        # not to player_color — confirm this matches what consumers of the
        # "clock" dict expect.
        player_clock = next_node.clock()
        opponent_clock = None
        if next_node.variations:
            opponent_clock = next_node.variations[0].clock()

        # Compute time spent (difference from previous clock reading for the same side)
        # NOTE(review): on plies not played by player_color the difference is
        # taken between successive opponent_clock readings (the following
        # move's clock) — verify this is the intended quantity. A time
        # increment would also make the difference negative; confirm.
        time_spent = None
        if side == ("white" if player_color == chess.WHITE else "black"):
            # Player's move
            if player_clock is not None and prev_player_clock is not None:
                time_spent = prev_player_clock - player_clock
        else:
            # Opponent's move
            if opponent_clock is not None and prev_opponent_clock is not None:
                time_spent = prev_opponent_clock - opponent_clock

        # --- Opening Explorer: determine if move is in Masters theory ---
        explorer_data = explorer_results[ply] if ply < len(explorer_results) else None
        in_opening = (
            explorer_data is not None
            and explorer_data.get("_source") == "masters"
        )

        # --- Existing move data (re-analysis) ---
        # Matched purely by ply index against the previous analysis.
        existing: dict[str, Any] | None = (
            existing_moves[ply]
            if existing_moves is not None and ply < len(existing_moves)
            else None
        )

        # --- board_after_fen needed by all tiers ---
        board_after_fen = board_after.fen()

        # --- Tier dispatch: tablebase → masters+cloud → cloud → stockfish ---
        # Every branch below must bind score_before, score_after,
        # score_before_ms, score_after_ms, tier_source, _eb_src and _ea_src,
        # and refresh cached_eval / cached_tb / cached_mpv for the next ply.
        tb_before: dict[str, Any] | None = None
        tb_after: dict[str, Any] | None = None
        mpv_before: dict[str, Any] | None = None
        pc_after = len(board_after.piece_map())

        if piece_count <= MAX_PIECES:
            # ── Tier 1: Tablebase (priority, ≤7 pieces, perfect information) ──
            # Probe tablebase for current position
            # NOTE(review): the probe is issued before the cache check below,
            # so it runs (and may wait on the API) even when the cached
            # tablebase result ends up being used.
            _tb_probed: dict[str, Any] | None = None
            if existing and existing.get("tablebase_before"):
                _tb_probed = existing["tablebase_before"]
            else:
                _tb_probed = probe_position_full(board.fen(), on_wait=on_wait)

            t0 = _time.time()
            if cached_eval is not None and cached_tb is not None:
                score_before = cached_eval
                tb_before = cached_tb
                mpv_before = cached_mpv
                _eb_src = "cache"
            elif _tb_probed is not None:
                tb_before = _tb_probed
                score_before = _tb_to_eval(_tb_probed, board.turn)
                _eb_src = "tablebase"
            else:
                # No tablebase answer available: evaluate with the engine.
                infos = engine.analyse(
                    board, _analysis_limit_from_settings(board, limits),
                    multipv=MULTIPV, game=game_id,
                )
                score_before = _extract_eval(infos[0], board)
                mpv_before = _extract_multipv(infos, board)
                _eb_src = "sf_fallback"
            score_before_ms = (_time.time() - t0) * 1000

            # Probe tablebase for position after move
            _tb_probed_after: dict[str, Any] | None = None
            if existing and existing.get("tablebase_after"):
                _tb_probed_after = existing["tablebase_after"]
            elif pc_after <= MAX_PIECES:
                _tb_probed_after = probe_position_full(board_after_fen, on_wait=on_wait)

            t0 = _time.time()
            if _tb_probed_after is not None:
                tb_after = _tb_probed_after
                score_after = _tb_to_eval(_tb_probed_after, board_after.turn)
                cached_eval = score_after
                cached_tb = _tb_probed_after
                cached_mpv = None
                _ea_src = "tablebase"
            else:
                infos_after = engine.analyse(
                    board_after,
                    _analysis_limit_from_settings(board_after, limits),
                    multipv=MULTIPV, game=game_id,
                )
                score_after = _extract_eval(infos_after[0], board_after)
                cached_eval = score_after
                cached_tb = None
                cached_mpv = _extract_multipv(infos_after, board_after)
                _ea_src = "stockfish"
            score_after_ms = (_time.time() - t0) * 1000

            # "tablebase" only when both sides of the move came from the
            # tablebase; a mix is recorded as "stockfish+tablebase".
            tier_source = "tablebase" if tb_before is not None and tb_after is not None else (
                "stockfish+tablebase" if tb_before is not None or tb_after is not None else "stockfish"
            )

        else:
            # ── Tiers 2-4: Cloud scoring / Stockfish (>7 pieces) ──
            tier_source = "stockfish"  # default, overridden below

            # Can we reuse preserved cloud scoring data?
            _preserved_cloud = (
                existing is not None
                and existing.get("eval_source") == "cloud_eval"
            )

            if in_opening and _preserved_cloud:
                # ── Tier 2a: Masters move with preserved cloud scoring ──
                assert existing is not None  # guaranteed by _preserved_cloud
                t0 = _time.time()
                if cached_eval is not None:
                    score_before = cached_eval
                    mpv_before = cached_mpv
                    _eb_src = "cache"
                else:
                    score_before = existing["eval_before"]
                    mpv_before = existing.get("multipv_before")
                    _eb_src = "preserved"
                score_before_ms = (_time.time() - t0) * 1000

                t0 = _time.time()
                score_after = existing["eval_after"]
                _ea_src = "preserved"
                score_after_ms = (_time.time() - t0) * 1000

                tier_source = "cloud_eval"
                cached_eval = score_after
                cached_tb = None
                cached_mpv = None

            elif in_opening:
                # ── Tier 2b: Masters move, fresh cloud scoring query ──
                t0 = _time.time()
                if cached_eval is not None:
                    score_before = cached_eval
                    mpv_before = cached_mpv
                    _eb_src = "cache"
                else:
                    _lbl = f"[ply {ply+1} before] "
                    cloud = query_cloud_eval(
                        board.fen(), on_wait=on_wait, log_label=_lbl)
                    if cloud:
                        score_before = _cloud_eval_to_eval(cloud, board)
                        _eb_src = "cloud_eval"
                    else:
                        infos = engine.analyse(
                            board,
                            _analysis_limit_from_settings(board, limits),
                            multipv=MULTIPV, game=game_id,
                        )
                        score_before = _extract_eval(infos[0], board)
                        mpv_before = _extract_multipv(infos, board)
                        _eb_src = "sf_fallback"
                score_before_ms = (_time.time() - t0) * 1000

                t0 = _time.time()
                _lbl = f"[ply {ply+1} after] "
                cloud_after = query_cloud_eval(
                    board_after_fen, on_wait=on_wait, log_label=_lbl)
                if cloud_after:
                    score_after = _cloud_eval_to_eval(cloud_after, board_after)
                    _ea_src = "cloud_eval"
                    cached_mpv = None
                else:
                    # A cloud miss inside the opening does not set
                    # cloud_departed; later plies still try the cloud.
                    infos_after = engine.analyse(
                        board_after,
                        _analysis_limit_from_settings(board_after, limits),
                        multipv=MULTIPV, game=game_id,
                    )
                    score_after = _extract_eval(infos_after[0], board_after)
                    cached_mpv = _extract_multipv(infos_after, board_after)
                    _ea_src = "sf_fallback"
                score_after_ms = (_time.time() - t0) * 1000

                tier_source = "cloud_eval" if _ea_src == "cloud_eval" else "stockfish"
                cached_eval = score_after
                cached_tb = None

            elif not cloud_departed and _preserved_cloud:
                # ── Tier 3a: Post-masters, preserved cloud scoring ──
                assert existing is not None  # guaranteed by _preserved_cloud
                t0 = _time.time()
                if cached_eval is not None:
                    score_before = cached_eval
                    mpv_before = cached_mpv
                    _eb_src = "cache"
                else:
                    score_before = existing["eval_before"]
                    mpv_before = existing.get("multipv_before")
                    _eb_src = "preserved"
                score_before_ms = (_time.time() - t0) * 1000

                t0 = _time.time()
                score_after = existing["eval_after"]
                _ea_src = "preserved"
                score_after_ms = (_time.time() - t0) * 1000

                tier_source = "cloud_eval"
                cached_eval = score_after
                cached_tb = None
                cached_mpv = None

            elif not cloud_departed:
                # ── Tier 3b: Post-masters, fresh cloud scoring query ──
                t0 = _time.time()
                if cached_eval is not None:
                    score_before = cached_eval
                    mpv_before = cached_mpv
                    _eb_src = "cache"
                else:
                    _lbl = f"[ply {ply+1} before] "
                    cloud = query_cloud_eval(
                        board.fen(), on_wait=on_wait, log_label=_lbl)
                    if cloud:
                        score_before = _cloud_eval_to_eval(cloud, board)
                        _eb_src = "cloud_eval"
                    else:
                        infos = engine.analyse(
                            board,
                            _analysis_limit_from_settings(board, limits),
                            multipv=MULTIPV, game=game_id,
                        )
                        score_before = _extract_eval(infos[0], board)
                        mpv_before = _extract_multipv(infos, board)
                        _eb_src = "sf_fallback"
                score_before_ms = (_time.time() - t0) * 1000

                t0 = _time.time()
                _lbl = f"[ply {ply+1} after] "
                cloud_after = query_cloud_eval(
                    board_after_fen, on_wait=on_wait, log_label=_lbl)
                if cloud_after:
                    score_after = _cloud_eval_to_eval(cloud_after, board_after)
                    _ea_src = "cloud_eval"
                    tier_source = "cloud_eval"
                    cached_mpv = None
                else:
                    # First post-opening cloud miss: stop querying the cloud
                    # for the rest of this game (tier_source stays "stockfish").
                    cloud_departed = True
                    infos_after = engine.analyse(
                        board_after,
                        _analysis_limit_from_settings(board_after, limits),
                        multipv=MULTIPV, game=game_id,
                    )
                    score_after = _extract_eval(infos_after[0], board_after)
                    cached_mpv = _extract_multipv(infos_after, board_after)
                    _ea_src = "stockfish"
                score_after_ms = (_time.time() - t0) * 1000

                cached_eval = score_after
                cached_tb = None

            else:
                # ── Tier 4: Stockfish (fallback, always re-run) ──
                t0 = _time.time()
                if cached_eval is not None:
                    score_before = cached_eval
                    mpv_before = cached_mpv
                    _eb_src = "cache"
                else:
                    infos = engine.analyse(
                        board,
                        _analysis_limit_from_settings(board, limits),
                        multipv=MULTIPV, game=game_id,
                    )
                    score_before = _extract_eval(infos[0], board)
                    mpv_before = _extract_multipv(infos, board)
                    _eb_src = "stockfish"
                score_before_ms = (_time.time() - t0) * 1000

                t0 = _time.time()
                infos_after = engine.analyse(
                    board_after,
                    _analysis_limit_from_settings(board_after, limits),
                    multipv=MULTIPV, game=game_id,
                )
                score_after = _extract_eval(infos_after[0], board_after)
                _ea_src = "stockfish"
                score_after_ms = (_time.time() - t0) * 1000

                cached_eval = score_after
                cached_tb = None
                cached_mpv = _extract_multipv(infos_after, board_after)

        # --- Unified variable names for the rest of the loop ---
        eval_before = score_before
        eval_after = score_after
        eval_before_ms = score_before_ms
        eval_after_ms = score_after_ms
        eval_source = tier_source
        tb_before_stored = tb_before
        tb_after_stored = tb_after

        # --- cp_loss: 0 for opening (masters), computed for everything else ---
        # Stays 0 when either side lacks a score_cp, and when the move played
        # equals the recorded best move.
        # NOTE(review): the sign handling assumes score_cp is expressed from
        # White's point of view — confirm against the eval helpers.
        cp_loss = 0
        if not in_opening:
            before_cp = eval_before.get("score_cp")
            after_cp = eval_after.get("score_cp")
            if before_cp is not None and after_cp is not None:
                best_uci = eval_before.get("best_move_uci", "")
                if best_uci and actual_move == chess.Move.from_uci(best_uci):
                    cp_loss = 0
                elif board.turn == chess.WHITE:
                    cp_loss = max(0, before_cp - after_cp)
                else:
                    cp_loss = max(0, after_cp - before_cp)

        # Short tag used only in the per-ply log line below.
        _source_tag = (
            "[book] " if in_opening
            else "[cloud] " if eval_source == "cloud_eval"
            else "[tb] " if eval_source == "tablebase"
            else ""
        )
        _log.info(
            "  ply %d %s%s: %s — before=%s(%.0fms cp=%s) after=%s(%.0fms cp=%s) cp_loss=%d",
            ply + 1,
            _source_tag,
            board.san(actual_move),
            eval_source,
            _eb_src,
            eval_before_ms,
            eval_before.get("score_cp"),
            _ea_src,
            eval_after_ms,
            eval_after.get("score_cp"),
            cp_loss,
        )

        # --- Build move dict ---
        # "ply" is stored 1-based; the local counter is 0-based.
        move_dict = {
            "ply": ply + 1,
            "fen_before": board.fen(),
            "fen_after": board_after_fen,
            "move_san": board.san(actual_move),
            "move_uci": actual_move.uci(),
            "side": side,
            "eval_source": eval_source,
            "in_opening": in_opening,
            "eval_before": eval_before,
            "eval_after": eval_after,
            "multipv_before": mpv_before,
            "tablebase_before": tb_before_stored,
            "tablebase_after": tb_after_stored,
            "opening_explorer": explorer_data,
            "cp_loss": cp_loss,
            "board": {
                "piece_count": piece_count,
                "is_check": is_check,
                "is_capture": is_capture,
                "is_castling": is_castling,
                "is_en_passant": is_en_passant,
                "is_promotion": is_promotion,
                "promoted_to": promoted_to,
                "legal_moves_count": len(list(board.legal_moves)),
            },
            "clock": {
                "player": player_clock,
                "opponent": opponent_clock,
                "time_spent": round(time_spent, 1) if time_spent is not None else None,
            },
            "timing_ms": {
                "eval_before": round(eval_before_ms, 1),
                "eval_after": round(eval_after_ms, 1),
            },
        }
        moves_data.append(move_dict)

        # Update state for next iteration
        if side == ("white" if player_color == chess.WHITE else "black"):
            prev_player_clock = player_clock
        else:
            prev_opponent_clock = opponent_clock

        ply += 1
        node = next_node

    # --- Build game-level dict ---
    p_color = "white" if player_color == chess.WHITE else "black"
    # Rebinds the game_id parameter to the PGN Link/Site header. All
    # engine.analyse() calls have already run, so only the returned
    # "link"/"source" header fields are affected.
    game_id = game.headers.get("Link", game.headers.get("Site", ""))
    source = (
        "lichess"
        if "lichess.org" in game_id
        else ("chess.com" if "chess.com" in game_id else "unknown")
    )

    return {
        "headers": {
            "white": game.headers.get("White", "?"),
            "black": game.headers.get("Black", "?"),
            "date": game.headers.get("Date", "?"),
            "result": game.headers.get("Result", "*"),
            "opening": game.headers.get("Opening", game.headers.get("Event", "?")),
            "source": source,
            "link": game_id,
        },
        "player_color": p_color,
        "analyzed_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "settings": settings.to_dict(),
        "moves": moves_data,
    }

`load_analysis_data(path=None)`

Load analysis_data.json, returning empty structure if not found.

Parameters:

Name	Type	Description	Default
`path`	`Path \| None`	Path to analysis_data.json. Defaults to data directory.	`None`

Returns:

Type	Description
`dict`	Parsed dict with at least {version, player, games}.

Source code in src/chess_self_coach/analysis.py

def load_analysis_data(path: Path | None = None) -> dict:
    """Load analysis_data.json, returning empty structure if not found.

    Args:
        path: Path to analysis_data.json. Defaults to data directory.

    Returns:
        Parsed dict with at least {version, player, games}.
    """
    if path is None:
        path = _default_analysis_path()
    if not path.exists():
        return {"version": "1.0", "player": {}, "games": {}}
    try:
        with open(path) as f:
            return json.load(f)
    except (json.JSONDecodeError, KeyError):
        _log.warning("Corrupted analysis_data.json, returning empty structure")
        return {"version": "1.0", "player": {}, "games": {}}

`save_analysis_data(data, path=None)`

Atomically write analysis_data.json.

Parameters:

Name	Type	Description	Default
`data`	`dict`	Full analysis data dict.	required
`path`	`Path \| None`	Target path. Defaults to data directory.	`None`

Source code in src/chess_self_coach/analysis.py

def save_analysis_data(data: dict, path: Path | None = None) -> None:
    """Persist the analysis data to analysis_data.json via an atomic write.

    The ``version`` key of *data* is overwritten in place with ``"1.0"``
    before writing, so the caller's dict is modified.

    Args:
        data: Full analysis data dict.
        path: Target path. Defaults to data directory.
    """
    target = _default_analysis_path() if path is None else path
    data["version"] = "1.0"
    atomic_write_json(target, data)

`settings_match(stored, current)`

Check if stored analysis settings match current settings.

Used to skip re-analysis of games already analyzed with identical settings.

Parameters:

Name	Type	Description	Default
`stored`	`dict`	Settings dict from a previously analyzed game.	required
`current`	`dict`	Current settings dict.	required

Returns:

Type	Description
`bool`	True if settings are equivalent.

Source code in src/chess_self_coach/analysis.py

def settings_match(stored: dict, current: dict) -> bool:
    """Tell whether two analysis-settings dicts describe the same analysis.

    Only ``threads``, ``hash_mb`` and ``limits`` are compared; any other
    keys are ignored, and a key absent from both sides counts as equal.
    Used to skip re-analysis of games already analyzed with identical
    settings.

    Args:
        stored: Settings dict from a previously analyzed game.
        current: Current settings dict.

    Returns:
        True if settings are equivalent.
    """
    compared_keys = ("threads", "hash_mb", "limits")
    return all(stored.get(key) == current.get(key) for key in compared_keys)

Training mode: explanation generation, move classification, and training data utilities.

Pure functions for generating rule-based explanations, classifying mistakes by centipawn loss, and managing training_data.json (stats, refresh). The heavy analysis pipeline lives in analysis.py (Phase 1: collection, Phase 2: derivation).

`classify_mistake(cp_loss)`

Classify a move by centipawn loss.

Returns:

Type	Description
`str \| None`	Category string or None if the move is acceptable.

Source code in src/chess_self_coach/trainer.py

def classify_mistake(cp_loss: int) -> str | None:
    """Map a centipawn loss onto a mistake category.

    Thresholds are checked from most to least severe, so the first one
    reached decides the label.

    Args:
        cp_loss: Centipawn loss of the move.

    Returns:
        Category string or None if the move is acceptable.
    """
    severity_ladder = (
        (BLUNDER_THRESHOLD, "blunder"),
        (MISTAKE_THRESHOLD, "mistake"),
        (INACCURACY_THRESHOLD, "inaccuracy"),
    )
    for floor, label in severity_ladder:
        if cp_loss >= floor:
            return label
    return None

`format_score_cp(cp)`

Format centipawn value as score string like '+0.32'.

Source code in src/chess_self_coach/trainer.py

def format_score_cp(cp: int | None) -> str:
    """Format centipawn value as score string like '+0.32'."""
    if cp is None:
        return "+0.00"
    value = cp / 100.0
    sign = "+" if value >= 0 else ""
    return f"{sign}{value:.2f}"

`generate_context(category, cp_loss, was_mate, score_after_cp, fen='', score_before_cp=None, player_color='white')`

Generate a short context sentence shown BEFORE the player answers.

Includes game phase, advantage context, and what went wrong.

Source code in src/chess_self_coach/trainer.py

def generate_context(
    category: str,
    cp_loss: int,
    was_mate: bool,
    score_after_cp: int | None,
    fen: str = "",
    score_before_cp: int | None = None,
    player_color: str = "white",
) -> str:
    """Generate a short context sentence shown BEFORE the player answers.

    The sentence starts with a prefix made of the game phase (when a FEN is
    given), the colour the player had, and the advantage description (when a
    score before the move is given), followed by what went wrong.

    Args:
        category: Mistake category string (not used by the current wording).
        cp_loss: Centipawn loss of the move.
        was_mate: True if the position before the move was a forced mate.
        score_after_cp: Score after the move in centipawns, or None. Only its
            magnitude is used here.
        fen: Position before the move; an empty string skips phase detection.
        score_before_cp: Score before the move in centipawns; None skips the
            advantage description.
        player_color: Colour the player had; shown capitalised in the prefix.

    Returns:
        The context sentence.
    """
    score_after_is_mate = score_after_cp is not None and abs(score_after_cp) >= MATE_CP

    phase = _detect_game_phase(fen) if fen else ""
    color_label = f"playing as {player_color.capitalize()}"
    advantage = (
        _describe_advantage(score_before_cp, player_color)
        if score_before_cp is not None
        else ""
    )

    if phase:
        lead = f"{phase}, {color_label}"
    else:
        # Upper-case the first letter only. str.capitalize() would also
        # lower-case the rest and turn "playing as White" into
        # "Playing as white".
        lead = color_label[0].upper() + color_label[1:]
    # The advantage is appended whenever it is available, also when no phase
    # could be determined.
    prefix = f"{lead}, {advantage}." if advantage else f"{lead}."

    # A forced mate that ended up near equality (< 50 cp) is reported as a draw.
    if was_mate and score_after_cp is not None and abs(score_after_cp) < 50:
        return f"{prefix} Your move threw away a winning position and led to a draw."
    if was_mate:
        return f"{prefix} Your move threw away a forced mate."
    # Either the resulting score or the loss itself is mate-sized.
    if score_after_is_mate or cp_loss >= MATE_CP:
        return f"{prefix} Your move allowed your opponent to force checkmate."

    pawns = cp_loss / 100.0
    if pawns >= 5:
        return f"{prefix} Your move lost a decisive advantage."
    if pawns >= 2:
        return f"{prefix} Your move lost significant material ({pawns:.1f} pawns)."
    if pawns >= 1:
        return f"{prefix} Your move cost about {pawns:.1f} pawns."
    return f"{prefix} Your move was inaccurate ({pawns:.1f} pawns)."

`generate_explanation(board, actual_san, best_san, cp_loss, category, was_mate=False, score_after_cp=None)`

Generate a rule-based explanation for a mistake.

Detects basic patterns: missed captures, missed checks/checkmates, hanging pieces, stalemate. Falls back to a generic template.

Parameters:

Name	Type	Description	Default
`board`	`Board`	Board position BEFORE the move was played.	required
`actual_san`	`str`	The move the player made (SAN).	required
`best_san`	`str`	The best move according to Stockfish (SAN).	required
`cp_loss`	`int`	Centipawn loss.	required
`category`	`str`	Mistake category string.	required
`was_mate`	`bool`	True if the position before was a forced mate.	`False`
`score_after_cp`	`int \| None`	Score after the move (white perspective), for context.	`None`

Returns:

Type	Description
`str`	Explanation string.

Source code in src/chess_self_coach/trainer.py

def generate_explanation(
    board: chess.Board,
    actual_san: str,
    best_san: str,
    cp_loss: int,
    category: str,
    was_mate: bool = False,
    score_after_cp: int | None = None,
) -> str:
    """Generate a rule-based explanation for a mistake.

    Detects basic patterns: missed captures, missed checks/checkmates,
    hanging pieces, stalemate. Falls back to a generic template.

    Args:
        board: Board position BEFORE the move was played. It is not modified;
            moves are tried on copies.
        actual_san: The move the player made (SAN).
        best_san: The best move according to Stockfish (SAN).
        cp_loss: Centipawn loss.
        category: Mistake category string.
        was_mate: True if the position before was a forced mate.
        score_after_cp: Score after the move (white perspective), for context.

    Returns:
        Explanation string (sentences joined with single spaces).
    """
    score_after_is_mate = score_after_cp is not None and abs(score_after_cp) >= MATE_CP

    # Build opening sentence with appropriate phrasing
    if was_mate and score_after_cp is not None and abs(score_after_cp) < 50:
        parts = [f"You played {actual_san} ({category}). You had a forced mate but threw it away — the game is now a draw."]
    elif was_mate:
        parts = [f"You played {actual_san} ({category}). You had a forced mate but lost it."]
    elif score_after_is_mate:
        parts = [f"You played {actual_san} ({category}). This allowed your opponent to force checkmate."]
    else:
        loss_str = _format_cp_loss_human(cp_loss)
        parts = [f"You played {actual_san} ({category}, lost {loss_str})."]

    # Replay the played move on a copy for stalemate / hanging-piece checks.
    # An unparseable or illegal SAN simply disables those checks.
    actual_move = None
    board_after_actual = None
    try:
        actual_move = board.parse_san(actual_san)
    except ValueError:
        pass
    else:
        board_after_actual = board.copy()
        board_after_actual.push(actual_move)
        if board_after_actual.is_stalemate():
            parts.append("This leads to stalemate (draw)!")

    try:
        best_move = board.parse_san(best_san)
    except ValueError:
        # Cannot analyse the best move; mention it verbatim and stop.
        parts.append(f"A better move was {best_san}.")
        return " ".join(parts)

    # Check if best move delivers checkmate
    board_after_best = board.copy()
    board_after_best.push(best_move)
    if board_after_best.is_checkmate():
        parts.append(f"{best_san} was checkmate!")
        return " ".join(parts)

    # Check if best move captures a piece
    if board.is_capture(best_move):
        captured_piece = board.piece_at(best_move.to_square)
        if captured_piece is None:
            # A capture onto an empty square can only be en passant.
            parts.append(f"{best_san} wins a pawn (en passant).")
        else:
            piece_name = chess.piece_name(captured_piece.piece_type)
            parts.append(f"You missed capturing the {piece_name} with {best_san}.")
    else:
        parts.append(f"A better move was {best_san}.")

    # Check if best move gives check
    if board_after_best.is_check():
        parts.append(f"{best_san} also gives check.")

    # Check if the actual move hangs a piece
    if board_after_actual is not None:
        # Name the piece standing on the destination square AFTER the move,
        # so a promotion is reported as the promoted piece, not as a pawn.
        landed_piece = board_after_actual.piece_at(actual_move.to_square)
        if landed_piece is not None:
            attacked = board_after_actual.is_attacked_by(
                not board.turn, actual_move.to_square
            )
            defended = board_after_actual.is_attacked_by(
                board.turn, actual_move.to_square
            )
            if attacked and not defended:
                piece_name = chess.piece_name(landed_piece.piece_type)
                sq_name = chess.square_name(actual_move.to_square)
                parts.append(f"Your {piece_name} on {sq_name} is left undefended.")

    return " ".join(parts)

`get_stats_data()`

Compute training statistics from training_data.json.

Returns:

Type	Description
`dict`	Dict with keys: generated, total, by_category, by_source.

Raises:

Type	Description
`FileNotFoundError`	If training_data.json does not exist.

Source code in src/chess_self_coach/trainer.py

def get_stats_data() -> dict:
    """Summarise training_data.json as counts per category and per source.

    Returns:
        Dict with keys: generated, total, by_category, by_source.

    Raises:
        FileNotFoundError: If training_data.json does not exist.
    """
    source_file = training_data_path()
    if not source_file.exists():
        raise FileNotFoundError(f"No training data at {source_file}")

    with open(source_file) as handle:
        payload = json.load(handle)

    entries = payload.get("positions", [])

    def _tally(labels) -> dict[str, int]:
        """Count occurrences of each label, keeping first-seen order."""
        counts: dict[str, int] = {}
        for label in labels:
            counts[label] = counts.get(label, 0) + 1
        return counts

    # Entries without a category / game source are counted as "unknown".
    by_category = _tally(entry.get("category", "unknown") for entry in entries)
    by_source = _tally(
        entry.get("game", {}).get("source", "unknown") for entry in entries
    )

    return {
        "generated": payload.get("generated", "unknown"),
        "total": len(entries),
        "by_category": by_category,
        "by_source": by_source,
    }

`print_stats()`

Show training progress from training_data.json.

Source code in src/chess_self_coach/trainer.py

def print_stats() -> None:
    """Print a summary of training_data.json to stdout.

    When the file is missing, a hint is written to stderr and the process
    exits with status 1.
    """
    try:
        summary = get_stats_data()
    except FileNotFoundError:
        print(
            "No training data found. Run: chess-self-coach train --prepare",
            file=sys.stderr,
        )
        sys.exit(1)

    total = summary["total"]
    if total == 0:
        print("  No positions in training data.")
        return

    print("\n  Training Data Stats")
    print(f"  Generated: {summary['generated']}")
    print(f"  Total positions: {total}")

    # Categories are always listed in this fixed order, zero counts included.
    print("\n  By category:")
    per_category = summary["by_category"]
    for name in ("blunder", "mistake", "inaccuracy"):
        print(f"    {name.capitalize()}: {per_category.get(name, 0)}")

    # Sources are listed alphabetically.
    print("\n  By source:")
    per_source = summary["by_source"]
    for origin in sorted(per_source):
        print(f"    {origin}: {per_source[origin]}")

`refresh_explanations()`

Regenerate explanations in training_data.json without re-running Stockfish.

Reads existing positions, rebuilds explanations using generate_explanation(), and writes back. SRS progress and all other fields are preserved.

Source code in src/chess_self_coach/trainer.py

def refresh_explanations() -> None:
    """Regenerate explanations in training_data.json without re-running Stockfish.

    Reads existing positions, rebuilds explanations and contexts, and writes
    the file back. SRS progress and all other fields are preserved.

    Steps, in order:
        1. Drop positions whose player_move equals best_move.
        2. Drop positions that are beyond DOMINATED_POSITION_CP for the same
           side both before and after the move.
        3. Flip "TB:win"/"TB:loss" scores on Black tablebase positions.
        4. Rebuild explanation and context for each remaining position
           (tablebase helpers when tablebase data is stored, the rule-based
           generators otherwise) and repair an "unknown" game source when the
           game id names the platform.
        5. Write the result with atomic_write_json().

    Exits the process with status 1 when training_data.json does not exist.
    """
    data_path = training_data_path()

    if not data_path.exists():
        print("No training data found. Run: chess-self-coach train --prepare", file=sys.stderr)
        sys.exit(1)

    with open(data_path) as f:
        data = json.load(f)

    positions = data.get("positions", [])

    def _parse_score_cp(s: str) -> int | None:
        """Convert a pawn-score string such as '+0.32' to centipawns.

        Returns None for anything that is not a finite number
        (e.g. "TB:win" or None).
        """
        try:
            # round(), not int(): float("0.29") * 100 is 28.999999999999996,
            # which int() would truncate to 28 and so lose one centipawn.
            return round(float(s) * 100)
        except (ValueError, TypeError, OverflowError):
            return None

    # Remove invalid positions (player_move == best_move)
    before_count = len(positions)
    positions = [p for p in positions if p["player_move"] != p["best_move"]]
    removed = before_count - len(positions)
    if removed:
        data["positions"] = positions
        print(f"  Removed {removed} invalid position(s) (player_move == best_move)")

    # Remove positions where both moves win or both lose (no learning value)
    before_count = len(positions)
    filtered = []
    for p in positions:
        sb = _parse_score_cp(p.get("score_before", ""))
        sa = _parse_score_cp(p.get("score_after", ""))
        if sb is None or sa is None:
            # Non-numeric scores cannot be judged; keep the position.
            filtered.append(p)
            continue
        mul = 1 if p.get("player_color") == "white" else -1
        player_before = sb * mul
        player_after = sa * mul
        if player_before > DOMINATED_POSITION_CP and player_after > DOMINATED_POSITION_CP:
            continue
        if player_before < -DOMINATED_POSITION_CP and player_after < -DOMINATED_POSITION_CP:
            continue
        filtered.append(p)
    positions = filtered
    removed_decisive = before_count - len(positions)
    if removed_decisive:
        data["positions"] = positions
        print(f"  Removed {removed_decisive} position(s) already decisive (both win or both lose)")

    # Fix tablebase scores for Black: convert from side-to-move to player perspective
    # NOTE(review): nothing visible here records that a position was already
    # converted, so this appears to flip the same scores again on every run —
    # confirm against the code that writes these fields.
    _tb_flip = {"TB:win": "TB:loss", "TB:loss": "TB:win"}
    tb_fixed = 0
    for pos in positions:
        if "tablebase" not in pos or pos.get("player_color") != "black":
            continue
        for key in ("score_before", "score_after", "score_after_best"):
            val = pos.get(key)
            if val in _tb_flip:
                pos[key] = _tb_flip[val]
                tb_fixed += 1
    if tb_fixed:
        data["positions"] = positions
        print(f"  Fixed {tb_fixed} tablebase score(s) (side-to-move → player perspective)")

    updated = 0
    for pos in positions:
        board = chess.Board(pos["fen"])

        # Tablebase-resolved positions: regenerate from stored tablebase data
        tb_data = pos.get("tablebase")
        if tb_data:
            tb_before = tb_data.get("before")
            tb_after = tb_data.get("after")
            if tb_before:
                tb_res_before = TablebaseResult(
                    category=tb_before["category"],
                    dtz=tb_before.get("dtz"),
                    dtm=tb_before.get("dtm"),
                    best_move=None,
                )
                new_context = tablebase_context(
                    tb_res_before, len(board.piece_map()),
                    pos.get("player_color", "white"),
                )
                if tb_after:
                    tb_res_after = TablebaseResult(
                        category=tb_after["category"],
                        dtz=tb_after.get("dtz"),
                        dtm=tb_after.get("dtm"),
                        best_move=None,
                    )
                    new_explanation = tablebase_explanation(
                        tb_res_before, tb_res_after,
                        pos["player_move"], pos["best_move"],
                    )
                else:
                    # No "after" result stored: keep the existing explanation.
                    new_explanation = pos.get("explanation", "")
            else:
                # Tablebase entry without a "before" result: leave untouched.
                continue
        else:
            # Parse scores to cp (a missing score is treated as "+0.00")
            score_before_cp = _parse_score_cp(pos.get("score_before", "+0.00"))
            score_after_cp = _parse_score_cp(pos.get("score_after", "+0.00"))

            was_mate = score_before_cp is not None and abs(score_before_cp) >= MATE_CP

            new_explanation = generate_explanation(
                board, pos["player_move"], pos["best_move"],
                pos["cp_loss"], pos["category"],
                was_mate=was_mate, score_after_cp=score_after_cp,
            )
            new_context = generate_context(
                pos["category"], pos["cp_loss"], was_mate, score_after_cp,
                fen=pos["fen"], score_before_cp=score_before_cp,
                player_color=pos.get("player_color", "white"),
            )
        # Fix source if "unknown" and game.id hints at the platform
        game = pos.get("game", {})
        game_id = game.get("id", "")
        if game.get("source") == "unknown":
            if "lichess.org" in game_id.lower():
                game["source"] = "lichess"
            elif "chess.com" in game_id.lower():
                game["source"] = "chess.com"

        # Only count positions whose text actually changed.
        if new_explanation != pos.get("explanation") or new_context != pos.get("context"):
            pos["explanation"] = new_explanation
            pos["context"] = new_context
            updated += 1

    atomic_write_json(data_path, data)

    print(f"  Refreshed {updated}/{len(positions)} explanation(s) in {data_path}")
    if updated:
        print("  Run /review-training to verify text quality")

`time_pressure_context(player_clock, opponent_clock)`

Generate time pressure context string, or empty if not relevant.

Parameters:

Name	Type	Description	Default
`player_clock`	`float \| None`	Player's remaining time in seconds, or None.	required
`opponent_clock`	`float \| None`	Opponent's remaining time in seconds, or None.	required

Source code in src/chess_self_coach/trainer.py

def time_pressure_context(
    player_clock: float | None, opponent_clock: float | None,
) -> str:
    """Generate time pressure context string, or empty if not relevant.

    Args:
        player_clock: Player's remaining time in seconds, or None.
        opponent_clock: Opponent's remaining time in seconds, or None.
    """
    if player_clock is None:
        return ""

    p_min = player_clock / 60

    if p_min < 2:
        if opponent_clock and opponent_clock / 60 > p_min * 2:
            o_min = opponent_clock / 60
            return (
                f"You were under severe time pressure "
                f"({p_min:.0f}min left vs {o_min:.0f}min for your opponent)."
            )
        return f"You were under time pressure ({p_min:.0f}min remaining)."

    if opponent_clock and player_clock > opponent_clock * 1.5:
        o_min = opponent_clock / 60
        return (
            f"You had more time ({p_min:.0f}min vs {o_min:.0f}min) "
            f"and could have taken longer on this move."
        )

    return ""

FastAPI backend server for Chess Self-Coach [App] mode.

Serves the PWA with API endpoints for native Stockfish analysis. Replaces the old static-file-only serve_pwa() from trainer.py.

Key design decisions:

- No temp dir: PWA files are served directly from source; sw.js and training_data.json are served via dynamic routes (always fresh, no copy needed).
- Single Stockfish engine instance, guarded by an asyncio.Lock for thread safety.
- Engine crash recovery: auto-restart on EngineTerminatedError.
- Port scanning: tries 8000-8010 if the default port is busy.