Source code for build_tools.syllable_walk_tui.services.selector_runner

"""
Name selector execution service.

Mirrors the CLI behavior of build_tools.name_selector.
"""

import json
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING

from build_tools.name_selector.name_class import get_default_policy_path, load_name_classes
from build_tools.name_selector.selector import compute_selection_statistics, select_names
from build_tools.syllable_walk_tui.services.exporter import export_names_to_txt

if TYPE_CHECKING:
    from build_tools.syllable_walk_tui.modules.generator import CombinerState, SelectorState
    from build_tools.syllable_walk_tui.modules.oscillator import PatchState



[docs]
@dataclass
class SelectorResult:
    """
    Result from selector execution.

    ``run_selector`` returns this for both outcomes: on success ``error`` is
    ``None`` and the other fields describe the selection; on failure ``error``
    holds a message and the other fields carry empty placeholders.
    """

    # Selected candidate records; run_selector reads each record's "name" key.
    selected: list[dict]
    # The "name" value of every record in ``selected``, in the same order.
    selected_names: list[str]
    # Path of the selections JSON file that was written (``Path()`` on failure).
    output_path: Path
    # Contents written to the selector meta JSON file (``{}`` on failure).
    meta_output: dict
    # Failure message, or None when the run succeeded.
    error: str | None = None




[docs]
def run_selector(
    patch: "PatchState",
    combiner_state: "CombinerState",
    selector_state: "SelectorState",
) -> SelectorResult:
    """
    Run name_selector for a patch (mirrors CLI behavior exactly).

    This function mirrors the CLI:
        python -m build_tools.name_selector \\
            --run-dir <patch.corpus_dir> \\
            --candidates <from combiner output> \\
            --name-class <name_class> \\
            --count <count> \\
            --mode <mode>

    Output is written to: <run-dir>/selections/{prefix}_{name_class}_{N}syl.json
    A meta file is written next to it as: {prefix}_selector_meta.json

    TUI Extension:
        When combiner_state.syllable_mode == "all":
        - Writes a combined selection file: {prefix}_{name_class}_all.json
        - Also writes per-length selections: {prefix}_{name_class}_2syl/3syl/4syl.json
        - Exports matching .txt files for each JSON output
        - Records a "warnings" entry in the meta output when a per-length
          candidates file is not registered or does not exist

    Args:
        patch: PatchState with corpus data
        combiner_state: CombinerState for candidates path and seed
        selector_state: SelectorState with selection parameters

    Returns:
        SelectorResult with selected names and metadata. Failures are not
        raised: they are reported through ``SelectorResult.error``, which is
        always a non-empty string when something went wrong.

    Note:
        Caller is responsible for validating patch state and combiner output
        before calling.
    """
    run_dir = patch.corpus_dir
    prefix = patch.corpus_type.lower() if patch.corpus_type else "nltk"
    selector = selector_state
    combiner = combiner_state

    # Guard clauses: nothing is read or written until the inputs look usable.
    if not run_dir:
        return _selector_failure("No corpus directory set")

    if not combiner.last_output_path:
        return _selector_failure("No candidates generated. Run Generate Candidates first.")

    candidates_path = Path(combiner.last_output_path)
    if not candidates_path.exists():
        return _selector_failure(f"Candidates file not found: {candidates_path.name}")

    try:
        candidates = _load_candidates(candidates_path)
        if not candidates:
            return _selector_failure("No candidates in file")

        policy_path = get_default_policy_path()
        policies = load_name_classes(policy_path)
        if selector.name_class not in policies:
            return _selector_failure(f"Unknown name class: {selector.name_class}")
        policy = policies[selector.name_class]

        # Statistics and selection for the combined candidate set.
        stats, selected = _select_with_stats(candidates, policy, selector, combiner.seed)

        # Path(...) keeps this working when corpus_dir is a plain string path.
        selections_dir = Path(run_dir) / "selections"
        selections_dir.mkdir(parents=True, exist_ok=True)

        # "all" is a TUI-only mode; otherwise the label is the syllable count.
        all_lengths = combiner.syllable_mode == "all"
        if all_lengths:
            syllables_label = "all"
            output_filename = f"{prefix}_{selector.name_class}_all.json"
        else:
            syllables_label = str(combiner.syllables)
            output_filename = f"{prefix}_{selector.name_class}_{syllables_label}syl.json"
        output_path = selections_dir / output_filename

        _write_json(
            output_path,
            _selection_payload(
                candidates_path.name, selector, policy, policy_path, combiner.seed, stats, selected
            ),
        )

        selected_names = [s["name"] for s in selected]
        warnings: list[str] = []

        if all_lengths:
            # TXT export of the combined output, then one selection per length.
            export_names_to_txt(selected_names, str(output_path))
            missing = _write_per_length_selections(
                combiner.last_candidates_files or {},
                selections_dir,
                prefix,
                selector,
                policy,
                policy_path,
                combiner.seed,
            )
            if missing:
                warnings.append(
                    "Missing candidates files for syllable counts: " + ", ".join(missing)
                )

        meta_output = {
            "tool": "name_selector",
            "version": "1.0.0",
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "arguments": {
                "run_dir": str(run_dir),
                "candidates": str(candidates_path),
                "name_class": selector.name_class,
                "policy_file": str(policy_path),
                "count": selector.count,
                "mode": selector.mode,
                "order": selector.order,
                "seed": combiner.seed,
            },
            "input": {
                "candidates_file": str(candidates_path),
                "candidates_loaded": len(candidates),
                "policy_file": str(policy_path),
                "policy_name": selector.name_class,
                "policy_description": policy.description,
            },
            "output": {
                "selections_file": str(output_path),
                "selections_count": len(selected),
            },
            "statistics": {
                "total_evaluated": stats["total_evaluated"],
                "admitted": stats["admitted"],
                # Guard against division by zero when nothing was evaluated.
                "admitted_percentage": (
                    round(stats["admitted"] / stats["total_evaluated"] * 100, 2)
                    if stats["total_evaluated"] > 0
                    else 0
                ),
                "rejected": stats["rejected"],
                "rejection_reasons": stats["rejection_reasons"],
                "score_distribution": stats["score_distribution"],
                "mode": selector.mode,
                "source_prefix": prefix,
                "syllable_count": syllables_label,
            },
        }

        if warnings:
            meta_output["warnings"] = warnings

        _write_json(selections_dir / f"{prefix}_selector_meta.json", meta_output)

        return SelectorResult(
            selected=selected,
            selected_names=selected_names,
            output_path=output_path,
            meta_output=meta_output,
            error=None,
        )

    except Exception as e:
        # Service boundary: report the failure instead of raising. Some
        # exceptions stringify to "" (e.g. a bare AssertionError()), which would
        # make the result look successful to callers testing `if result.error`,
        # so fall back to the exception class name.
        return _selector_failure(str(e) or type(e).__name__)


def _selector_failure(message: str) -> SelectorResult:
    """Build the empty SelectorResult that reports ``message`` as its error."""
    return SelectorResult(
        selected=[],
        selected_names=[],
        output_path=Path(),
        meta_output={},
        error=message,
    )


def _load_candidates(path: Path) -> list:
    """Read a candidates JSON file and return its "candidates" list ([] if absent)."""
    with open(path, encoding="utf-8") as f:
        return json.load(f).get("candidates", [])


def _write_json(path: Path, payload: dict) -> None:
    """Write ``payload`` to ``path`` as UTF-8 JSON indented by two spaces."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def _select_with_stats(candidates: list, policy, selector: "SelectorState", seed):
    """Return ``(stats, selected)`` for ``candidates`` under ``policy`` and the selector settings."""
    stats = compute_selection_statistics(
        candidates, policy, mode=selector.mode  # type: ignore[arg-type]
    )
    selected = select_names(
        candidates,
        policy,
        count=selector.count,
        mode=selector.mode,  # type: ignore[arg-type]
        order=selector.order,  # type: ignore[arg-type]
        seed=seed,
    )
    return stats, selected


def _selection_payload(
    source_name: str,
    selector: "SelectorState",
    policy,
    policy_path,
    seed,
    stats: dict,
    selected: list,
) -> dict:
    """Build the selections JSON document (metadata plus selections) for one candidate set."""
    return {
        "metadata": {
            "source_candidates": source_name,
            "name_class": selector.name_class,
            "policy_description": policy.description,
            "policy_file": str(policy_path),
            "mode": selector.mode,
            "order": selector.order,
            "seed": seed,
            "total_evaluated": stats["total_evaluated"],
            "admitted": stats["admitted"],
            "rejected": stats["rejected"],
            "rejection_reasons": stats["rejection_reasons"],
            "score_distribution": stats["score_distribution"],
            "output_count": len(selected),
            "generated_at": datetime.now(timezone.utc).isoformat(),
        },
        "selections": selected,
    }


def _write_per_length_selections(
    candidates_files: dict,
    selections_dir: Path,
    prefix: str,
    selector: "SelectorState",
    policy,
    policy_path,
    seed,
) -> list[str]:
    """
    Write a selection JSON and TXT export for each of the 2/3/4 syllable lengths.

    Returns the syllable counts whose candidates file is not registered in
    ``candidates_files`` or does not exist on disk. A file that exists but
    holds no candidates is skipped without being reported.
    """
    missing: list[str] = []
    for syllable_count in ("2", "3", "4"):
        candidates_file = candidates_files.get(syllable_count)
        if not candidates_file:
            missing.append(syllable_count)
            continue

        per_path = Path(candidates_file)
        if not per_path.exists():
            missing.append(syllable_count)
            continue

        per_candidates = _load_candidates(per_path)
        if not per_candidates:
            continue

        per_stats, per_selected = _select_with_stats(per_candidates, policy, selector, seed)

        per_output_path = selections_dir / f"{prefix}_{selector.name_class}_{syllable_count}syl.json"
        _write_json(
            per_output_path,
            _selection_payload(
                per_path.name, selector, policy, policy_path, seed, per_stats, per_selected
            ),
        )
        export_names_to_txt([s["name"] for s in per_selected], str(per_output_path))

    return missing