Source code for build_tools.syllable_walk_tui.services.selector_runner

"""
Name selector execution service.

Mirrors the CLI behavior of build_tools.name_selector.
"""

import json
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING

from build_tools.name_selector.name_class import get_default_policy_path, load_name_classes
from build_tools.name_selector.selector import compute_selection_statistics, select_names
from build_tools.syllable_walk_tui.services.exporter import export_names_to_txt

if TYPE_CHECKING:
    from build_tools.syllable_walk_tui.modules.generator import CombinerState, SelectorState
    from build_tools.syllable_walk_tui.modules.oscillator import PatchState


[docs] @dataclass class SelectorResult: """Result from selector execution.""" selected: list[dict] selected_names: list[str] output_path: Path meta_output: dict error: str | None = None
def _error_result(message: str) -> SelectorResult:
    """Build the failure result: empty selections carrying only ``message``."""
    return SelectorResult(
        selected=[],
        selected_names=[],
        output_path=Path(),
        meta_output={},
        error=message,
    )


def _load_candidates(candidates_file: Path) -> list[dict]:
    """Read the ``candidates`` list from a candidates JSON file ([] if the key is absent)."""
    with open(candidates_file, encoding="utf-8") as f:
        return json.load(f).get("candidates", [])


def _evaluate_candidates(candidates: list[dict], policy, selector_state: "SelectorState", seed):
    """Compute selection statistics and pick names for one candidate set.

    Returns:
        ``(stats, selected)`` exactly as returned by
        ``compute_selection_statistics`` and ``select_names``.
    """
    stats = compute_selection_statistics(
        candidates, policy, mode=selector_state.mode  # type: ignore[arg-type]
    )
    selected = select_names(
        candidates,
        policy,
        count=selector_state.count,
        mode=selector_state.mode,  # type: ignore[arg-type]
        order=selector_state.order,  # type: ignore[arg-type]
        seed=seed,
    )
    return stats, selected


def _write_selection_file(
    output_path: Path,
    *,
    source_name: str,
    policy,
    policy_path,
    selector_state: "SelectorState",
    seed,
    stats,
    selected: list[dict],
) -> None:
    """Write one selections JSON file (metadata header plus the selected records)."""
    output = {
        "metadata": {
            "source_candidates": source_name,
            "name_class": selector_state.name_class,
            "policy_description": policy.description,
            "policy_file": str(policy_path),
            "mode": selector_state.mode,
            "order": selector_state.order,
            "seed": seed,
            "total_evaluated": stats["total_evaluated"],
            "admitted": stats["admitted"],
            "rejected": stats["rejected"],
            "rejection_reasons": stats["rejection_reasons"],
            "score_distribution": stats["score_distribution"],
            "output_count": len(selected),
            "generated_at": datetime.now(timezone.utc).isoformat(),
        },
        "selections": selected,
    }
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=2)


def _run_per_length_selections(
    candidates_files: dict,
    *,
    policy,
    policy_path,
    selector_state: "SelectorState",
    seed,
    selections_dir: Path,
    prefix: str,
) -> list[str]:
    """Write ``{prefix}_{name_class}_{N}syl.json`` and its TXT export for N in 2, 3, 4.

    Returns:
        Warning messages for syllable counts that could not be processed
        (candidates file not recorded / not on disk, or containing no
        candidates). Empty when every count was written.
    """
    missing: list[str] = []
    empty: list[str] = []

    for syllable_count in ("2", "3", "4"):
        candidates_file = candidates_files.get(syllable_count)
        if not candidates_file:
            missing.append(syllable_count)
            continue

        per_path = Path(candidates_file)
        if not per_path.exists():
            missing.append(syllable_count)
            continue

        per_candidates = _load_candidates(per_path)
        if not per_candidates:
            # Nothing to select from; skip the file but surface it in the
            # warnings instead of dropping it silently.
            empty.append(syllable_count)
            continue

        per_stats, per_selected = _evaluate_candidates(
            per_candidates, policy, selector_state, seed
        )
        per_output_path = (
            selections_dir / f"{prefix}_{selector_state.name_class}_{syllable_count}syl.json"
        )
        _write_selection_file(
            per_output_path,
            source_name=per_path.name,
            policy=policy,
            policy_path=policy_path,
            selector_state=selector_state,
            seed=seed,
            stats=per_stats,
            selected=per_selected,
        )
        export_names_to_txt([s["name"] for s in per_selected], str(per_output_path))

    warnings: list[str] = []
    if missing:
        warnings.append(
            "Missing candidates files for syllable counts: " + ", ".join(missing)
        )
    if empty:
        warnings.append(
            "Empty candidates files for syllable counts: " + ", ".join(empty)
        )
    return warnings


def run_selector(
    patch: "PatchState",
    combiner_state: "CombinerState",
    selector_state: "SelectorState",
) -> SelectorResult:
    """
    Run name_selector for a patch (mirrors CLI behavior exactly).

    This function mirrors the CLI:
        python -m build_tools.name_selector \\
            --run-dir <patch.corpus_dir> \\
            --candidates <from combiner output> \\
            --name-class <name_class> \\
            --count <count> \\
            --mode <mode>

    Output is written to:
        <run-dir>/selections/{prefix}_{name_class}_{N}syl.json
    together with a meta file:
        <run-dir>/selections/{prefix}_selector_meta.json

    TUI Extension:
        When combiner_state.syllable_mode == "all":
        - Writes a combined selection file: {prefix}_{name_class}_all.json
        - Also writes per-length selections: {prefix}_{name_class}_2syl/3syl/4syl.json
        - Exports matching .txt files for each JSON output
        - Per-length inputs that are missing or empty are reported under
          the "warnings" key of the meta output

    Args:
        patch: PatchState with corpus data (corpus_dir, corpus_type)
        combiner_state: CombinerState for candidates path and seed
        selector_state: SelectorState with selection parameters

    Returns:
        SelectorResult with selected names and metadata. On any failure
        (missing inputs, unknown name class, or an exception raised while
        loading, selecting or writing) the result has empty collections and
        the message in ``error``; exceptions are not propagated.

    Note:
        Caller is responsible for validating patch state and combiner output
        before calling.
    """
    run_dir = patch.corpus_dir
    # Files are prefixed with the lower-cased corpus type; "nltk" when unset.
    prefix = patch.corpus_type.lower() if patch.corpus_type else "nltk"

    # Validate required data
    if not run_dir:
        return _error_result("No corpus directory set")
    if not combiner_state.last_output_path:
        return _error_result("No candidates generated. Run Generate Candidates first.")

    candidates_path = Path(combiner_state.last_output_path)
    if not candidates_path.exists():
        return _error_result(f"Candidates file not found: {candidates_path.name}")

    try:
        candidates = _load_candidates(candidates_path)
        if not candidates:
            return _error_result("No candidates in file")

        # Load policy
        policy_path = get_default_policy_path()
        policies = load_name_classes(policy_path)
        if selector_state.name_class not in policies:
            return _error_result(f"Unknown name class: {selector_state.name_class}")
        policy = policies[selector_state.name_class]

        # Evaluate the combined candidate set before touching the filesystem.
        stats, selected = _evaluate_candidates(
            candidates, policy, selector_state, combiner_state.seed
        )

        selections_dir = run_dir / "selections"
        selections_dir.mkdir(parents=True, exist_ok=True)

        # "all" is a TUI-only mode; otherwise the label is the syllable count.
        all_lengths = combiner_state.syllable_mode == "all"
        if all_lengths:
            syllables_label = "all"
            output_filename = f"{prefix}_{selector_state.name_class}_all.json"
        else:
            syllables_label = str(combiner_state.syllables)
            output_filename = f"{prefix}_{selector_state.name_class}_{syllables_label}syl.json"
        output_path = selections_dir / output_filename

        _write_selection_file(
            output_path,
            source_name=candidates_path.name,
            policy=policy,
            policy_path=policy_path,
            selector_state=selector_state,
            seed=combiner_state.seed,
            stats=stats,
            selected=selected,
        )

        warnings: list[str] = []
        if all_lengths:
            # Combined output gets a TXT export, then each length is
            # selected from its own candidates file.
            export_names_to_txt([s["name"] for s in selected], str(output_path))
            warnings = _run_per_length_selections(
                combiner_state.last_candidates_files or {},
                policy=policy,
                policy_path=policy_path,
                selector_state=selector_state,
                seed=combiner_state.seed,
                selections_dir=selections_dir,
                prefix=prefix,
            )

        # Build meta file
        total_evaluated = stats["total_evaluated"]
        meta_output = {
            "tool": "name_selector",
            "version": "1.0.0",
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "arguments": {
                "run_dir": str(run_dir),
                "candidates": str(candidates_path),
                "name_class": selector_state.name_class,
                "policy_file": str(policy_path),
                "count": selector_state.count,
                "mode": selector_state.mode,
                "order": selector_state.order,
                "seed": combiner_state.seed,
            },
            "input": {
                "candidates_file": str(candidates_path),
                "candidates_loaded": len(candidates),
                "policy_file": str(policy_path),
                "policy_name": selector_state.name_class,
                "policy_description": policy.description,
            },
            "output": {
                "selections_file": str(output_path),
                "selections_count": len(selected),
            },
            "statistics": {
                "total_evaluated": total_evaluated,
                "admitted": stats["admitted"],
                # Guard against division by zero when nothing was evaluated.
                "admitted_percentage": (
                    round(stats["admitted"] / total_evaluated * 100, 2)
                    if total_evaluated > 0
                    else 0
                ),
                "rejected": stats["rejected"],
                "rejection_reasons": stats["rejection_reasons"],
                "score_distribution": stats["score_distribution"],
                "mode": selector_state.mode,
                "source_prefix": prefix,
                "syllable_count": syllables_label,
            },
        }
        if warnings:
            meta_output["warnings"] = warnings

        # Write meta file
        meta_path = selections_dir / f"{prefix}_selector_meta.json"
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(meta_output, f, indent=2)

        return SelectorResult(
            selected=selected,
            selected_names=[s["name"] for s in selected],
            output_path=output_path,
            meta_output=meta_output,
            error=None,
        )
    except Exception as e:
        # Service boundary for the TUI: report the failure in the result
        # rather than letting it propagate into the UI layer.
        return _error_result(str(e))