Source code for build_tools.syllable_walk_tui.services.combiner_runner

"""
Name combiner execution service.

Mirrors the CLI behavior of build_tools.name_combiner.
"""

import json
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING

from build_tools.name_combiner.combiner import combine_syllables

if TYPE_CHECKING:
    from build_tools.syllable_walk_tui.modules.generator import CombinerState
    from build_tools.syllable_walk_tui.modules.oscillator import PatchState


[docs] @dataclass class CombinerResult: """Result from combiner execution.""" candidates: list[dict] output_path: Path meta_output: dict error: str | None = None
def _write_json(path: Path, payload: dict) -> None:
    """Serialize *payload* to *path* as UTF-8 JSON with 2-space indentation."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)


def _failed_result(message: str) -> CombinerResult:
    """Build the empty CombinerResult that reports *message* as its error."""
    return CombinerResult(
        candidates=[],
        output_path=Path(),
        meta_output={},
        error=message,
    )


def run_combiner(
    patch: "PatchState",
    combiner_state: "CombinerState",
) -> CombinerResult:
    """
    Run name_combiner for a patch (mirrors CLI behavior exactly).

    This function mirrors the CLI:
        python -m build_tools.name_combiner \\
            --run-dir <patch.corpus_dir> \\
            --syllables <syllables> \\
            --count <count> \\
            --seed <seed> \\
            --frequency-weight <frequency_weight>

    Output is written to:
        <run-dir>/candidates/{prefix}_candidates_{N}syl.json

    A meta file is always written next to it:
        <run-dir>/candidates/{prefix}_combiner_meta.json

    TUI Extension:
        When combiner_state.syllable_mode == "all", this function also:
        - Generates candidates for 2, 3, and 4 syllables
        - Writes per-length files: {prefix}_candidates_2syl.json, etc.
        - Writes a combined file: {prefix}_candidates_all.json
        - Returns combined candidates in the result

    Args:
        patch: PatchState with corpus data. ``corpus_dir``, ``corpus_type``
            and ``annotated_data`` are read; ``corpus_type`` falls back to
            the "nltk" prefix when unset.
        combiner_state: CombinerState with generation parameters
            (``syllable_mode``, ``syllables``, ``count``, ``seed``,
            ``frequency_weight``).

    Returns:
        CombinerResult with generated candidates and metadata. On failure
        (missing corpus directory, missing annotated data, or any exception
        raised while generating or writing) the result has empty
        candidates/metadata and a non-empty ``error`` string; this function
        does not propagate exceptions from the generation step.

    Note:
        Caller is responsible for validating patch state before calling.
        Files already written before a failure are left on disk.
    """
    # Extract values for clarity
    run_dir = patch.corpus_dir
    prefix = patch.corpus_type.lower() if patch.corpus_type else "nltk"
    comb = combiner_state

    # Validate required data
    if not run_dir:
        return _failed_result("No corpus directory set")
    if not patch.annotated_data:
        return _failed_result("Annotated data not loaded")

    try:
        # === Prepare output directory (mirrors CLI) ===
        candidates_dir = run_dir / "candidates"
        candidates_dir.mkdir(parents=True, exist_ok=True)

        # Determine syllable counts
        generate_all = comb.syllable_mode == "all"
        syllable_counts = [2, 3, 4] if generate_all else [comb.syllables]
        source_annotated = f"{prefix}_syllables_annotated.json"

        all_candidates: list[dict] = []
        per_syllable_files: dict[str, str] = {}
        per_syllable_counts: dict[str, int] = {}
        last_output_path: Path | None = None

        # === Generate candidates per syllable count ===
        # The same seed is passed for every length, exactly as separate CLI
        # invocations with the same --seed would do.
        for syllable_count in syllable_counts:
            candidates = combine_syllables(
                annotated_data=patch.annotated_data,
                syllable_count=syllable_count,
                count=comb.count,
                seed=comb.seed,
                frequency_weight=comb.frequency_weight,
            )

            output_path = candidates_dir / f"{prefix}_candidates_{syllable_count}syl.json"
            _write_json(
                output_path,
                {
                    "metadata": {
                        "source_run": run_dir.name,
                        "source_annotated": source_annotated,
                        "syllable_count": syllable_count,
                        "total_candidates": len(candidates),
                        "seed": comb.seed,
                        "frequency_weight": comb.frequency_weight,
                        "aggregation_rule": "majority",
                        "generated_at": datetime.now(timezone.utc).isoformat(),
                    },
                    "candidates": candidates,
                },
            )

            per_syllable_files[str(syllable_count)] = str(output_path)
            per_syllable_counts[str(syllable_count)] = len(candidates)
            last_output_path = output_path

            if generate_all:
                all_candidates.extend(candidates)
            else:
                all_candidates = candidates

        # === If "all", also write combined file ===
        if generate_all:
            combined_path = candidates_dir / f"{prefix}_candidates_all.json"
            _write_json(
                combined_path,
                {
                    "metadata": {
                        "source_run": run_dir.name,
                        "source_annotated": source_annotated,
                        "syllable_count": "all",
                        "syllable_counts": syllable_counts,
                        "total_candidates": len(all_candidates),
                        "seed": comb.seed,
                        "frequency_weight": comb.frequency_weight,
                        "aggregation_rule": "majority",
                        "generated_at": datetime.now(timezone.utc).isoformat(),
                        "candidates_files": per_syllable_files,
                    },
                    "candidates": all_candidates,
                },
            )
            # The combined file becomes the primary output reported to the caller.
            last_output_path = combined_path

        if last_output_path is None:
            raise ValueError("No candidates were generated")

        # === Build meta file (mirrors CLI with TUI extensions) ===
        unique_names = len({c["name"] for c in all_candidates})
        unique_percentage = unique_names / len(all_candidates) * 100 if all_candidates else 0

        syllables_arg = "all" if generate_all else comb.syllables
        meta_output = {
            "tool": "name_combiner",
            "version": "1.0.0",
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "arguments": {
                "run_dir": str(run_dir),
                "syllables": syllables_arg,
                "syllable_mode": comb.syllable_mode,
                "syllable_counts": syllable_counts,
                "count": comb.count,
                "seed": comb.seed,
                "frequency_weight": comb.frequency_weight,
            },
            "output": {
                "candidates_file": str(last_output_path),
                "candidates_generated": len(all_candidates),
                "unique_names": unique_names,
                "unique_percentage": round(unique_percentage, 2),
                "candidates_files": per_syllable_files,
                "per_syllable_counts": per_syllable_counts,
            },
        }

        _write_json(candidates_dir / f"{prefix}_combiner_meta.json", meta_output)

        return CombinerResult(
            candidates=all_candidates,
            output_path=last_output_path,
            meta_output=meta_output,
            error=None,
        )

    except Exception as exc:
        # Service boundary: report the failure to the caller instead of raising.
        # str(exc) is "" for exceptions raised without a message; fall back to
        # repr() so a failed run never carries a falsy error string.
        return _failed_result(str(exc) or repr(exc))