# Source code for build_tools.syllable_walk_tui.modules.generator.state

"""
State management for name generation modules.

This module provides dataclasses for managing name combiner and selector
configuration. The states mirror the exact CLI options from the tools,
with small TUI-only extensions where needed.

CombinerState CLI Options:
    --run-dir          → source_patch (A or B, uses patch's corpus_dir)
    --syllables        → syllables (2, 3, or 4) [TUI also supports "all" via syllable_mode]
    --count            → count (default: 10000)
    --seed             → seed (None = random)
    --frequency-weight → frequency_weight (0.0-1.0, default: 1.0)

SelectorState CLI Options:
    --run-dir          → source_patch (A or B, uses patch's corpus_dir)
    --candidates       → Determined by combiner output (syllables)
    --name-class       → name_class (first_name, last_name, etc.)
    --count            → count (default: 100)
    --mode             → mode (hard or soft, default: hard)

Usage:
    >>> combiner = CombinerState()
    >>> combiner.syllables = 3
    >>> selector = SelectorState()
    >>> selector.name_class = "first_name"
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Literal


@dataclass
class CombinerState:
    """
    Configuration and last-run results for the name combiner.

    The fields follow the CLI options of build_tools/name_combiner, plus a
    TUI-only ``syllable_mode`` switch and a few result-tracking fields.

    Attributes:
        source_patch: Which patch's corpus to use ("A" or "B").
            Maps to CLI --run-dir (uses the patch's corpus_dir).
        syllables: Number of syllables per name (2, 3, or 4).
            Maps to CLI --syllables when syllable_mode="exact".
        syllable_mode: "exact" for a single syllable count, "all" to
            generate 2/3/4 syllable candidates in one run (TUI-only).
        count: Number of candidates to generate.
            Maps to CLI --count (default: 10000).
        seed: RNG seed for deterministic output (None = random).
            Maps to CLI --seed.
        frequency_weight: Weight for frequency-biased sampling (0.0-1.0).
            Maps to CLI --frequency-weight (default: 1.0).
        outputs: Generated candidate names (for display).
        last_output_path: Path where candidates were written.
        last_unique_count: Unique name count from the last combiner run.
        last_candidates_files: Mapping of syllable count to candidates file
            path (keys are annotated as ``str``), or None.
    """

    # --- Source selection -------------------------------------------------
    # Patch whose corpus feeds the combiner; CLI --run-dir (patch.corpus_dir).
    source_patch: Literal["A", "B"] = "A"

    # --- Syllable count ---------------------------------------------------
    # CLI --syllables (required, choices=[2, 3, 4]).
    syllables: int = 2

    # TUI-only: "exact" uses ``syllables`` above, "all" generates 2/3/4.
    syllable_mode: Literal["exact", "all"] = "exact"

    # --- Generation parameters --------------------------------------------
    # CLI --count (default: 10000).
    count: int = 10000

    # CLI --seed (default: None = system entropy).
    seed: int | None = None

    # CLI --frequency-weight (default: 1.0).
    # 0.0 = uniform sampling, 1.0 = fully frequency-weighted.
    frequency_weight: float = 1.0

    # --- Output storage (for display in TUI) ------------------------------
    # default_factory gives every instance its own list.
    outputs: list[str] = field(default_factory=list)
    last_output_path: str | None = None
    last_unique_count: int | None = None
    last_candidates_files: dict[str, str] | None = None
# Available name classes from data/name_classes.yml
NAME_CLASSES: list[str] = [
    "first_name",
    "last_name",
    "place_name",
    "location_name",
    "object_item",
    "organisation",
    "title_epithet",
]
@dataclass
class SelectorState:
    """
    Configuration and last-run results for the name selector.

    The fields follow the CLI options of build_tools/name_selector, plus
    TUI-only extensions and result-tracking fields.

    Attributes:
        name_class: Name class policy to use for selection.
            Maps to CLI --name-class (required).
        count: Maximum number of names to output.
            Maps to CLI --count (default: 100).
        count_mode: "manual" to use the count value, "unique" to use the
            combiner's unique candidate count (TUI-only).
        mode: Evaluation mode - "hard" rejects, "soft" penalizes.
            Maps to CLI --mode (default: "hard").
        order: Ordering for names with equal scores; "alphabetical" for
            deterministic, "random" for variety (default: "random").
        outputs: Selected names (for display).
        last_output_path: Path where selections were written.
        last_candidates_path: Path to the candidates file used.
    """

    # --- Policy -----------------------------------------------------------
    # CLI --name-class (required).
    # Available: first_name, last_name, place_name, location_name,
    #            object_item, organisation, title_epithet
    name_class: str = "first_name"

    # --- Output size ------------------------------------------------------
    # CLI --count (default: 100).
    count: int = 100

    # TUI-only: "manual" uses ``count`` above, "unique" uses the combiner's
    # unique candidate count.
    count_mode: Literal["manual", "unique"] = "manual"

    # --- Evaluation -------------------------------------------------------
    # CLI --mode (default: "hard").
    # "hard" = reject candidates with discouraged features
    # "soft" = apply -10 penalty instead of rejection
    mode: Literal["hard", "soft"] = "hard"

    # Tie-breaking among names with equal scores.
    # "alphabetical" = deterministic, sorted by name
    # "random" = shuffled within score groups (seeded)
    order: Literal["alphabetical", "random"] = "random"

    # --- Output storage (for display in TUI) ------------------------------
    # default_factory gives every instance its own list.
    outputs: list[str] = field(default_factory=list)
    last_output_path: str | None = None
    last_candidates_path: str | None = None