Source code for build_tools.pipeline_tui.services.validators

"""
Directory validators for Pipeline TUI browsers.

This module provides validation functions for the DirectoryBrowserScreen
that check whether directories are suitable for source input or output.

**Validator Function Signature:**

All validators return a tuple of ``(is_valid, type_label, message)``:

- ``is_valid``: True if directory can be selected
- ``type_label``: Short label describing valid directories (e.g., "source")
- ``message``: Error message if invalid, or description if valid

**Example Usage:**

.. code-block:: python

    from build_tools.tui_common.controls import DirectoryBrowserScreen
    from build_tools.pipeline_tui.services.validators import validate_source_directory

    result = await app.push_screen_wait(
        DirectoryBrowserScreen(
            title="Select Source",
            validator=validate_source_directory,
        )
    )
"""

from __future__ import annotations

from pathlib import Path


def validate_source_directory(path: Path) -> tuple[bool, str, str]:
    """
    Validate a directory as a source for text extraction.

    A valid source directory contains at least one regular ``.txt`` file,
    either directly or in a nested subdirectory. Entries that merely have a
    name ending in ``.txt`` but are not files (e.g. a directory called
    ``notes.txt``) are not counted.

    Args:
        path: Directory path to validate

    Returns:
        Tuple of (is_valid, type_label, message):
        - is_valid: True if at least one ``.txt`` file was found
        - type_label: "source" if valid, empty string otherwise
        - message: File count if valid, error description if invalid
    """
    if not path.is_dir():
        return (False, "", "Not a directory")

    # One recursive scan; "**/*.txt" also matches direct children, so the
    # direct count can be derived from the same result instead of globbing
    # a second time. is_file() drops directories named "*.txt".
    all_txt = [entry for entry in path.glob("**/*.txt") if entry.is_file()]
    if not all_txt:
        return (False, "", "No .txt files found")

    total = len(all_txt)
    direct = sum(1 for entry in all_txt if entry.parent == path)

    # Only mention the direct count when some files live in subdirectories.
    if direct == total:
        msg = f"Found {total} text file(s)"
    else:
        msg = f"Found {total} text file(s) ({direct} direct)"

    return (True, "source", msg)
def validate_output_directory(path: Path) -> tuple[bool, str, str]:
    """
    Validate a directory as an output location for pipeline results.

    Any existing directory whose contents can be listed is valid.
    Non-existent paths are invalid (the pipeline will create timestamped
    subdirectories, but the parent must exist).

    Note:
        Only the ability to list the directory is checked. Write permission
        is not tested, so an actual write may still fail later.

    Args:
        path: Directory path to validate

    Returns:
        Tuple of (is_valid, type_label, message):
        - is_valid: True if the directory exists and can be listed
        - type_label: "output" if valid, empty string otherwise
        - message: Status description, including the number of existing
          run directories (names ending in ``_pyphen`` or ``_nltk``) if any
    """
    if not path.exists():
        return (False, "", "Directory does not exist")

    if not path.is_dir():
        return (False, "", "Not a directory")

    run_suffixes = ("_pyphen", "_nltk")

    # A single guarded scan doubles as the access check and the run count.
    # Keeping the is_dir() calls inside the try means a permission failure
    # while inspecting an entry is reported rather than raised to the caller.
    try:
        run_count = sum(
            1
            for entry in path.iterdir()
            if entry.name.endswith(run_suffixes) and entry.is_dir()
        )
    except PermissionError:
        return (False, "", "Permission denied")

    if run_count:
        msg = f"Valid output ({run_count} existing runs)"
    else:
        msg = "Valid output directory"

    return (True, "output", msg)
def validate_corpus_directory(path: Path) -> tuple[bool, str, str]:
    """
    Validate a directory as a processed corpus (for syllable_walk_tui compatibility).

    A valid corpus directory contains either NLTK or pyphen normalized output:

    - NLTK corpus: ``nltk_syllables_unique.txt`` and ``nltk_syllables_frequencies.json``
    - Pyphen corpus: ``pyphen_syllables_unique.txt`` and ``pyphen_syllables_frequencies.json``

    When both flavours are present, the NLTK corpus is reported.

    This function is provided for compatibility with syllable_walk_tui, which
    needs to select processed corpus directories.

    Args:
        path: Directory path to validate

    Returns:
        Tuple of (is_valid, corpus_type, message):
        - is_valid: True if directory contains both files of one corpus flavour
        - corpus_type: "nltk" or "pyphen" if valid, empty if invalid
        - message: Corpus info if valid (including the line count of the
          unique-syllables file, reported as the syllable count, when it can
          be read), error description if invalid
    """
    if not path.is_dir():
        return (False, "", "Not a directory")

    # Checked in order, so NLTK takes precedence over pyphen.
    for corpus_type, label in (("nltk", "NLTK"), ("pyphen", "Pyphen")):
        unique_file = path / f"{corpus_type}_syllables_unique.txt"
        freq_file = path / f"{corpus_type}_syllables_frequencies.json"
        if not (unique_file.exists() and freq_file.exists()):
            continue

        # The count is informational only: if the file cannot be read, the
        # corpus is still reported as valid, just without a count. Only
        # lines are counted, so undecodable bytes are replaced rather than
        # allowed to abort the count.
        try:
            with unique_file.open(encoding="utf-8", errors="replace") as handle:
                syllable_count = sum(1 for _ in handle)
        except OSError:
            return (True, corpus_type, f"{label} corpus")

        return (True, corpus_type, f"{label} corpus ({syllable_count:,} syllables)")

    # No valid corpus found
    return (
        False,
        "",
        "No corpus files found (missing *_syllables_unique.txt or *_frequencies.json)",
    )