"""
File I/O operations for syllable extraction.
This module handles all file reading, writing, and output generation
for the syllable extractor.
"""
from datetime import datetime
from pathlib import Path
from typing import Optional
from .models import ExtractionResult
# Default base directory for run outputs; generate_output_filename() falls back
# to it when no output_dir is given. The path is relative, so it resolves
# against the current working directory (expected to be the project root).
DEFAULT_OUTPUT_DIR = Path("_working/output")
[docs]
def generate_output_filename(
    output_dir: Optional[Path] = None,
    language_code: Optional[str] = None,
    run_timestamp: Optional[str] = None,
    input_filename: Optional[str] = None,
) -> tuple[Path, Path]:
    """
    Build the syllables/metadata output paths for one extraction run.

    Outputs are grouped in a run directory named after a timestamp plus the
    'pyphen' identifier, with one subdirectory per output kind:

    - output_dir/YYYYMMDD_HHMMSS_pyphen/syllables/<name>
    - output_dir/YYYYMMDD_HHMMSS_pyphen/meta/<name>

    Both subdirectories are created (including parents) if they do not exist
    yet, so the returned paths can be written to immediately. The files
    themselves are not created.

    Args:
        output_dir: Base output directory (a ``Path``; a plain string is also
            accepted). Defaults to ``DEFAULT_OUTPUT_DIR``.
        language_code: Optional pyphen language code (e.g. 'en_US', 'de_DE').
            Used as ``<language_code>.txt`` when no usable input_filename is
            given.
        run_timestamp: Optional timestamp string (YYYYMMDD_HHMMSS format) used
            for the run directory name. When omitted, a new one is generated
            from ``datetime.now()``. For batch processing, pass the same
            timestamp for every file so all outputs land in one run directory.
        input_filename: Optional input filename to reuse for the outputs
            (e.g. 'alice.txt'). Takes precedence over language_code. Only the
            final path component is used, so a value such as
            'books/alice.txt' or '/tmp/alice.txt' still yields 'alice.txt'
            inside the run directory.

    Returns:
        Tuple of (syllables_path, metadata_path).

    Example:
        >>> # Single file named after the language code
        >>> s, m = generate_output_filename(
        ...     language_code='en_US', run_timestamp='20260110_153022'
        ... )
        >>> s.as_posix()
        '_working/output/20260110_153022_pyphen/syllables/en_US.txt'
        >>> m.as_posix()
        '_working/output/20260110_153022_pyphen/meta/en_US.txt'

        >>> # Batch mode - several files sharing one run directory
        >>> s1, _ = generate_output_filename(
        ...     run_timestamp='20260110_153022', input_filename='alice.txt'
        ... )
        >>> s2, _ = generate_output_filename(
        ...     run_timestamp='20260110_153022', input_filename='middlemarch.txt'
        ... )
        >>> s1.parent == s2.parent
        True
    """
    # Coerce so that callers passing a plain string do not hit a TypeError on
    # the "/" operator below.
    base_dir = DEFAULT_OUTPUT_DIR if output_dir is None else Path(output_dir)

    # Generate the run timestamp only when the caller did not supply one.
    if run_timestamp is None:
        run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Run directory layout (with pyphen identifier).
    run_dir = base_dir / f"{run_timestamp}_pyphen"
    syllables_dir = run_dir / "syllables"
    meta_dir = run_dir / "meta"

    # Ensure both subdirectories exist before handing the paths back.
    syllables_dir.mkdir(parents=True, exist_ok=True)
    meta_dir.mkdir(parents=True, exist_ok=True)

    # Naming priority: input_filename > language_code > default.
    # Keep only the last path component: joining an absolute path would throw
    # away the run directory entirely, and embedded directories would point at
    # locations that were never created (or outside the run directory).
    output_name = Path(input_filename).name if input_filename else ""
    if output_name in ("", ".."):
        # Nothing usable as a file name ("", ".", "..", "/"): fall back.
        output_name = f"{language_code}.txt" if language_code else "syllables.txt"

    return syllables_dir / output_name, meta_dir / output_name