# Source code for build_tools.tui_common.ledger
"""
Shared corpus database ledger helpers for extraction tools.
This module provides a context manager and helper functions for integrating
with the corpus database ledger. The ledger is observational only - it records
what happened but does not influence extraction behavior.
These utilities eliminate duplicated corpus DB integration patterns across
the pyphen and NLTK syllable extractors.
Usage::

    from build_tools.tui_common.ledger import ExtractionLedgerContext

    with ExtractionLedgerContext(
        extractor_tool="pyphen_syllable_extractor",
        extractor_version="0.5.0",
        min_len=2,
        max_len=8,
        quiet=False,
    ) as ctx:
        # Record inputs
        ctx.record_input(input_path)

        # ... do extraction ...

        # Record outputs
        ctx.record_output(
            output_path=syllables_path,
            unique_syllable_count=len(syllables),
            meta_path=metadata_path,
        )

        # Mark success or failure
        ctx.set_result(success=True)
"""
from __future__ import annotations
import sys
from collections.abc import Callable
from functools import partial
from pathlib import Path
from typing import TYPE_CHECKING, Any
from build_tools.tui_common.cli_utils import CORPUS_DB_AVAILABLE, record_corpus_db_safe
if TYPE_CHECKING:
from build_tools.corpus_db import CorpusLedger
[docs]
class ExtractionLedgerContext:
    """
    Context manager for corpus database ledger integration.

    Handles the full lifecycle of ledger operations:

    - Initialize ledger on entry
    - Start run with extraction parameters
    - Record inputs and outputs during extraction
    - Complete run with success/failure status on exit
    - Close ledger connection

    Initialization errors are caught in ``__enter__`` and every later ledger
    call is routed through ``record_corpus_db_safe``, so that ledger problems
    do not block extraction.

    Attributes:
        extractor_tool: Name of the extraction tool
        extractor_version: Version string of the tool
        pyphen_lang: Language code for pyphen (None for NLTK)
        min_len: Minimum syllable length constraint
        max_len: Maximum syllable length constraint
        recursive: Whether directory scanning was recursive
        pattern: File pattern for directory scanning
        command_line: Full command-line invocation
        quiet: Suppress warning messages

    Example:
        >>> with ExtractionLedgerContext(
        ...     extractor_tool="pyphen_syllable_extractor",
        ...     extractor_version="0.5.0",
        ...     pyphen_lang="en_US",
        ...     min_len=2,
        ...     max_len=8,
        ... ) as ctx:
        ...     ctx.record_input(Path("input.txt"))
        ...     # ... extraction ...
        ...     ctx.record_output(syllables_path, len(syllables), metadata_path)
        ...     ctx.set_result(success=True)
    """

    def __init__(
        self,
        extractor_tool: str,
        extractor_version: str = "unknown",
        pyphen_lang: str | None = None,
        min_len: int | None = None,
        max_len: int | None = None,
        recursive: bool = False,
        pattern: str | None = None,
        command_line: str | None = None,
        quiet: bool = False,
    ) -> None:
        """
        Initialize the ledger context.

        No ledger is opened here; that happens in ``__enter__``.

        Args:
            extractor_tool: Name of the extraction tool
            extractor_version: Version string of the tool
            pyphen_lang: Language code for pyphen (None for NLTK or auto-detect)
            min_len: Minimum syllable length constraint
            max_len: Maximum syllable length constraint
            recursive: Whether directory scanning was recursive
            pattern: File pattern for directory scanning
            command_line: Full command-line invocation (defaults to the
                space-joined ``sys.argv`` when omitted or empty)
            quiet: Suppress warning messages
        """
        self.extractor_tool = extractor_tool
        self.extractor_version = extractor_version
        self.pyphen_lang = pyphen_lang
        self.min_len = min_len
        self.max_len = max_len
        self.recursive = recursive
        self.pattern = pattern
        self.command_line = command_line or " ".join(sys.argv)
        self.quiet = quiet

        # Populated by __enter__ once the ledger is open and a run has started.
        self._ledger: CorpusLedger | None = None
        self._run_id: int | None = None
        # Tri-state: None until set_result() is called.
        self._success: bool | None = None

    @property
    def is_available(self) -> bool:
        """Check if corpus DB integration is available and initialized."""
        return self._ledger is not None and self._run_id is not None

    @property
    def run_id(self) -> int | None:
        """Get the current run ID, or None if not initialized."""
        return self._run_id

    def __enter__(self) -> "ExtractionLedgerContext":
        """
        Enter context: initialize ledger and start run.

        Any exception raised while opening the ledger or starting the run is
        caught; a warning is printed to stderr (unless ``quiet``) and the
        context continues with the ledger disabled.

        Returns:
            Self for use in with statement.
        """
        if not CORPUS_DB_AVAILABLE:
            return self

        try:
            from build_tools.corpus_db import CorpusLedger

            self._ledger = CorpusLedger()
            self._run_id = self._ledger.start_run(
                extractor_tool=self.extractor_tool,
                extractor_version=self.extractor_version,
                pyphen_lang=self.pyphen_lang,
                min_len=self.min_len,
                max_len=self.max_len,
                recursive=self.recursive,
                pattern=self.pattern,
                command_line=self.command_line,
            )
        except Exception as e:
            if not self.quiet:
                print(f"Warning: Failed to initialize corpus_db: {e}", file=sys.stderr)
            # start_run() can fail after the ledger was already opened.
            # __exit__ skips cleanup when the context is unavailable, so the
            # ledger must be closed here or it would never be closed.
            opened = self._ledger
            self._ledger = None
            self._run_id = None
            if opened is not None:
                self._safe_call("close ledger", opened.close, quiet=True)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """
        Exit context: complete run and close ledger.

        The run is recorded as ``"failed"`` (exit code 1) when an exception
        is propagating out of the ``with`` body or ``set_result(False)`` was
        called; otherwise it is recorded as ``"completed"`` (exit code 0).
        An exception takes precedence over ``set_result(True)``.

        Returns None, so exceptions from the ``with`` body are not suppressed.
        """
        if not self.is_available:
            return

        failed = exc_type is not None or self._success is False
        exit_code = 1 if failed else 0
        status = "failed" if failed else "completed"

        # Bind to locals so the lambda below closes over non-Optional values.
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return

        self._safe_call(
            "complete run",
            lambda: ledger.complete_run(run_id, exit_code=exit_code, status=status),
        )
        # Close failures are always reported quietly.
        self._safe_call("close ledger", ledger.close, quiet=True)

    def _safe_call(
        self,
        operation: str,
        func: Callable[[], Any],
        quiet: bool | None = None,
    ) -> Any:
        """
        Execute a ledger operation through ``record_corpus_db_safe``.

        Args:
            operation: Description of the operation
            func: Zero-argument callable to execute
            quiet: Override instance quiet setting (None uses ``self.quiet``)

        Returns:
            Whatever ``record_corpus_db_safe`` returns for this call.
        """
        if quiet is None:
            quiet = self.quiet
        return record_corpus_db_safe(operation, func, quiet=quiet)

    def set_result(self, success: bool) -> None:
        """
        Explicitly set the extraction result.

        Call this before exiting the context to indicate success or failure.
        If not called, success is assumed unless an exception occurs.

        Args:
            success: True if extraction succeeded, False if failed
        """
        self._success = success

    def record_input(
        self,
        source_path: Path,
        file_count: int | None = None,
    ) -> None:
        """
        Record an input source for this run.

        Does nothing when the ledger is not available.

        Args:
            source_path: Path to input file or directory
            file_count: Number of files if source_path is a directory
        """
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return
        self._safe_call(
            "input",
            lambda: ledger.record_input(run_id, source_path, file_count),
        )

    def record_inputs(
        self,
        files: list[Path],
        source_dir: Path | None = None,
    ) -> None:
        """
        Record multiple input files for this run.

        If source_dir is provided, records the directory with file count.
        Otherwise, records each file individually. Does nothing when the
        ledger is not available.

        Args:
            files: List of input file paths
            source_dir: Source directory (if files were discovered from a directory)
        """
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return

        if source_dir is not None:
            # One entry for the directory, carrying the number of files found.
            self._safe_call(
                "input",
                lambda: ledger.record_input(run_id, source_dir, file_count=len(files)),
            )
            return

        for fp in files:
            # partial binds fp now, avoiding a late-binding closure in the loop.
            self._safe_call("input", partial(ledger.record_input, run_id, fp))

    def record_output(
        self,
        output_path: Path,
        unique_syllable_count: int | None = None,
        meta_path: Path | None = None,
    ) -> None:
        """
        Record an output file for this run.

        Does nothing when the ledger is not available.

        Args:
            output_path: Path to generated syllables file
            unique_syllable_count: Number of unique syllables extracted
            meta_path: Path to corresponding metadata file
        """
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return
        self._safe_call(
            "output",
            lambda: ledger.record_output(
                run_id,
                output_path=output_path,
                unique_syllable_count=unique_syllable_count,
                meta_path=meta_path,
            ),
        )
# Public API: the context manager, plus the availability flag re-exported
# from build_tools.tui_common.cli_utils.
__all__ = ["ExtractionLedgerContext", "CORPUS_DB_AVAILABLE"]