# Source code for build_tools.tui_common.ledger

"""
Shared corpus database ledger helpers for extraction tools.

This module provides a context manager and helper functions for integrating
with the corpus database ledger. The ledger is observational only - it records
what happened but does not influence extraction behavior.

These utilities eliminate duplicated corpus DB integration patterns across
the pyphen and NLTK syllable extractors.

Usage::

    from build_tools.tui_common.ledger import ExtractionLedgerContext

    with ExtractionLedgerContext(
        extractor_tool="pyphen_syllable_extractor",
        extractor_version="0.5.0",
        min_len=2,
        max_len=8,
        quiet=False,
    ) as ctx:
        # Record inputs
        ctx.record_input(input_path)

        # ... do extraction ...

        # Record outputs
        ctx.record_output(
            output_path=syllables_path,
            unique_syllable_count=len(syllables),
            meta_path=metadata_path,
        )

        # Mark success or failure
        ctx.set_result(success=True)
"""

from __future__ import annotations

import sys
from collections.abc import Callable
from functools import partial
from pathlib import Path
from typing import TYPE_CHECKING, Any

from build_tools.tui_common.cli_utils import CORPUS_DB_AVAILABLE, record_corpus_db_safe

if TYPE_CHECKING:
    from build_tools.corpus_db import CorpusLedger



# [docs]
class ExtractionLedgerContext:
    """
    Context manager for corpus database ledger integration.

    Handles the full lifecycle of ledger operations:
    - Initialize ledger on entry
    - Start run with extraction parameters
    - Record inputs and outputs during extraction
    - Complete run with success/failure status on exit
    - Close ledger connection

    All operations are safe - failures are logged but don't block extraction.
    If the ledger can be opened but the run cannot be started, the ledger is
    closed again and the context behaves as if the corpus DB were unavailable.

    Attributes:
        extractor_tool: Name of the extraction tool
        extractor_version: Version string of the tool
        pyphen_lang: Language code for pyphen (None for NLTK)
        min_len: Minimum syllable length constraint
        max_len: Maximum syllable length constraint
        recursive: Whether directory scanning was recursive
        pattern: File pattern for directory scanning
        command_line: Full command-line invocation
        quiet: Suppress warning messages

    Example:
        >>> with ExtractionLedgerContext(
        ...     extractor_tool="pyphen_syllable_extractor",
        ...     extractor_version="0.5.0",
        ...     pyphen_lang="en_US",
        ...     min_len=2,
        ...     max_len=8,
        ... ) as ctx:
        ...     ctx.record_input(Path("input.txt"))
        ...     # ... extraction ...
        ...     ctx.record_output(syllables_path, len(syllables), metadata_path)
        ...     ctx.set_result(success=True)
    """

    def __init__(
        self,
        extractor_tool: str,
        extractor_version: str = "unknown",
        pyphen_lang: str | None = None,
        min_len: int | None = None,
        max_len: int | None = None,
        recursive: bool = False,
        pattern: str | None = None,
        command_line: str | None = None,
        quiet: bool = False,
    ) -> None:
        """
        Initialize the ledger context.

        No ledger is opened here; that happens in ``__enter__``.

        Args:
            extractor_tool: Name of the extraction tool
            extractor_version: Version string of the tool
            pyphen_lang: Language code for pyphen (None for NLTK or auto-detect)
            min_len: Minimum syllable length constraint
            max_len: Maximum syllable length constraint
            recursive: Whether directory scanning was recursive
            pattern: File pattern for directory scanning
            command_line: Full command-line invocation (defaults to sys.argv)
            quiet: Suppress warning messages
        """
        self.extractor_tool = extractor_tool
        self.extractor_version = extractor_version
        self.pyphen_lang = pyphen_lang
        self.min_len = min_len
        self.max_len = max_len
        self.recursive = recursive
        self.pattern = pattern
        # An empty or missing command line falls back to the process arguments.
        self.command_line = command_line or " ".join(sys.argv)
        self.quiet = quiet

        self._ledger: CorpusLedger | None = None
        self._run_id: int | None = None
        # None means "no explicit result"; __exit__ treats that as success
        # unless an exception is propagating.
        self._success: bool | None = None

    @property
    def is_available(self) -> bool:
        """Check if corpus DB integration is available and initialized."""
        return self._ledger is not None and self._run_id is not None

    @property
    def run_id(self) -> int | None:
        """Get the current run ID, or None if not initialized."""
        return self._run_id

    def __enter__(self) -> "ExtractionLedgerContext":
        """
        Enter context: initialize ledger and start run.

        Any failure while opening the ledger or starting the run is reported
        as a warning on stderr (unless ``quiet``) and leaves the context in
        the unavailable state, so the recording methods become no-ops.

        Returns:
            Self for use in with statement.
        """
        if not CORPUS_DB_AVAILABLE:
            return self

        # Track the ledger opened by *this* call so that a failure in
        # start_run() can release it instead of leaking the connection.
        opened = None
        try:
            from build_tools.corpus_db import CorpusLedger

            opened = CorpusLedger()
            started_run_id = opened.start_run(
                extractor_tool=self.extractor_tool,
                extractor_version=self.extractor_version,
                pyphen_lang=self.pyphen_lang,
                min_len=self.min_len,
                max_len=self.max_len,
                recursive=self.recursive,
                pattern=self.pattern,
                command_line=self.command_line,
            )
        except Exception as e:
            if not self.quiet:
                print(f"Warning: Failed to initialize corpus_db: {e}", file=sys.stderr)
            self._ledger = None
            self._run_id = None
            if opened is not None:
                # The ledger was created but the run could not be started:
                # close it here, because __exit__ will skip an unavailable context.
                self._safe_call("close ledger", opened.close, quiet=True)
        else:
            self._ledger = opened
            self._run_id = started_run_id

        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """
        Exit context: complete run and close ledger.

        The run is recorded as ``"failed"`` (exit code 1) when an exception is
        propagating or ``set_result(False)`` was called; otherwise it is
        recorded as ``"completed"`` (exit code 0). Returns None, so an
        in-flight exception is never suppressed.
        """
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return

        # An exception always wins over an explicit set_result(True).
        failed = exc_type is not None or self._success is False
        exit_code = 1 if failed else 0
        status = "failed" if failed else "completed"

        self._safe_call(
            "complete run",
            partial(ledger.complete_run, run_id, exit_code=exit_code, status=status),
        )
        # Close failures are not worth a warning, hence quiet=True.
        self._safe_call("close ledger", ledger.close, quiet=True)

    def _safe_call(
        self,
        operation: str,
        func: Callable[[], Any],
        quiet: bool | None = None,
    ) -> Any:
        """
        Execute a ledger operation with safe error handling.

        Delegates to ``record_corpus_db_safe``.

        Args:
            operation: Description of the operation
            func: Callable to execute
            quiet: Override instance quiet setting

        Returns:
            Result of func() if successful, None if failed
        """
        effective_quiet = self.quiet if quiet is None else quiet
        return record_corpus_db_safe(operation, func, quiet=effective_quiet)

    def set_result(self, success: bool) -> None:
        """
        Explicitly set the extraction result.

        Call this before exiting the context to indicate success or failure.
        If not called, success is assumed unless an exception occurs.

        Args:
            success: True if extraction succeeded, False if failed
        """
        self._success = success

    def record_input(
        self,
        source_path: Path,
        file_count: int | None = None,
    ) -> None:
        """
        Record an input source for this run.

        Does nothing when the ledger is not available.

        Args:
            source_path: Path to input file or directory
            file_count: Number of files if source_path is a directory
        """
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return

        self._safe_call(
            "input",
            partial(ledger.record_input, run_id, source_path, file_count),
        )

    def record_inputs(
        self,
        files: list[Path],
        source_dir: Path | None = None,
    ) -> None:
        """
        Record multiple input files for this run.

        If source_dir is provided, records the directory with file count.
        Otherwise, records each file individually.
        Does nothing when the ledger is not available.

        Args:
            files: List of input file paths
            source_dir: Source directory (if files were discovered from a directory)
        """
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return

        if source_dir is not None:
            # One ledger entry for the directory, carrying the file count.
            self._safe_call(
                "input",
                partial(ledger.record_input, run_id, source_dir, file_count=len(files)),
            )
            return

        # No directory given: one ledger entry per file.
        for fp in files:
            self._safe_call("input", partial(ledger.record_input, run_id, fp))

    def record_output(
        self,
        output_path: Path,
        unique_syllable_count: int | None = None,
        meta_path: Path | None = None,
    ) -> None:
        """
        Record an output file for this run.

        Does nothing when the ledger is not available.

        Args:
            output_path: Path to generated syllables file
            unique_syllable_count: Number of unique syllables extracted
            meta_path: Path to corresponding metadata file
        """
        ledger = self._ledger
        run_id = self._run_id
        if ledger is None or run_id is None:
            return

        self._safe_call(
            "output",
            partial(
                ledger.record_output,
                run_id,
                output_path=output_path,
                unique_syllable_count=unique_syllable_count,
                meta_path=meta_path,
            ),
        )




# Names exported by a star-import of this module.
__all__ = ["ExtractionLedgerContext", "CORPUS_DB_AVAILABLE"]