Source code for build_tools.syllable_analysis.plotting.static

"""Static matplotlib visualizations for analysis tools.

This module provides matplotlib-based static plotting functions for dimensionality
reduction visualizations. Functions create publication-quality PNG outputs with
comprehensive metadata.

Usage Example
-------------
::

    import numpy as np
    from pathlib import Path
    from build_tools.syllable_analysis.plotting.static import (
        create_tsne_scatter,
        save_static_plot,
        create_metadata_text
    )

    # Create visualization
    tsne_coords = np.array([[...], [...]])  # From t-SNE
    frequencies = [10, 25, 15, ...]
    fig = create_tsne_scatter(tsne_coords, frequencies)

    # Save to PNG
    output_path = Path("_working/output.png")
    save_static_plot(fig, output_path, dpi=300)

    # Generate metadata
    metadata = create_metadata_text(
        output_filename="output.png",
        dpi=300,
        perplexity=30,
        random_state=42,
        processing_time=2.5
    )
    Path("_working/output_meta.txt").write_text(metadata)
"""

from datetime import datetime
from pathlib import Path
from typing import List, Tuple

import matplotlib.pyplot as plt  # type: ignore[import-not-found]
import numpy as np  # type: ignore[import-not-found]

from .styles import (
    AXIS_LABEL_FONT_SIZE,
    DEFAULT_ALPHA,
    DEFAULT_COLORMAP,
    DEFAULT_DPI,
    DEFAULT_FIGURE_SIZE,
    DEFAULT_MARKER_LINE_COLOR,
    DEFAULT_MARKER_LINE_WIDTH,
    TITLE_FONT_SIZE,
)


def create_tsne_scatter(
    tsne_coords: np.ndarray,
    frequencies: List[int],
    title: str = "t-SNE: Feature Signature Space",
    figsize: Tuple[int, int] = DEFAULT_FIGURE_SIZE,
    cmap: str = DEFAULT_COLORMAP,
    alpha: float = DEFAULT_ALPHA,
) -> plt.Figure:
    """Create a static matplotlib scatter plot of t-SNE coordinates.

    Every point is drawn at its 2D t-SNE position, and both its marker size
    and its color are driven by the matching entry of ``frequencies``.

    Args:
        tsne_coords: Coordinates of shape (n_samples, 2). Any array-like
            accepted by ``np.asarray`` works (e.g. a list of ``[x, y]`` pairs).
        frequencies: One frequency value per row of ``tsne_coords``; used for
            both marker size and marker color.
        title: First line of the plot title. A second line,
            "(Size and color = frequency)", is always appended.
        figsize: Figure size in inches as (width, height). Defaults to
            ``DEFAULT_FIGURE_SIZE`` from the sibling ``styles`` module.
        cmap: Matplotlib colormap name. Defaults to ``DEFAULT_COLORMAP``.
        alpha: Marker transparency. Defaults to ``DEFAULT_ALPHA``.

    Returns:
        The matplotlib Figure holding the scatter plot and its colorbar.
        The caller owns the figure and should ``plt.close(fig)`` when done.

    Raises:
        ValueError: If ``tsne_coords`` is not of shape (n, 2), or if
            ``frequencies`` does not have one entry per coordinate row.

    Example:
        >>> import numpy as np
        >>> coords = np.random.randn(100, 2)
        >>> freqs = list(range(1, 101))
        >>> fig = create_tsne_scatter(coords, freqs)
        >>> fig.savefig("output.png", dpi=300)
        >>> plt.close(fig)

    Notes:
        - Marker size is ``frequency * 2``, so a frequency of 0 yields a
          marker of size 0.
        - A colorbar labelled "Frequency Count" is always added.
        - If plotting fails after the figure has been created, the figure is
          closed before the exception propagates so it does not linger in
          pyplot's figure registry.
    """
    # Coerce first so array-likes (lists of pairs) are validated instead of
    # failing with AttributeError on ``.ndim``.
    coords = np.asarray(tsne_coords)

    if coords.ndim != 2 or coords.shape[1] != 2:
        raise ValueError(
            f"tsne_coords must be 2D array with shape (n, 2), got shape {coords.shape}"
        )

    if len(frequencies) != coords.shape[0]:
        raise ValueError(
            f"frequencies length ({len(frequencies)}) must match "
            f"tsne_coords rows ({coords.shape[0]})"
        )

    # Convert before creating the figure: if this fails, nothing needs cleanup.
    freq_array = np.array(frequencies)

    fig, ax = plt.subplots(figsize=figsize)
    try:
        scatter = ax.scatter(
            coords[:, 0],
            coords[:, 1],
            c=freq_array,
            s=freq_array * 2,  # Size proportional to frequency
            cmap=cmap,
            alpha=alpha,
            edgecolors=DEFAULT_MARKER_LINE_COLOR,
            linewidth=DEFAULT_MARKER_LINE_WIDTH,
        )

        ax.set_title(
            f"{title}\n(Size and color = frequency)",
            fontsize=TITLE_FONT_SIZE,
            fontweight="bold",
        )
        ax.set_xlabel("t-SNE Dimension 1", fontsize=AXIS_LABEL_FONT_SIZE)
        ax.set_ylabel("t-SNE Dimension 2", fontsize=AXIS_LABEL_FONT_SIZE)

        # Use the figure-bound API rather than pyplot's implicit "current
        # figure" so the calls always target the figure created above.
        fig.colorbar(scatter, ax=ax, label="Frequency Count")
        fig.tight_layout()
    except Exception:
        # pyplot keeps every figure alive until it is explicitly closed;
        # release this one before re-raising (e.g. on an unknown cmap name).
        plt.close(fig)
        raise

    return fig
def save_static_plot(
    fig: plt.Figure,
    output_path: Path,
    dpi: int = DEFAULT_DPI,
) -> None:
    """Write a matplotlib figure to a PNG file.

    The path is validated first (it must end in ``.png`` and its parent
    directory must already exist), then the figure is saved with a tight
    bounding box.

    Args:
        fig: Matplotlib Figure object to save.
        output_path: Destination PNG path; the parent directory must exist.
        dpi: Resolution in dots per inch. Defaults to ``DEFAULT_DPI`` from
            the sibling ``styles`` module.

    Raises:
        ValueError: If ``output_path`` does not end with ``.png``
            (the comparison is case-sensitive).
        FileNotFoundError: If the parent directory does not exist.

    Example:
        >>> import matplotlib.pyplot as plt
        >>> from pathlib import Path
        >>> fig, ax = plt.subplots()
        >>> ax.plot([1, 2, 3], [1, 4, 9])
        >>> save_static_plot(fig, Path("output.png"), dpi=300)
        >>> plt.close(fig)

    Notes:
        - Saved with ``bbox_inches="tight"``.
        - The figure is NOT closed here; the caller remains responsible
          for ``plt.close(fig)``.
        - Errors raised by ``fig.savefig`` itself propagate unchanged.
    """
    destination = str(output_path)

    # Guard 1: only PNG targets are accepted.
    has_png_suffix = destination.endswith(".png")
    if not has_png_suffix:
        raise ValueError(f"output_path must end with .png, got: {output_path}")

    # Guard 2: this function never creates directories.
    parent_dir = output_path.parent
    if not parent_dir.exists():
        raise FileNotFoundError(f"Parent directory does not exist: {parent_dir}")

    fig.savefig(destination, dpi=dpi, bbox_inches="tight")
def create_metadata_text(
    output_filename: str,
    dpi: int,
    perplexity: int,
    random_state: int,
    processing_time: float,
    *,
    colormap: str = "viridis",
) -> str:
    """Generate formatted metadata text for a static t-SNE visualization.

    Builds a human-readable, multi-line report describing the output file,
    the algorithm parameters, the visual encoding, and an interpretation
    guide. Intended to be written next to the PNG output.

    Args:
        output_filename: Name of the output PNG file
            (e.g., "20260107_143022.tsne_visualization.png").
        dpi: Resolution used for PNG export.
        perplexity: t-SNE perplexity parameter used.
        random_state: Random seed used for reproducibility.
        processing_time: Total processing time in seconds; rendered with
            two decimal places.
        colormap: Name of the colormap the plot was drawn with, reported on
            the "Point color" line. Keyword-only; defaults to "viridis",
            which reproduces the previously hard-coded text. Pass the actual
            colormap name when the plot used a different one.

    Returns:
        The report as a single string with lines joined by ``"\\n"``
        (no trailing newline).

    Example:
        >>> metadata = create_metadata_text(
        ...     output_filename="20260107_143022.tsne_visualization.png",
        ...     dpi=300,
        ...     perplexity=30,
        ...     random_state=42,
        ...     processing_time=2.5
        ... )
        >>> Path("metadata.txt").write_text(metadata)

    Notes:
        - The "Generated" line uses ``datetime.now()`` (naive local time),
          formatted as ``YYYY-MM-DD HH:MM:SS``.
        - Section rules are plain ASCII: 60 ``=`` or ``-`` characters.
        - The distance metric, feature count, and feature description lines
          are fixed text, not derived from the arguments.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    heavy_rule = "=" * 60
    light_rule = "-" * 60

    metadata_lines = [
        "t-SNE VISUALIZATION METADATA",
        heavy_rule,
        f"Generated: {timestamp}",
        f"Output file: {output_filename}",
        f"Resolution: {dpi} DPI",
        f"Processing time: {processing_time:.2f} seconds",
        "",
        "ALGORITHM PARAMETERS",
        light_rule,
        "Method: t-SNE (t-distributed Stochastic Neighbor Embedding)",
        f"Perplexity: {perplexity}",
        f"Random state: {random_state}",
        "Distance metric: Hamming (optimal for binary features)",
        "Dimensions: 2D projection of 12-dimensional binary feature space",
        "Features: 12 phonetic features (onset, internal, nucleus, coda)",
        "",
        "VISUALIZATION ENCODING",
        light_rule,
        "X-axis: t-SNE Dimension 1",
        "Y-axis: t-SNE Dimension 2",
        "Point size: Proportional to syllable frequency",
        # Report the colormap actually used instead of assuming the default.
        f"Point color: Syllable frequency ({colormap} colormap)",
        "Edge color: Black outline for visibility",
        "",
        "INTERPRETATION GUIDE",
        light_rule,
        "- Nearby points: Similar phonetic feature patterns",
        "- Clusters: Natural groupings in feature space",
        "- Large/bright points: High-frequency syllables",
        "- Small/dark points: Low-frequency syllables",
        "- Isolated points: Unique or rare feature combinations",
        "",
        heavy_rule,
    ]

    return "\n".join(metadata_lines)