"""
Package builder service for the web application.
Bundles walk results, candidates, and selections from both patches
into a downloadable ZIP archive with a manifest. Also writes a
companion ``_metadata.json`` file to disk next to the ZIP for
provenance tracking (matching the TUI packager behaviour).
"""
from __future__ import annotations
import io
import json
import zipfile
from datetime import datetime, timezone
from typing import Any
from build_tools.syllable_walk_web.state import PatchState, ServerState
[docs]
def build_package(
    state: ServerState,
    *,
    name: str = "corpus-package",
    version: str = "0.1.0",
    include_walks_a: bool = True,
    include_walks_b: bool = True,
    include_candidates: bool = True,
    include_selections: bool = True,
) -> tuple[bytes, str | None]:
    """Assemble a ZIP archive from the walker state held in memory.

    Every enabled, non-empty data set is serialised as indented JSON under
    ``patch_a/`` or ``patch_b/``; selections are delegated to
    ``_write_selections``. A ``manifest.json`` produced by
    ``_build_manifest`` is added last, and the finished archive is handed
    to ``_persist_to_disk`` before being returned.

    Args:
        state: Global server state with patch data.
        name: Package name (used in the ZIP filename).
        version: Package version string.
        include_walks_a: Include Patch A walks.
        include_walks_b: Include Patch B walks.
        include_candidates: Include candidates from both patches.
        include_selections: Include selections from both patches.

    Returns:
        ``(zip_bytes, None)`` on success, or ``(b"", message)`` when none
        of the enabled data sets contained anything to package. In the
        latter case nothing is persisted to disk.
    """
    # One dict feeds both the embedded manifest and the disk-side metadata
    # so the two records of "what was requested" cannot drift apart.
    include_flags = dict(
        walks_a=include_walks_a,
        walks_b=include_walks_b,
        candidates=include_candidates,
        selections=include_selections,
    )
    entries: list[dict[str, Any]] = []
    buffer = io.BytesIO()

    def add_json(archive: zipfile.ZipFile, patch_key: str, kind: str, records: Any) -> None:
        """Store *records* as ``patch_<key>/<kind>.json`` and log the entry."""
        member = f"patch_{patch_key}/{kind}.json"
        payload = json.dumps(records, indent=2).encode("utf-8")
        archive.writestr(member, payload)
        entries.append(
            {
                "path": member,
                "type": kind,
                "patch": patch_key,
                "count": len(records),
                "bytes": len(payload),
            }
        )

    with zipfile.ZipFile(buffer, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        # Member order is part of the output: walks A/B, candidates A/B,
        # selections A/B, then the manifest.
        if include_walks_a and state.patch_a.walks:
            add_json(archive, "a", "walks", state.patch_a.walks)
        if include_walks_b and state.patch_b.walks:
            add_json(archive, "b", "walks", state.patch_b.walks)

        if include_candidates and state.patch_a.candidates:
            add_json(archive, "a", "candidates", state.patch_a.candidates)
        if include_candidates and state.patch_b.candidates:
            add_json(archive, "b", "candidates", state.patch_b.candidates)

        if include_selections and state.patch_a.selected_names:
            _write_selections(archive, "a", state.patch_a, entries)
        if include_selections and state.patch_b.selected_names:
            _write_selections(archive, "b", state.patch_b, entries)

        # An archive holding only a manifest would be confusing, so report
        # an error and let the caller prompt the user to generate data.
        if not entries:
            return b"", "Nothing to package. Generate walks, candidates, or selections first."

        # The manifest is embedded in the ZIP itself.
        manifest = _build_manifest(
            name=name,
            version=version,
            state=state,
            files_included=entries,
            include_flags=include_flags,
        )
        archive.writestr("manifest.json", json.dumps(manifest, indent=2).encode("utf-8"))

    # Read the bytes only after the ``with`` block has closed the archive,
    # otherwise the ZIP central directory would be missing.
    zip_bytes = buffer.getvalue()

    # Keep a copy of the ZIP plus a metadata JSON on disk for provenance.
    _persist_to_disk(
        state=state,
        name=name,
        version=version,
        zip_bytes=zip_bytes,
        manifest=manifest,
        include_flags=include_flags,
        files_included=entries,
    )
    return zip_bytes, None
def _write_selections(
    zf: zipfile.ZipFile,
    patch_key: str,
    patch: PatchState,
    files_included: list[dict[str, Any]],
) -> None:
    """Add a patch's selections to the archive as JSON and as plain text.

    Two members are written under ``patch_<patch_key>/`` and one entry per
    member is appended to ``files_included`` (mutated in place).

    Args:
        zf: Open, writable archive.
        patch_key: Patch identifier used in the member paths and entries.
        patch: Patch whose ``selected_names`` are exported.
        files_included: Running list of archive entries to extend.
    """
    selections = patch.selected_names
    folder = f"patch_{patch_key}"

    def record(member: str, kind: str, count: int, payload: bytes) -> None:
        """Write one archive member and log it in ``files_included``."""
        zf.writestr(member, payload)
        files_included.append(
            {
                "path": member,
                "type": kind,
                "patch": patch_key,
                "count": count,
                "bytes": len(payload),
            }
        )

    # The JSON member keeps each selection exactly as stored, so any
    # per-name metadata survives for later re-import.
    json_payload = json.dumps(selections, indent=2).encode("utf-8")
    record(f"{folder}/selections.json", "selections", len(selections), json_payload)

    # The TXT member is a bare one-name-per-line list (no trailing newline)
    # for other tools or manual review; dict entries contribute their
    # "name" value, anything else is used as-is.
    plain_names = [
        entry["name"] if isinstance(entry, dict) else entry
        for entry in selections
    ]
    txt_payload = "\n".join(plain_names).encode("utf-8")
    record(f"{folder}/selections.txt", "selections_txt", len(plain_names), txt_payload)
def _build_manifest(
    *,
    name: str,
    version: str,
    state: ServerState,
    files_included: list[dict[str, Any]],
    include_flags: dict[str, bool],
) -> dict[str, Any]:
    """Assemble the dictionary that becomes ``manifest.json``.

    Args:
        name: Package name recorded as ``package_name``.
        version: Package version string.
        state: Server state; both patches are summarised via
            ``_patch_summary``.
        files_included: Archive entries; stored as-is (not copied) under
            ``files`` and counted for ``file_count``.
        include_flags: Requested include options; stored as-is under
            ``include``.

    Returns:
        The manifest mapping, stamped with the current UTC time in
        ISO 8601 form and ``schema_version`` 1.
    """
    manifest: dict[str, Any] = {"schema_version": 1}
    manifest["created_at"] = datetime.now(timezone.utc).isoformat()
    manifest["package_name"] = name
    manifest["version"] = version
    manifest["patch_a"] = _patch_summary(state.patch_a)
    manifest["patch_b"] = _patch_summary(state.patch_b)
    manifest["include"] = include_flags
    manifest["file_count"] = len(files_included)
    manifest["files"] = files_included
    return manifest
def _patch_summary(patch: PatchState) -> dict[str, Any]:
    """Condense one patch into the identifying fields and counts the manifest stores.

    Args:
        patch: Patch state to summarise.

    Returns:
        Mapping with the patch's ``run_id``, ``corpus_type`` and
        ``syllable_count`` copied through, plus the number of walks,
        candidates and selections it currently holds.
    """
    # ``candidates`` is the only collection treated as possibly falsy
    # (e.g. unset); it then counts as zero rather than being measured.
    candidate_total = len(patch.candidates or ())
    summary: dict[str, Any] = dict(
        run_id=patch.run_id,
        corpus_type=patch.corpus_type,
        syllable_count=patch.syllable_count,
        walk_count=len(patch.walks),
        candidate_count=candidate_total,
        selection_count=len(patch.selected_names),
    )
    return summary
def _persist_to_disk(
    *,
    state: ServerState,
    name: str,
    version: str,
    zip_bytes: bytes,
    manifest: dict[str, Any],
    include_flags: dict[str, bool],
    files_included: list[dict[str, Any]],
) -> None:
    """Write the ZIP and a companion ``_metadata.json`` to disk.

    Files are written under ``<output_base>/packages/`` as
    ``<name>-<version>_<UTC timestamp>.zip`` and
    ``<name>-<version>_<UTC timestamp>_metadata.json``. Errors are
    logged to stderr but do **not** prevent the in-memory ZIP from
    being returned to the browser — disk persistence is best-effort.

    ``name`` and ``version`` are caller-supplied, so path separators,
    control characters and characters reserved in filenames are replaced
    with ``_`` when building the filename. This keeps the files inside
    the packages directory and prevents an embedded NUL from raising
    ``ValueError`` past the ``OSError`` handler. The metadata still
    records the original, unmodified name and version.

    Args:
        state: Server state; only ``output_base`` is read.
        name: Package name.
        version: Package version string.
        zip_bytes: Finished archive contents.
        manifest: Manifest embedded in the ZIP; ``created_at`` and the two
            patch summaries are copied from it.
        include_flags: Requested include options, stored under ``include``.
        files_included: Archive entries; only their ``path`` is recorded.
    """
    import re
    import sys

    # Separators (/ and \), control characters (including NUL) and the
    # characters Windows reserves in filenames.
    unsafe_chars = re.compile(r'[<>:"/\\|?*\x00-\x1f]')
    safe_name = unsafe_chars.sub("_", str(name))
    safe_version = unsafe_chars.sub("_", str(version))

    packages_dir = state.output_base / "packages"
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    stem = f"{safe_name}-{safe_version}_{timestamp}"
    try:
        packages_dir.mkdir(parents=True, exist_ok=True)

        # Write ZIP
        zip_path = packages_dir / f"{stem}.zip"
        zip_path.write_bytes(zip_bytes)

        # Write companion metadata JSON
        metadata = {
            "schema_version": 1,
            "created_at": manifest.get("created_at", ""),
            "package_name": name,
            "version": version,
            "patch_a": manifest.get("patch_a", {}),
            "patch_b": manifest.get("patch_b", {}),
            "include": include_flags,
            "file_count": len(files_included),
            "files_included": [f["path"] for f in files_included],
            "zip_file": zip_path.name,
            "zip_bytes": len(zip_bytes),
        }
        meta_path = packages_dir / f"{stem}_metadata.json"
        meta_path.write_text(json.dumps(metadata, indent=2), encoding="utf-8")
    except OSError as exc:
        # Best-effort: report the failure and let the caller carry on.
        print(f"[packager] warning: failed to persist package to disk: {exc}", file=sys.stderr)