#!/usr/bin/env python3
# Origin: git patch c6b5ca4cb5bca27d1c5c941a948e7ce442de9c0f
#   From: Team Goon, Fri, 7 Nov 2025 07:51:11 -0500
#   Subject: [PATCH] add performer schema validator and template
#   Adds new file: src/performers/schema.py
"""
schema.py
---------
Performer metadata schema and validator for Goondex.

Ensures that all performer.json files follow the canonical Goondex structure
and provides tools to create, validate, and repair metadata records.

Used by:
    - verifier.py
    - trainer.py
    - tpdb_bridge.py
    - utils.py
"""

import copy
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict


# ============================================================
# Canonical schema definition
# ============================================================

# NOTE: this template contains nested mutable values (lists / dicts).
# Never hand it out via ``dict(PERFORMER_TEMPLATE)``: that is a shallow copy
# and every record would share the same ``aliases`` list, ``career`` dict,
# etc.  Always go through ``copy.deepcopy``.
PERFORMER_TEMPLATE: Dict[str, Any] = {
    "name": "",
    "normalized_id": "",
    "aliases": [],
    "gender": "",
    "birth_date": "",
    "age": None,
    "country": "",
    "ethnicity": "",
    "hair_color": "",
    "eye_color": "",
    "height_cm": None,
    "measurements": "",
    "bust_type": "",
    "tattoos": [],
    "piercings": [],
    "career": {
        "start_year": None,
        "end_year": None
    },
    "external_ids": {
        "tpdb": None,
        "stashdb": None,
        "iafd": None
    },
    "studios": {},
    "known_galleries": 0,
    "faces": [],
    "images": [],
    "notes": "",
    "biography": "",
    "last_updated": ""
}


# ============================================================
# Utility functions
# ============================================================

def _now_iso() -> str:
    """Return current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``."""
    # Timezone-aware replacement for the deprecated ``datetime.utcnow()``.
    # ``strftime`` is used instead of ``isoformat()`` because an aware
    # datetime would render a "+00:00" offset rather than the "Z" suffix.
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def normalize_id(name: str) -> str:
    """
    Convert performer name to a filesystem-safe ID.

    The name is lower-cased and every space and forward slash is replaced
    with an underscore (``"Jane Doe"`` -> ``"jane_doe"``).  No other
    characters are altered and surrounding whitespace is not stripped.
    """
    return name.lower().replace(" ", "_").replace("/", "_")


def _with_defaults(template: Dict[str, Any], data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a deep copy of *template* overlaid with the entries of *data*.

    Where both the template default and the supplied value are dicts the
    merge recurses, so partially filled nested sections keep their missing
    sub-keys.  Keys of *data* that the template does not know are kept.
    """
    merged = copy.deepcopy(template)
    for key, value in data.items():
        default = template.get(key)
        if isinstance(default, dict) and isinstance(value, dict):
            merged[key] = _with_defaults(default, value)
        else:
            merged[key] = value
    return merged


def validate_performer_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate a performer.json dictionary against the canonical schema.
    Any missing keys are added with defaults. Unknown keys are preserved.

    Template keys come first (in template order), followed by any unknown
    keys in the order they appear in *data*.  Nested sections such as
    ``career`` and ``external_ids`` also receive their missing sub-keys.
    The input dictionary itself is not modified.

    Returns:
        A new dictionary with ``last_updated`` set to the current UTC time.

    Raises:
        TypeError: if *data* is not a dictionary.
    """
    if not isinstance(data, dict):
        raise TypeError(
            f"Performer data must be a dict, got {type(data).__name__}"
        )

    validated = _with_defaults(PERFORMER_TEMPLATE, data)

    # Always refresh timestamp
    validated["last_updated"] = _now_iso()
    return validated


def create_blank_performer(name: str) -> Dict[str, Any]:
    """
    Return a fresh performer dictionary using the template.

    The result is fully independent of ``PERFORMER_TEMPLATE`` and of any
    other record; ``name``, ``normalized_id`` and ``last_updated`` are
    filled in, everything else keeps its template default.
    """
    data = copy.deepcopy(PERFORMER_TEMPLATE)
    data["name"] = name
    data["normalized_id"] = normalize_id(name)
    data["last_updated"] = _now_iso()
    return data


def load_performer_json(path: Path) -> Dict[str, Any]:
    """
    Load performer.json, validate, and return clean data.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if the file is not valid JSON or its top-level value is
            not a JSON object.
    """
    try:
        raw_text = path.read_text(encoding="utf-8")
    except FileNotFoundError as e:
        raise FileNotFoundError(f"Missing performer file: {path}") from e

    try:
        data = json.loads(raw_text)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format in {path}: {e}") from e

    # A syntactically valid file can still hold e.g. a list or a string.
    if not isinstance(data, dict):
        raise ValueError(
            f"Invalid JSON format in {path}: expected an object, "
            f"got {type(data).__name__}"
        )
    return validate_performer_data(data)


def save_performer_json(path: Path, data: Dict[str, Any]) -> None:
    """
    Write performer.json back to disk with pretty formatting.

    Parent directories are created as needed.  ``data["last_updated"]`` is
    refreshed in place on the caller's dictionary before serialising.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    data["last_updated"] = _now_iso()
    payload = json.dumps(data, indent=2, ensure_ascii=False)

    # Write to a sibling temp file and rename it over the target so an
    # interrupted write cannot leave a truncated performer.json behind.
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(path)


def ensure_performer_schema(performer_name: str, base_dir: Path) -> Path:
    """
    Ensure performer.json exists and matches schema.
    Returns path to the verified performer.json file.

    An existing file is loaded, completed with schema defaults and written
    back.  A missing file is created from the blank template.  A file that
    cannot be parsed is moved aside to ``performer.json.corrupt`` and
    replaced with a blank record.

    Raises:
        ValueError: if *performer_name* normalises to an empty, ``.`` or
            ``..`` id, which would not name a performer folder.
    """
    normalized = normalize_id(performer_name)
    if normalized in {"", ".", ".."}:
        raise ValueError(
            f"Cannot derive a performer directory from name {performer_name!r}"
        )

    performer_dir = base_dir / normalized
    performer_file = performer_dir / "performer.json"

    try:
        data = load_performer_json(performer_file)
    except FileNotFoundError:
        data = create_blank_performer(performer_name)
    except ValueError:
        # Keep the unreadable file for manual recovery instead of silently
        # destroying whatever it contained.
        backup = performer_file.with_name(performer_file.name + ".corrupt")
        performer_file.replace(backup)
        data = create_blank_performer(performer_name)
    else:
        # Repair identity fields that an older or hand-edited file left empty.
        if not data.get("name"):
            data["name"] = performer_name
        if not data.get("normalized_id"):
            data["normalized_id"] = normalized

    save_performer_json(performer_file, data)
    return performer_file


# ============================================================
# CLI (for manual testing)
# ============================================================

def _main() -> None:
    """Parse command-line arguments and verify one performer's schema."""
    import argparse

    parser = argparse.ArgumentParser(description="Validate or create performer schema")
    parser.add_argument("name", help="Performer name (e.g. 'Jane Doe')")
    parser.add_argument(
        "--dir", default="data/faces",
        help="Base directory where performer folders live"
    )
    args = parser.parse_args()

    base_dir = Path(args.dir)
    path = ensure_performer_schema(args.name, base_dir)
    print(f"[OK] Performer schema verified at {path}")


if __name__ == "__main__":
    _main()