add performer schema validator and template

This commit is contained in:
Team Goon 2025-11-07 07:51:11 -05:00
parent f830dbf408
commit c6b5ca4cb5

160
src/performers/schema.py Normal file
View File

@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
schema.py
---------
Performer metadata schema and validator for Goondex.
Ensures that all performer.json files follow the canonical Goondex structure
and provides tools to create, validate, and repair metadata records.
Used by:
- verifier.py
- trainer.py
- tpdb_bridge.py
- utils.py
"""
import json
from pathlib import Path
from datetime import datetime
from typing import Any, Dict
# ============================================================
# Canonical schema definition
# ============================================================
# Default value for every canonical key of a performer record.
# NOTE: the list/dict defaults below are mutable objects; code that builds a
# record from this template must not hand these exact objects to callers.
PERFORMER_TEMPLATE: Dict[str, Any] = {
    # --- string / scalar fields ---
    "name": "",
    "normalized_id": "",
    "aliases": [],
    "gender": "",
    "birth_date": "",
    "age": None,
    "country": "",
    "ethnicity": "",
    "hair_color": "",
    "eye_color": "",
    "height_cm": None,
    "measurements": "",
    "bust_type": "",
    "tattoos": [],
    "piercings": [],
    # --- nested records with a fixed set of sub-keys ---
    "career": {"start_year": None, "end_year": None},
    "external_ids": {"tpdb": None, "stashdb": None, "iafd": None},
    # --- collections and counters ---
    "studios": {},
    "known_galleries": 0,
    "faces": [],
    "images": [],
    # --- free text ---
    "notes": "",
    "biography": "",
    # Overwritten with the current UTC timestamp by the functions in this file
    # that create, validate or save a record.
    "last_updated": "",
}
# ============================================================
# Utility functions
# ============================================================
def _now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ``.

    Returns:
        The current UTC time truncated to whole seconds, in ISO-8601 format
        with a literal ``Z`` suffix.
    """
    # Local import: the file header only imports the ``datetime`` class.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware UTC
    # "now" instead, then drop tzinfo so isoformat() does not append "+00:00"
    # in front of the explicit "Z" suffix (keeps the output format unchanged).
    current = datetime.now(timezone.utc).replace(microsecond=0, tzinfo=None)
    return current.isoformat() + "Z"
def normalize_id(name: str) -> str:
    """Derive a performer ID from a display name.

    The name is lower-cased, then every space and every forward slash is
    replaced by an underscore. No other characters are altered (runs of
    spaces are not collapsed, and characters such as backslashes or dots
    pass through unchanged).

    Args:
        name: Performer display name, e.g. ``"Jane Doe"``.

    Returns:
        The derived ID, e.g. ``"jane_doe"``.
    """
    # One translation table handles both replacements in a single pass.
    to_underscore = str.maketrans({" ": "_", "/": "_"})
    lowered = name.lower()
    return lowered.translate(to_underscore)
def validate_performer_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of *data* completed against the canonical schema.

    Every key of ``PERFORMER_TEMPLATE`` that is missing from *data* is added
    with its default value. Keys that are not part of the template are
    preserved. Where the template default is itself a dict (for example
    ``career`` or ``external_ids``) and the supplied value is also a dict,
    missing sub-keys are filled in from the default. ``last_updated`` is
    always set to the current UTC timestamp.

    Args:
        data: Parsed content of a performer.json file.

    Returns:
        A new dictionary; *data* itself is not modified.

    Raises:
        ValueError: If *data* is not a dict (e.g. the JSON root was a list).
    """
    # Local import: ``copy`` is not among the file-level imports.
    import copy

    if not isinstance(data, dict):
        raise ValueError(
            f"Performer data must be a JSON object, got {type(data).__name__}"
        )

    # Deep copy so the nested list/dict defaults are never shared with the
    # template (or with other records built from it).
    validated = copy.deepcopy(PERFORMER_TEMPLATE)

    # Iterate over *data* rather than over the template so that unknown keys
    # survive, as the contract promises.
    for key, value in data.items():
        default = PERFORMER_TEMPLATE.get(key)
        if isinstance(default, dict) and isinstance(value, dict):
            # Supplied sub-keys win; absent ones fall back to the default.
            validated[key] = {**copy.deepcopy(default), **value}
        else:
            validated[key] = value

    # Always refresh timestamp
    validated["last_updated"] = _now_iso()
    return validated
def create_blank_performer(name: str) -> Dict[str, Any]:
    """Build a new performer record populated with template defaults.

    Args:
        name: Performer display name; stored under ``"name"`` and used to
            derive ``"normalized_id"`` via ``normalize_id``.

    Returns:
        A new dictionary containing every template key, with ``name``,
        ``normalized_id`` and ``last_updated`` filled in.
    """
    # Local import: ``copy`` is not among the file-level imports.
    import copy

    # A shallow ``dict(PERFORMER_TEMPLATE)`` would share the template's nested
    # lists and dicts, so appending to one record's "aliases" would leak into
    # the template and every later record. Deep-copy to isolate each record.
    data = copy.deepcopy(PERFORMER_TEMPLATE)
    data["name"] = name
    data["normalized_id"] = normalize_id(name)
    data["last_updated"] = _now_iso()
    return data
def load_performer_json(path: Path) -> Dict[str, Any]:
    """Load a performer.json file and return its validated content.

    Args:
        path: Location of the performer.json file.

    Returns:
        The parsed content after being passed through
        ``validate_performer_data``.

    Raises:
        FileNotFoundError: If *path* does not exist.
        ValueError: If the file content is not valid JSON. The underlying
            ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    if not path.exists():
        raise FileNotFoundError(f"Missing performer file: {path}")

    # Only the parse step lives inside the try block, so an error raised
    # during validation can never be reported as a JSON syntax problem.
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # Chain explicitly so the traceback shows the parse error as the cause.
        raise ValueError(f"Invalid JSON format in {path}: {e}") from e

    return validate_performer_data(data)
def save_performer_json(path: Path, data: Dict[str, Any]) -> None:
    """Write a performer record to *path* as pretty-printed UTF-8 JSON.

    The parent directory is created if needed. ``data["last_updated"]`` is
    set to the current UTC timestamp before serialising; note that this
    updates the caller's dictionary in place.

    Args:
        path: Destination performer.json file.
        data: Performer record to serialise.

    Raises:
        TypeError: If *data* contains values ``json.dumps`` cannot serialise.
        OSError: If the directory or file cannot be written.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    data["last_updated"] = _now_iso()
    payload = json.dumps(data, indent=2, ensure_ascii=False)

    # Write to a sibling temp file and swap it into place. Writing straight
    # to *path* truncates it first, so an interrupted write would leave a
    # half-written (unparseable) performer.json behind.
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(path)
    except OSError:
        # Don't leave a stray partial temp file next to the real one.
        if tmp_path.exists():
            tmp_path.unlink()
        raise
def ensure_performer_schema(performer_name: str, base_dir: Path) -> Path:
    """Make sure a schema-conforming performer.json exists for a performer.

    The file lives at ``base_dir / normalize_id(performer_name) /
    "performer.json"``. An existing, parseable file is loaded, validated and
    written back. A missing file is created from a blank record. A file that
    cannot be parsed is first moved aside to ``performer.json.corrupt`` (so
    its content is not lost) and then replaced by a blank record.

    Args:
        performer_name: Performer display name.
        base_dir: Directory that contains one sub-directory per performer.

    Returns:
        Path to the performer.json file that was written.
    """
    normalized = normalize_id(performer_name)
    performer_file = base_dir / normalized / "performer.json"

    # EAFP: load_performer_json already raises FileNotFoundError for a
    # missing file, so a separate exists() check is unnecessary.
    try:
        data = load_performer_json(performer_file)
    except FileNotFoundError:
        data = create_blank_performer(performer_name)
    except ValueError:
        # The file exists but is unreadable as a performer record. Keep the
        # original bytes under a different name instead of overwriting them.
        backup = performer_file.with_name(performer_file.name + ".corrupt")
        performer_file.replace(backup)
        data = create_blank_performer(performer_name)
    else:
        # Repair identity fields that are absent or empty in the stored record.
        if not data.get("name"):
            data["name"] = performer_name
        if not data.get("normalized_id"):
            data["normalized_id"] = normalized

    save_performer_json(performer_file, data)
    return performer_file
# ============================================================
# CLI (for manual testing)
# ============================================================
if __name__ == "__main__":
    import argparse

    # Manual smoke test: verify (or create) the performer.json for one
    # performer name given on the command line and report where it lives.
    cli = argparse.ArgumentParser(description="Validate or create performer schema")
    cli.add_argument("name", help="Performer name (e.g. 'Jane Doe')")
    cli.add_argument(
        "--dir",
        default="data/faces",
        help="Base directory where performer folders live",
    )
    options = cli.parse_args()

    path = ensure_performer_schema(options.name, Path(options.dir))
    print(f"[OK] Performer schema verified at {path}")