add performer schema validator and template
parent f830dbf408
commit c6b5ca4cb5

src/performers/schema.py | 160 (new file)

@@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
schema.py
---------
Performer metadata schema and validator for Goondex.

Ensures that all performer.json files follow the canonical Goondex structure
and provides tools to create, validate, and repair metadata records.

Used by:
- verifier.py
- trainer.py
- tpdb_bridge.py
- utils.py
"""

import copy
import json
from pathlib import Path
from datetime import datetime
from typing import Any, Dict


# ============================================================
# Canonical schema definition
# ============================================================

PERFORMER_TEMPLATE: Dict[str, Any] = {
    "name": "",
    "normalized_id": "",
    "aliases": [],
    "gender": "",
    "birth_date": "",
    "age": None,
    "country": "",
    "ethnicity": "",
    "hair_color": "",
    "eye_color": "",
    "height_cm": None,
    "measurements": "",
    "bust_type": "",
    "tattoos": [],
    "piercings": [],
    "career": {
        "start_year": None,
        "end_year": None
    },
    "external_ids": {
        "tpdb": None,
        "stashdb": None,
        "iafd": None
    },
    "studios": {},
    "known_galleries": 0,
    "faces": [],
    "images": [],
    "notes": "",
    "biography": "",
    "last_updated": ""
}


# ============================================================
# Utility functions
# ============================================================

def _now_iso() -> str:
    """Return current UTC time in ISO format."""
    return datetime.utcnow().replace(microsecond=0).isoformat() + "Z"


def normalize_id(name: str) -> str:
    """Convert performer name to a filesystem-safe ID."""
    return name.lower().replace(" ", "_").replace("/", "_")


def validate_performer_data(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validate a performer.json dictionary against the canonical schema.
    Any missing keys are added with defaults. Unknown keys are preserved.
    """
    # Deep-copy the template so list/dict defaults are never shared
    # between records or mutated on the template itself.
    validated = copy.deepcopy(PERFORMER_TEMPLATE)

    # Overwrite defaults with any values supplied in the record.
    for key in PERFORMER_TEMPLATE:
        if key in data:
            validated[key] = data[key]

    # Preserve keys that are not part of the canonical template.
    for key, value in data.items():
        if key not in validated:
            validated[key] = value

    # Always refresh timestamp
    validated["last_updated"] = _now_iso()
    return validated


def create_blank_performer(name: str) -> Dict[str, Any]:
    """Return a fresh performer dictionary using the template."""
    data = copy.deepcopy(PERFORMER_TEMPLATE)
    data["name"] = name
    data["normalized_id"] = normalize_id(name)
    data["last_updated"] = _now_iso()
    return data


def load_performer_json(path: Path) -> Dict[str, Any]:
    """Load performer.json, validate, and return clean data."""
    if not path.exists():
        raise FileNotFoundError(f"Missing performer file: {path}")

    try:
        data = json.loads(path.read_text(encoding="utf-8"))
        return validate_performer_data(data)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format in {path}: {e}") from e


def save_performer_json(path: Path, data: Dict[str, Any]) -> None:
    """Write performer.json back to disk with pretty formatting."""
    path.parent.mkdir(parents=True, exist_ok=True)
    data["last_updated"] = _now_iso()
    path.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")


def ensure_performer_schema(performer_name: str, base_dir: Path) -> Path:
    """
    Ensure performer.json exists and matches the schema.
    Returns the path to the verified performer.json file.
    """
    normalized = normalize_id(performer_name)
    performer_dir = base_dir / normalized
    performer_file = performer_dir / "performer.json"

    if performer_file.exists():
        try:
            data = load_performer_json(performer_file)
        except (ValueError, FileNotFoundError):
            # Corrupt or unreadable file: rebuild it from the template.
            data = create_blank_performer(performer_name)
    else:
        data = create_blank_performer(performer_name)

    save_performer_json(performer_file, data)
    return performer_file


# ============================================================
# CLI (for manual testing)
# ============================================================

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Validate or create performer schema")
    parser.add_argument("name", help="Performer name (e.g. 'Jane Doe')")
    parser.add_argument(
        "--dir", default="data/faces",
        help="Base directory where performer folders live"
    )
    args = parser.parse_args()

    base_dir = Path(args.dir)
    path = ensure_performer_schema(args.name, base_dir)
    print(f"[OK] Performer schema verified at {path}")