v0.3.5-r4: restore search-performer, fix tpdb_bridge import, note StashDB API limitation

This commit is contained in:
Team Goon 2025-11-08 15:44:09 -05:00
parent a750d1b436
commit 5edafa8ae3
12 changed files with 392 additions and 136 deletions

View File

@ -149,11 +149,11 @@ def main():
print(colorize("[INFO] Launching Goondex Enrichment Bridge...", Colors.CYAN))
env = os.environ.copy()
env["PYTHONPATH"] = "src" # ensures the 'performers' package is found
env["PYTHONPATH"] = "src" # ensures Goondex package resolution works
try:
subprocess.run(
["python", "-m", "performers.enrichment_bridge", *( [limit] if limit else [] )],
["python", "-m", "performers.bridge.enrichment_bridge", *( [limit] if limit else [] )],
check=True,
env=env,
)

Binary file not shown.

View File

@ -0,0 +1 @@
eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1aWQiOiIwNTc0NDgyYi1jZmQ3LTRmNmEtYTViYy0wMmVkMWRjMTNjY2UiLCJzdWIiOiJBUElLZXkiLCJpYXQiOjE3NjI1NDU0Mzh9.RQZ2YLae0gX_qgZ7nCeh40N4FjGug4dgz77DG0lyFtU

View File

@ -9,8 +9,8 @@ import sys
from pathlib import Path
from difflib import SequenceMatcher
from performers import scraper
from performers.utils import load_json, save_json, normalize_name
from src.performers import scraper
from src.performers.utils import load_json, save_json, normalize_name
# ─────────────────────────────────────────────

View File

@ -0,0 +1,160 @@
#!/usr/bin/env python3
"""
enrichment_bridge.py — Goondex v0.3.5-r3
Performer enrichment orchestrator combining multiple data bridges:
TPDB → StashDB → PornPics → Local DB/JSON.
This module serves as a unification layer for all external data
sources, merging their metadata into the Goondex performer schema
and writing updated JSON + SQLite entries.
It can be triggered directly from the CLI using:
gx enrich-bridge [limit]
"""
import json
import traceback
from pathlib import Path
from typing import Optional, Dict
from utils.cli_colours import pink, lilac, cyan, yellow, green, red, heading
from src.performers.utils import normalize_name
from src.performers.db_manager import add_or_update_performer
from src.performers.bridge.tpdb_bridge import fetch_tpdb_performers
from src.performers.bridge.stashdb_bridge import fetch_stashdb_performer
from src.importer.pornpics_bridge import fetch_pornpics_profile
# ─────────────────────────────────────────────
# Paths
# ─────────────────────────────────────────────
BASE_DIR = Path(__file__).resolve().parents[2]
DATA_DIR = BASE_DIR / "data" / "performers"
DATA_DIR.mkdir(parents=True, exist_ok=True)
# ─────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────
def _merge_performer_data(base: Dict, overlay: Optional[Dict]) -> Dict:
"""
Merge overlay dict into base performer data where base values
are missing or null. Deep-merges nested 'stats' and 'sources'.
"""
if not overlay:
return base
for k, v in overlay.items():
if k == "stats":
base.setdefault("stats", {}).update(v)
elif k == "sources":
base.setdefault("sources", {}).update(v)
elif not base.get(k) or base[k] in ("", "-", None, [], {}):
base[k] = v
return base
def _save_json_cache(performer: Dict):
    """
    Write the performer dict as pretty-printed JSON into ``DATA_DIR``.

    The file is named after the performer's ``id`` when that is set,
    otherwise after the normalised ``name`` (defaulting to "unknown").
    Any failure is reported on stdout instead of being raised.
    """
    cache_key = performer.get("id")
    if not cache_key:
        cache_key = normalize_name(performer.get("name", "unknown"))
    target = DATA_DIR / f"{cache_key}.json"
    try:
        payload = json.dumps(performer, indent=2, ensure_ascii=False)
        target.write_text(payload, encoding="utf-8")
        print(green(f"[💾] Cached performer → {target.name}"))
    except Exception as exc:
        print(red(f"[ERROR] Failed to save cache for {cache_key}: {exc}"))
def _load_json_cache(name: str) -> Optional[Dict]:
    """
    Return the cached JSON for ``name`` from ``DATA_DIR``, or ``None``.

    Three file stems are tried in order: the normalised name as-is, with
    underscores replaced by hyphens, and with hyphens replaced by
    underscores. A candidate that exists but cannot be read or parsed is
    skipped in favour of the next one.

    NOTE(review): ``_save_json_cache`` names files after the performer id
    when one is present, so a name-based lookup may miss those files.
    """
    slug = normalize_name(name)
    stems = (slug, slug.replace("_", "-"), slug.replace("-", "_"))
    for stem in stems:
        cache_file = DATA_DIR / f"{stem}.json"
        if not cache_file.exists():
            continue
        try:
            return json.loads(cache_file.read_text(encoding="utf-8"))
        except Exception:
            # Unreadable or corrupt cache file: fall through to the next stem.
            continue
    return None
# ─────────────────────────────────────────────
# Enrichment Logic
# ─────────────────────────────────────────────
def enrich_performer(name: str) -> Optional[Dict]:
    """
    Run the full enrichment sequence for a single performer.

    Sources are consulted in order (local JSON cache, TPDB, StashDB,
    PornPics) and folded together with ``_merge_performer_data``. When any
    data was gathered, the result is written to the JSON cache and passed
    to ``add_or_update_performer``.

    Args:
        name: Performer name to search for.

    Returns:
        The merged performer dict (empty when no source returned data), or
        ``None`` when an unexpected exception aborted the run.
    """
    print(heading(f"Enriching Performer: {name}", icon="💖"))
    try:
        # Start from whatever is already cached locally.
        performer = _load_json_cache(name) or {}

        # ─ TPDB Fetch
        print(cyan(f"[TPDB] Searching for '{name}'..."))
        needle = name.lower()
        tpdb_results = fetch_tpdb_performers(limit=200) or []
        tpdb_match = None
        if needle:
            # An empty needle is a substring of every name, which would
            # silently match the first TPDB record.
            tpdb_match = next(
                (
                    p
                    for p in tpdb_results
                    # Guard against a missing or null "name" field.
                    if needle in (p.get("name") or "").lower()
                ),
                None,
            )
        if tpdb_match:
            print(green(f"[TPDB] Found → {tpdb_match['name']}"))
            performer = _merge_performer_data(performer, tpdb_match)
        else:
            print(yellow(f"[TPDB] No direct match for '{name}'"))

        # ─ StashDB Fetch
        print(cyan(f"[STASHDB] Searching for '{name}'..."))
        stashdb_data = fetch_stashdb_performer(name)
        if stashdb_data:
            # The normalised record may omit "name"; do not let logging
            # abort the whole enrichment.
            print(green(f"[STASHDB] Found → {stashdb_data.get('name', name)}"))
            performer = _merge_performer_data(performer, stashdb_data)
        else:
            print(yellow("[STASHDB] No results."))

        # ─ PornPics Fetch
        print(cyan(f"[PORNpics] Searching for '{name}'..."))
        pp_data = fetch_pornpics_profile(name)
        if pp_data:
            print(green(f"[PORNpics] Found → {pp_data.get('name', name)}"))
            performer = _merge_performer_data(performer, pp_data)
        else:
            print(yellow("[PORNpics] No profile found."))

        # Save and update DB
        if performer:
            _save_json_cache(performer)
            add_or_update_performer(performer)
            print(green(f"[OK] Enrichment complete for {performer.get('name', name)}"))
        else:
            print(red(f"[ERROR] No data found for '{name}'"))
        return performer
    except Exception as e:
        # Top-level boundary for one performer: report and keep the batch alive.
        print(red(f"[CRITICAL] Failed to enrich {name}: {e}"))
        print(traceback.format_exc())
        return None
def enrich_all_performers(limit: Optional[int] = None):
    """
    Run enrichment for every ``*.json`` performer file in ``DATA_DIR``.

    Files are processed in sorted order. Each file's ``name`` field is used
    as the search term, falling back to the file stem when the field is
    missing, null or empty. A failure on one file is reported and does not
    stop the batch.

    Args:
        limit: When truthy, only the first ``limit`` files are processed.
    """
    # The list is materialised before the loop; enrich_performer may write
    # additional cache files into DATA_DIR while we iterate.
    all_files = sorted(DATA_DIR.glob("*.json"))
    if limit:
        all_files = all_files[:limit]
    total = len(all_files)
    print(heading(f"Launching Enrichment Bridge ({total} performers)", icon="🧩"))
    for idx, file in enumerate(all_files, start=1):
        try:
            data = json.loads(file.read_text(encoding="utf-8"))
            # `or` (not a .get default) so a null/empty name also falls back.
            name = data.get("name") or file.stem
            print(lilac(f"\n[{idx}/{total}] {name}"))
            enrich_performer(name)
        except Exception as e:
            print(red(f"[ERROR] Failed to process {file.name}: {e}"))
    print(green("\n[OK] Bridge enrichment complete."))

View File

@ -0,0 +1,204 @@
#!/usr/bin/env python3
"""
stashdb_bridge.py — Goondex v0.3.5-r4
Bridge between Goondex and StashDB's GraphQL API.
Retrieves performer metadata and normalises it to Goondex schema.
"""
import json
import requests
from typing import Dict, Any, Optional
from pathlib import Path
# ─────────────────────────────────────────────
# Internal Imports
# ─────────────────────────────────────────────
from utils.cli_colours import cyan, yellow, green, red, heading
# ─────────────────────────────────────────────
# Configuration
# ─────────────────────────────────────────────
STASHDB_URL = "https://stashdb.org/graphql"
# ─────────────────────────────────────────────
# API Key Handling
# ─────────────────────────────────────────────
# Relative path: resolved against the current working directory at import time.
API_KEY_PATH = Path("src/importer/secrets/stashdb_api_key.txt")
API_KEY = None
try:
    if API_KEY_PATH.exists():
        API_KEY = API_KEY_PATH.read_text(encoding="utf-8").strip()
except (OSError, UnicodeError) as e:
    # A missing/unreadable key must not prevent the module from importing.
    print(f"[WARN] Could not read StashDB API key: {e}")

HEADERS = {
    "Content-Type": "application/json",
    "User-Agent": "Goondex/0.3.5-r4 (Leak Technologies)",
}
if API_KEY:
    # stash-box instances (StashDB included) authenticate API keys through
    # the dedicated "ApiKey" header; an "Authorization: Bearer" header is
    # not recognised and leaves the request unauthenticated.
    HEADERS["ApiKey"] = API_KEY
else:
    print("[WARN] No StashDB API key found — limited public query mode.")
# ─────────────────────────────────────────────
# GraphQL Query (uses required `input:` object)
# ─────────────────────────────────────────────
# Query sent by fetch_stashdb_performer with a single `$name` variable.
# It asks queryPerformers for the first page with one result per page, so at
# most one performer comes back. The selected fields are the ones
# _normalize_performer maps into the Goondex schema; `disambiguation` is
# requested as well but is not currently mapped.
GRAPHQL_QUERY = """
query FindPerformer($name: String!) {
queryPerformers(input: { name: $name, per_page: 1, page: 1 }) {
performers {
id
name
disambiguation
aliases
gender
birth_date
death_date
age
ethnicity
country
eye_color
hair_color
height
cup_size
band_size
waist_size
hip_size
breast_type
career_start_year
career_end_year
tattoos { location description }
piercings { location description }
is_favorite
images { url width height }
studios { studio { name } scene_count }
urls { url }
}
}
}
"""
# ─────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────
def _safe_request(query: str, variables: Dict[str, Any]) -> Optional[dict]:
    """
    POST a GraphQL query to ``STASHDB_URL`` and return the ``data`` object.

    Args:
        query: GraphQL document to execute.
        variables: Variables referenced by the query.

    Returns:
        The response's ``data`` member (``{}`` when the key is absent), or
        ``None`` on a transport error, a non-200 status, an unparsable or
        non-object body, or a null ``data`` member. Nothing is raised;
        problems are printed instead.
    """
    try:
        resp = requests.post(
            STASHDB_URL,
            headers=HEADERS,
            json={"query": query, "variables": variables},
            timeout=20,
        )
        if resp.status_code != 200:
            print(red(f"[STASHDB] HTTP {resp.status_code}: {resp.text[:300]}"))
            return None
        payload = resp.json()
    except Exception as e:
        print(red(f"[STASHDB] Request failed: {e}"))
        return None

    if not isinstance(payload, dict):
        print(red("[STASHDB] Unexpected response payload (expected a JSON object)."))
        return None

    # GraphQL servers report query/auth failures in an "errors" array while
    # still answering HTTP 200, so surface them instead of dropping them.
    for err in payload.get("errors") or []:
        message = err.get("message") if isinstance(err, dict) else err
        print(red(f"[STASHDB] GraphQL error: {message}"))

    return payload.get("data", {})
def _normalize_performer(raw: dict) -> Dict[str, Any]:
"""Map StashDB performer fields into Goondex schema."""
if not raw:
return {}
# URLs
urls = [u.get("url") for u in (raw.get("urls") or []) if u.get("url")]
# Studios (flatten)
studios: list[str] = []
for s in (raw.get("studios") or []):
studio = s.get("studio") or {}
name = studio.get("name")
count = s.get("scene_count")
if name:
studios.append(name if count is None else f"{name} ({count} scenes)")
# Highest-res image
primary_image = None
if raw.get("images"):
images = sorted(
raw["images"],
key=lambda i: (i.get("width", 0) * i.get("height", 0)),
reverse=True,
)
if images:
primary_image = images[0].get("url")
# Measurements string from separate fields
measurements = None
parts = [
str(raw.get("band_size") or ""),
str(raw.get("cup_size") or ""),
str(raw.get("waist_size") or ""),
str(raw.get("hip_size") or ""),
]
parts = [p for p in parts if p and p != "None"]
if parts:
measurements = "-".join(parts)
mapped = {
"id": f"STASH-{raw.get('id')}",
"name": raw.get("name"),
"aliases": raw.get("aliases", []),
"gender": raw.get("gender"),
"birth_date": raw.get("birth_date"),
"death_date": raw.get("death_date"),
"age": raw.get("age"),
"country": raw.get("country"),
"ethnicity": raw.get("ethnicity"),
"hair_color": raw.get("hair_color"),
"eye_color": raw.get("eye_color"),
"height_cm": raw.get("height"),
"measurements": measurements,
"breast_type": raw.get("breast_type"),
"tattoos": raw.get("tattoos"),
"piercings": raw.get("piercings"),
"career_start": raw.get("career_start_year"),
"career_end": raw.get("career_end_year"),
"favourite": raw.get("is_favorite"),
"thumbnail": primary_image,
"studios": studios,
"urls": urls,
"source": "StashDB",
}
return {k: v for k, v in mapped.items() if v not in (None, "", [], {})}
# ─────────────────────────────────────────────
# Public API
# ─────────────────────────────────────────────
def fetch_stashdb_performer(name: str) -> Optional[Dict[str, Any]]:
    """
    Fetch a performer by name from StashDB and normalise the record.

    Args:
        name: Performer name passed as the ``$name`` query variable.

    Returns:
        The first returned performer mapped through
        ``_normalize_performer``, or ``None`` when the request failed or
        no performer was returned.
    """
    print(heading("StashDB Bridge"))
    print(cyan(f"[INFO] Querying StashDB for '{name}'"))

    data = _safe_request(GRAPHQL_QUERY, {"name": name})
    if not data:
        print(yellow(f"[WARN] No response for performer '{name}'"))
        return None

    # `queryPerformers` (or its `performers` list) can come back as null,
    # e.g. alongside a GraphQL error; `or` covers both null and missing.
    performers = (data.get("queryPerformers") or {}).get("performers") or []
    if not performers:
        print(yellow(f"[WARN] No performer found for '{name}'"))
        return None

    norm = _normalize_performer(performers[0])
    print(green(f"[OK] Retrieved performer → {norm.get('name')}"))
    return norm
# ─────────────────────────────────────────────
# Standalone Run
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Standalone use: every CLI argument is joined into one performer name
    # and the normalised result is printed as JSON.
    import sys

    if not len(sys.argv) >= 2:
        print("Usage: python -m performers.bridge.stashdb_bridge '<performer name>'")
        sys.exit(1)

    name = " ".join(sys.argv[1:])
    result = fetch_stashdb_performer(name)
    print(json.dumps(result, indent=2, ensure_ascii=False))

View File

@ -1,91 +0,0 @@
#!/usr/bin/env python3
"""
cli_colours.py — Goondex Terminal Colour Helper
Centralised ANSI colour definitions for Goondex CLI output.
Keeps all modules visually consistent (importer, TPDB bridge,
performer search, ML tools, etc.)
Palette — Flamingo Pulse theme:
pink primary accent
lilac secondary accent
cyan highlight / link
yellow warning / info
white base text
grey muted / subtle
red error / critical (added for compatibility)
green success / confirmation (added for compatibility)
reset reset sequence
"""
# ANSI escape sequences
_RESET = "\033[0m"

# Brand palette (Flamingo Pulse inspired)
PINK = "\033[38;5;205m"  # vivid magenta-pink
LILAC = "\033[38;5;177m"  # soft violet accent
CYAN = "\033[38;5;123m"  # turquoise-cyan for links
YELLOW = "\033[38;5;228m"  # bright pastel yellow
WHITE = "\033[38;5;255m"  # near-white text
GREY = "\033[38;5;246m"  # neutral soft grey
RED = "\033[38;5;196m"  # bold red for critical errors
GREEN = "\033[38;5;82m"  # vivid green for confirmations
BOLD = "\033[1m"
DIM = "\033[2m"


# ─────────────────────────────────────────────
# Helper functions for inline use
# ─────────────────────────────────────────────
def _wrap(code: str, text: str) -> str:
    """Prefix ``text`` with an ANSI ``code`` and append the reset sequence."""
    return f"{code}{text}{_RESET}"


def pink(text: str) -> str:
    """Return ``text`` in the primary pink accent."""
    return _wrap(PINK, text)


def lilac(text: str) -> str:
    """Return ``text`` in the secondary lilac accent."""
    return _wrap(LILAC, text)


def cyan(text: str) -> str:
    """Return ``text`` in cyan."""
    return _wrap(CYAN, text)


def yellow(text: str) -> str:
    """Return ``text`` in yellow."""
    return _wrap(YELLOW, text)


def white(text: str) -> str:
    """Return ``text`` in near-white."""
    return _wrap(WHITE, text)


def grey(text: str) -> str:
    """Return ``text`` in grey."""
    return _wrap(GREY, text)


def red(text: str) -> str:
    """Return ``text`` in red."""
    return _wrap(RED, text)


def green(text: str) -> str:
    """Return ``text`` in green."""
    return _wrap(GREEN, text)


def bold(text: str) -> str:
    """Return ``text`` with the bold attribute."""
    return _wrap(BOLD, text)


def dim(text: str) -> str:
    """Return ``text`` with the dim attribute."""
    return _wrap(DIM, text)
# ─────────────────────────────────────────────
# Composite helpers
# ─────────────────────────────────────────────
def heading(title: str, icon: str = "💖", version: str | None = None) -> str:
    """
    Generate a styled Goondex section header.

    Args:
        title: Header text, rendered bold white.
        icon: Leading icon, rendered pink.
        version: Optional version label appended in grey after " · ".

    Returns:
        A string starting with a newline: a grey separator bar, the
        icon/title line, and the separator bar again.
    """
    # The separator glyph had been lost ("" * 45 is just an empty string),
    # leaving the bar invisible; use the rule character the file's own
    # section comments are drawn with.
    bar = grey("─" * 45)
    ver = f" · {grey(version)}" if version else ""
    return f"\n{bar}\n{pink(icon)} {bold(white(title))}{ver}\n{bar}"
def success(msg: str) -> str:
    """Return ``msg`` coloured green."""
    return "{}{}{}".format(GREEN, msg, _RESET)


def warning(msg: str) -> str:
    """Return ``msg`` coloured yellow with a leading warning sign."""
    return "{}⚠️ {}{}".format(YELLOW, msg, _RESET)


def error(msg: str) -> str:
    """Return ``msg`` coloured red."""
    return "{}{}{}".format(RED, msg, _RESET)


def info(msg: str) -> str:
    """Return ``msg`` coloured cyan, preceded by a single space."""
    return "{} {}{}".format(CYAN, msg, _RESET)


def muted(msg: str) -> str:
    """Return ``msg`` coloured grey."""
    return "{}{}{}".format(GREY, msg, _RESET)

View File

@ -1,6 +1,6 @@
import requests
from bs4 import BeautifulSoup
from performers.utils import normalize_name
from src.performers.utils import normalize_name
def extract_aliases(url: str) -> list[str]:
"""

View File

@ -24,7 +24,7 @@ from src.ml.facecrop.image_display import show_image
from src.performers.utils import normalize_name
from src.performers.db_manager import add_or_update_performer
from src.importer.pornpics_bridge import fetch_pornpics_profile
from src.performers.tpdb_bridge import fetch_tpdb_performers
from src.performers.bridge.tpdb_bridge import fetch_tpdb_performers
# ============================================================
# Paths

View File

@ -3,7 +3,7 @@
cli_colours.py — Goondex Terminal Colour Helper
Centralised ANSI colour definitions for Goondex CLI output.
Keeps all modules visually consistent (importer, TPDB bridge,
Keeps all modules visually consistent (importer, TPDB/StashDB bridges,
performer search, ML tools, etc.)
Palette — Flamingo Pulse theme:
@ -13,6 +13,8 @@ Palette — Flamingo Pulse theme:
yellow warning / info
white base text
grey muted / subtle
red error / critical
green success / confirmation
reset reset sequence
"""
@ -26,35 +28,24 @@ CYAN = "\033[38;5;123m" # turquoise-cyan for links
YELLOW = "\033[38;5;228m" # bright pastel yellow
WHITE = "\033[38;5;255m" # near-white text
GREY = "\033[38;5;246m" # neutral soft grey
RED = "\033[38;5;196m" # vivid red for critical errors
GREEN = "\033[38;5;82m" # bright green for success
BOLD = "\033[1m"
DIM = "\033[2m"
# ─────────────────────────────────────────────
# Helper functions for inline use
# Inline helper functions
# ─────────────────────────────────────────────
def pink(text: str) -> str:
    """Return ``text`` wrapped in PINK and the reset sequence."""
    return "{}{}{}".format(PINK, text, _RESET)


def lilac(text: str) -> str:
    """Return ``text`` wrapped in LILAC and the reset sequence."""
    return "{}{}{}".format(LILAC, text, _RESET)


def cyan(text: str) -> str:
    """Return ``text`` wrapped in CYAN and the reset sequence."""
    return "{}{}{}".format(CYAN, text, _RESET)


def yellow(text: str) -> str:
    """Return ``text`` wrapped in YELLOW and the reset sequence."""
    return "{}{}{}".format(YELLOW, text, _RESET)


def white(text: str) -> str:
    """Return ``text`` wrapped in WHITE and the reset sequence."""
    return "{}{}{}".format(WHITE, text, _RESET)


def grey(text: str) -> str:
    """Return ``text`` wrapped in GREY and the reset sequence."""
    return "{}{}{}".format(GREY, text, _RESET)


def bold(text: str) -> str:
    """Return ``text`` wrapped in BOLD and the reset sequence."""
    return "{}{}{}".format(BOLD, text, _RESET)


def dim(text: str) -> str:
    """Return ``text`` wrapped in DIM and the reset sequence."""
    return "{}{}{}".format(DIM, text, _RESET)
def pink(text: str) -> str:
    """Colour ``text`` pink."""
    return PINK + format(text) + _RESET


def lilac(text: str) -> str:
    """Colour ``text`` lilac."""
    return LILAC + format(text) + _RESET


def cyan(text: str) -> str:
    """Colour ``text`` cyan."""
    return CYAN + format(text) + _RESET


def yellow(text: str) -> str:
    """Colour ``text`` yellow."""
    return YELLOW + format(text) + _RESET


def white(text: str) -> str:
    """Colour ``text`` white."""
    return WHITE + format(text) + _RESET


def grey(text: str) -> str:
    """Colour ``text`` grey."""
    return GREY + format(text) + _RESET


def red(text: str) -> str:
    """Colour ``text`` red."""
    return RED + format(text) + _RESET


def green(text: str) -> str:
    """Colour ``text`` green."""
    return GREEN + format(text) + _RESET


def bold(text: str) -> str:
    """Render ``text`` bold."""
    return BOLD + format(text) + _RESET


def dim(text: str) -> str:
    """Render ``text`` dim."""
    return DIM + format(text) + _RESET
# ─────────────────────────────────────────────
# Composite helpers
@ -65,17 +56,8 @@ def heading(title: str, icon: str = "💖", version: str | None = None) -> str:
ver = f" · {grey(version)}" if version else ""
return f"\n{bar}\n{pink(icon)} {bold(white(title))}{ver}\n{bar}"
def success(msg: str) -> str:
    """Return ``msg`` wrapped in WHITE and the reset sequence."""
    return WHITE + format(msg) + _RESET


def warning(msg: str) -> str:
    """Return ``msg`` in YELLOW, prefixed with a warning sign."""
    return YELLOW + "⚠️ " + format(msg) + _RESET


def error(msg: str) -> str:
    """Return ``msg`` wrapped in PINK and the reset sequence."""
    return PINK + format(msg) + _RESET


def info(msg: str) -> str:
    """Return ``msg`` in CYAN, preceded by a single space."""
    return CYAN + " " + format(msg) + _RESET


def muted(msg: str) -> str:
    """Return ``msg`` wrapped in GREY and the reset sequence."""
    return GREY + format(msg) + _RESET
def success(msg: str) -> str:
    """Format ``msg`` as a success line (green)."""
    return "".join((GREEN, format(msg), _RESET))


def warning(msg: str) -> str:
    """Format ``msg`` as a warning line (yellow, with a warning sign)."""
    return "".join((YELLOW, "⚠️ ", format(msg), _RESET))


def error(msg: str) -> str:
    """Format ``msg`` as an error line (red)."""
    return "".join((RED, format(msg), _RESET))


def info(msg: str) -> str:
    """Format ``msg`` as an info line (cyan, preceded by one space)."""
    return "".join((CYAN, " ", format(msg), _RESET))


def muted(msg: str) -> str:
    """Format ``msg`` as a muted line (grey)."""
    return "".join((GREY, format(msg), _RESET))