Goondex/internal/db/schema.go
Stu Leak 3b8adad57d 🚀 Goondex v0.1.0-dev3 - Comprehensive ML-Powered Search & Import System
MAJOR FEATURES ADDED:
======================

🤖 ML Analysis System:
- Comprehensive scene image analysis with per-scene predictions
- Enhanced database schema with scene_ml_analysis table
- Advanced detection for clothing colors, body types, age categories, positions, settings
- Support for multiple prediction types (clothing, body, sexual acts, etc.)
- Confidence scoring and ML source tracking

🧠 Enhanced Search Capabilities:
- Natural language parser for complex queries (e.g., "Teenage Riley Reid creampie older man pink thong black heels red couch")
- Category-based search with confidence-weighted results
- ML-enhanced tag matching with automatic fallback to traditional search
- Support for "Money Shot: Creampie" vs "Cum in Open Mouth" detection

🗄️ Advanced Database Schema:
- Male detection: circumcised field (0/1)
- Pubic hair types: natural, shaved, trimmed, landing strip, bushy, hairy
- Scene ML analysis table for storing per-scene predictions
- Comprehensive seed tags for all detection categories

🏗️ Dual Scraper Architecture:
- Flexible import service supporting both TPDB and Adult Empire scrapers
- Bulk scraper implementation for Adult Empire using multiple search strategies
- Progress tracking with Server-Sent Events (SSE) for real-time updates
- Graceful fallback from Adult Empire to TPDB when needed

📝 Enhanced Import System:
- Individual bulk imports (performers, studios, scenes, movies)
- Combined "import all" operation
- Real-time progress tracking with job management
- Error handling and retry mechanisms
- Support for multiple import sources and strategies

🔧 Technical Improvements:
- Modular component architecture for maintainability
- Enhanced error handling and logging
- Performance-optimized database queries with proper indexing
- Configurable import limits and rate limiting
- Comprehensive testing framework

This commit establishes Goondex as a comprehensive adult content discovery platform with ML-powered analysis and advanced search capabilities, ready for integration with computer vision models for automated tagging and scene analysis.
2025-12-30 21:52:25 -05:00

276 lines
8.7 KiB
Go

package db
const schema = `
-- Enable foreign keys
PRAGMA foreign_keys = ON;
-- Performers table
CREATE TABLE IF NOT EXISTS performers (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
aliases TEXT,
-- Physical attributes
gender TEXT,
birthday TEXT,
astrology TEXT,
birthplace TEXT,
ethnicity TEXT,
nationality TEXT,
country TEXT,
eye_color TEXT,
hair_color TEXT,
height INTEGER,
weight INTEGER,
measurements TEXT,
cup_size TEXT,
tattoo_description TEXT,
piercing_description TEXT,
boob_job TEXT,
circumcised INTEGER DEFAULT 0,
pubic_hair_type TEXT DEFAULT 'natural',
-- Career information
career TEXT,
career_start_year INTEGER,
career_end_year INTEGER,
date_of_death TEXT,
active INTEGER DEFAULT 1,
-- Media
image_path TEXT,
image_url TEXT,
poster_url TEXT,
bio TEXT,
-- Source tracking
source TEXT,
source_id TEXT,
source_numeric_id INTEGER,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
-- Prevent duplicates from same source
UNIQUE(source, source_id)
);
-- Studios table
CREATE TABLE IF NOT EXISTS studios (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
parent_id INTEGER,
image_path TEXT,
image_url TEXT,
description TEXT,
source TEXT,
source_id TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
FOREIGN KEY (parent_id) REFERENCES studios(id) ON DELETE SET NULL
);
-- Tag Categories table (hierarchical)
CREATE TABLE IF NOT EXISTS tag_categories (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
parent_id INTEGER,
description TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
FOREIGN KEY (parent_id) REFERENCES tag_categories(id) ON DELETE CASCADE
);
-- Tags table (enhanced with categories)
CREATE TABLE IF NOT EXISTS tags (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
category_id INTEGER NOT NULL,
aliases TEXT,
description TEXT,
source TEXT,
source_id TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
UNIQUE(category_id, name),
FOREIGN KEY (category_id) REFERENCES tag_categories(id) ON DELETE CASCADE
);
-- Scenes table
CREATE TABLE IF NOT EXISTS scenes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
code TEXT,
date TEXT,
studio_id INTEGER,
description TEXT,
image_path TEXT,
image_url TEXT,
director TEXT,
url TEXT,
source TEXT,
source_id TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
FOREIGN KEY (studio_id) REFERENCES studios(id) ON DELETE SET NULL
);
-- Movies table (full-length DVDs/releases)
CREATE TABLE IF NOT EXISTS movies (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
date TEXT,
studio_id INTEGER,
description TEXT,
director TEXT,
duration INTEGER,
image_path TEXT,
image_url TEXT,
back_image_url TEXT,
url TEXT,
source TEXT,
source_id TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
FOREIGN KEY (studio_id) REFERENCES studios(id) ON DELETE SET NULL
);
-- Movie-Scene many-to-many junction table (scenes belong to movies)
CREATE TABLE IF NOT EXISTS movie_scenes (
movie_id INTEGER NOT NULL,
scene_id INTEGER NOT NULL,
scene_number INTEGER,
PRIMARY KEY (movie_id, scene_id),
FOREIGN KEY (movie_id) REFERENCES movies(id) ON DELETE CASCADE,
FOREIGN KEY (scene_id) REFERENCES scenes(id) ON DELETE CASCADE
);
-- Movie-Performer many-to-many junction table
CREATE TABLE IF NOT EXISTS movie_performers (
movie_id INTEGER NOT NULL,
performer_id INTEGER NOT NULL,
PRIMARY KEY (movie_id, performer_id),
FOREIGN KEY (movie_id) REFERENCES movies(id) ON DELETE CASCADE,
FOREIGN KEY (performer_id) REFERENCES performers(id) ON DELETE CASCADE
);
-- Movie-Tag many-to-many junction table
CREATE TABLE IF NOT EXISTS movie_tags (
movie_id INTEGER NOT NULL,
tag_id INTEGER NOT NULL,
PRIMARY KEY (movie_id, tag_id),
FOREIGN KEY (movie_id) REFERENCES movies(id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tags(id) ON DELETE CASCADE
);
-- Scene-Performer many-to-many junction table
CREATE TABLE IF NOT EXISTS scene_performers (
scene_id INTEGER NOT NULL,
performer_id INTEGER NOT NULL,
PRIMARY KEY (scene_id, performer_id),
FOREIGN KEY (scene_id) REFERENCES scenes(id) ON DELETE CASCADE,
FOREIGN KEY (performer_id) REFERENCES performers(id) ON DELETE CASCADE
);
-- Scene-Tag many-to-many junction table (enhanced with ML support)
CREATE TABLE IF NOT EXISTS scene_tags (
scene_id INTEGER NOT NULL,
tag_id INTEGER NOT NULL,
confidence REAL DEFAULT 1.0,
source TEXT NOT NULL DEFAULT 'user',
verified INTEGER DEFAULT 0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
PRIMARY KEY (scene_id, tag_id),
FOREIGN KEY (scene_id) REFERENCES scenes(id) ON DELETE CASCADE,
FOREIGN KEY (tag_id) REFERENCES tags(id) ON DELETE CASCADE
);
-- Scene ML Analysis results table (for storing per-scene ML predictions)
CREATE TABLE IF NOT EXISTS scene_ml_analysis (
id INTEGER PRIMARY KEY AUTOINCREMENT,
scene_id INTEGER NOT NULL,
model_version TEXT NOT NULL,
prediction_type TEXT NOT NULL, -- 'clothing', 'position', 'body_type', 'hair', 'ethnicity', etc.
predictions TEXT NOT NULL, -- JSON blob of ML predictions
confidence_score REAL DEFAULT 0.0,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
FOREIGN KEY (scene_id) REFERENCES scenes(id) ON DELETE CASCADE
);
-- Scene Images table (for ML training and PornPics integration)
CREATE TABLE IF NOT EXISTS scene_images (
id INTEGER PRIMARY KEY AUTOINCREMENT,
scene_id INTEGER NOT NULL,
image_url TEXT NOT NULL,
image_path TEXT,
source TEXT,
source_id TEXT,
width INTEGER,
height INTEGER,
file_size INTEGER,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
FOREIGN KEY (scene_id) REFERENCES scenes(id) ON DELETE CASCADE
);
-- ML Predictions table (track model versions and predictions)
CREATE TABLE IF NOT EXISTS ml_predictions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
scene_id INTEGER,
image_id INTEGER,
model_version TEXT NOT NULL,
predictions TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
FOREIGN KEY (scene_id) REFERENCES scenes(id) ON DELETE CASCADE,
FOREIGN KEY (image_id) REFERENCES scene_images(id) ON DELETE CASCADE
);
-- Indexes for common queries (v0.1.0)
CREATE INDEX IF NOT EXISTS idx_performers_name ON performers(name);
CREATE INDEX IF NOT EXISTS idx_studios_name ON studios(name);
CREATE INDEX IF NOT EXISTS idx_scenes_title ON scenes(title);
CREATE INDEX IF NOT EXISTS idx_scenes_code ON scenes(code);
CREATE INDEX IF NOT EXISTS idx_scenes_date ON scenes(date);
CREATE INDEX IF NOT EXISTS idx_movies_title ON movies(title);
CREATE INDEX IF NOT EXISTS idx_movies_date ON movies(date);
CREATE INDEX IF NOT EXISTS idx_movie_scenes_movie ON movie_scenes(movie_id);
CREATE INDEX IF NOT EXISTS idx_movie_scenes_scene ON movie_scenes(scene_id);
-- Tag search indexes (v0.2.0 - ML ready)
CREATE INDEX IF NOT EXISTS idx_tag_categories_name ON tag_categories(name);
CREATE INDEX IF NOT EXISTS idx_tag_categories_parent ON tag_categories(parent_id);
CREATE INDEX IF NOT EXISTS idx_tags_name ON tags(name);
CREATE INDEX IF NOT EXISTS idx_tags_category ON tags(category_id);
-- Scene tag filtering indexes (critical for complex queries)
CREATE INDEX IF NOT EXISTS idx_scene_tags_tag ON scene_tags(tag_id);
CREATE INDEX IF NOT EXISTS idx_scene_tags_scene ON scene_tags(scene_id);
CREATE INDEX IF NOT EXISTS idx_scene_tags_confidence ON scene_tags(confidence);
CREATE INDEX IF NOT EXISTS idx_scene_tags_verified ON scene_tags(verified);
CREATE INDEX IF NOT EXISTS idx_scene_tags_source ON scene_tags(source);
-- Image processing indexes
CREATE INDEX IF NOT EXISTS idx_scene_images_scene ON scene_images(scene_id);
CREATE INDEX IF NOT EXISTS idx_scene_images_source ON scene_images(source, source_id);
-- ML prediction indexes
CREATE INDEX IF NOT EXISTS idx_ml_predictions_scene ON ml_predictions(scene_id);
CREATE INDEX IF NOT EXISTS idx_ml_predictions_image ON ml_predictions(image_id);
CREATE INDEX IF NOT EXISTS idx_ml_predictions_model ON ml_predictions(model_version);
-- Sync metadata table (track last sync times)
CREATE TABLE IF NOT EXISTS sync_metadata (
id INTEGER PRIMARY KEY AUTOINCREMENT,
entity_type TEXT NOT NULL UNIQUE,
last_sync_at TEXT NOT NULL,
records_updated INTEGER DEFAULT 0,
records_failed INTEGER DEFAULT 0,
status TEXT NOT NULL DEFAULT 'completed',
error_message TEXT,
created_at TEXT NOT NULL DEFAULT (datetime('now')),
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_sync_metadata_entity ON sync_metadata(entity_type);
CREATE INDEX IF NOT EXISTS idx_sync_metadata_last_sync ON sync_metadata(last_sync_at);
`