MAJOR FEATURES ADDED: ====================== 🤖 ML Analysis System: - Comprehensive scene image analysis with per-scene predictions - Enhanced database schema with scene_ml_analysis table - Advanced detection for clothing colors, body types, age categories, positions, settings - Support for multiple prediction types (clothing, body, sexual acts, etc.) - Confidence scoring and ML source tracking 🧠 Enhanced Search Capabilities: - Natural language parser for complex queries (e.g., "Teenage Riley Reid creampie older man pink thong black heels red couch") - Category-based search with confidence-weighted results - ML-enhanced tag matching with automatic fallback to traditional search - Support for "Money Shot: Creampie" vs "Cum in Open Mouth" detection 🗄️ Advanced Database Schema: - Male detection: circumcised field (0/1) - Pubic hair types: natural, shaved, trimmed, landing strip, bushy, hairy - Scene ML analysis table for storing per-scene predictions - Comprehensive seed tags for all detection categories 🏗️ Dual Scraper Architecture: - Flexible import service supporting both TPDB and Adult Empire scrapers - Bulk scraper implementation for Adult Empire using multiple search strategies - Progress tracking with Server-Sent Events (SSE) for real-time updates - Graceful fallback from Adult Empire to TPDB when needed 📝 Enhanced Import System: - Individual bulk imports (performers, studios, scenes, movies) - Combined "import all" operation - Real-time progress tracking with job management - Error handling and retry mechanisms - Support for multiple import sources and strategies 🔧 Technical Improvements: - Modular component architecture for maintainability - Enhanced error handling and logging - Performance-optimized database queries with proper indexing - Configurable import limits and rate limiting - Comprehensive testing framework This commit establishes Goondex as a comprehensive adult content discovery platform with ML-powered analysis and advanced search capabilities, 
ready for integration with computer vision models for automated tagging and scene analysis.
201 lines
5.4 KiB
Go
201 lines
5.4 KiB
Go
package search
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
)
|
|
|
|
// SearchQuery is the structured result of parsing a free-text search query.
// Parse fills one slice per category and always leaves every slice non-nil.
type SearchQuery struct {
	// Original is the query text as stored by Parse (lowercased and trimmed).
	Original string

	// Performers holds candidate performer names taken from capitalized words.
	Performers []string
	// Actions holds the recognised act keywords found in the query.
	Actions []string
	// Clothing holds the recognised clothing keywords found in the query.
	Clothing []string
	// Colors holds the recognised colour keywords found in the query.
	Colors []string
	// BodyTypes holds the recognised body-type keywords found in the query.
	BodyTypes []string
	// AgeCategories holds the recognised age-category labels.
	AgeCategories []string
	// Ethnicities is initialised by Parse; the visible parser has no keywords
	// that populate it.
	Ethnicities []string
	// Settings holds the recognised location/setting keywords.
	Settings []string
	// Positions is initialised by Parse; the visible parser has no keywords
	// that populate it.
	Positions []string
	// Production is initialised by Parse; the visible parser has no keywords
	// that populate it.
	Production []string

	// Requirements holds must-have terms: uncategorized words introduced by
	// "with" or "has".
	Requirements []string
	// Preferences holds nice-to-have terms: every other uncategorized word.
	Preferences []string
}
|
|
|
|
// Parser turns natural-language search text into a SearchQuery.
//
// Each field is a set of lowercase keywords for one category (a key mapped to
// true is a member). Create a Parser with NewParser, which allocates every
// set; the zero value has nil sets.
type Parser struct {
	// actions is the set of act keywords.
	actions map[string]bool
	// clothing is the set of clothing keywords.
	clothing map[string]bool
	// colors is the set of colour keywords.
	colors map[string]bool
	// bodyTypes is the set of body-type keywords.
	bodyTypes map[string]bool
	// ageCategories is the set of age-category keywords.
	ageCategories map[string]bool
	// ethnicities is allocated by NewParser; no keywords are registered for
	// it in the visible code.
	ethnicities map[string]bool
	// settings is the set of location/setting keywords.
	settings map[string]bool
	// positions is allocated by NewParser; no keywords are registered for it
	// in the visible code.
	positions map[string]bool
	// production is allocated by NewParser; no keywords are registered for
	// it in the visible code.
	production map[string]bool
}
|
|
|
|
// NewParser creates a new search query parser
|
|
func NewParser() *Parser {
|
|
p := &Parser{
|
|
actions: make(map[string]bool),
|
|
clothing: make(map[string]bool),
|
|
colors: make(map[string]bool),
|
|
bodyTypes: make(map[string]bool),
|
|
ageCategories: make(map[string]bool),
|
|
ethnicities: make(map[string]bool),
|
|
settings: make(map[string]bool),
|
|
positions: make(map[string]bool),
|
|
production: make(map[string]bool),
|
|
}
|
|
|
|
// Initialize keyword mappings
|
|
p.initializeKeywords()
|
|
return p
|
|
}
|
|
|
|
// Parse parses a natural language search query
|
|
func (p *Parser) Parse(query string) *SearchQuery {
|
|
query = strings.ToLower(query)
|
|
query = strings.TrimSpace(query)
|
|
|
|
sq := &SearchQuery{
|
|
Original: query,
|
|
Performers: []string{},
|
|
Actions: []string{},
|
|
Clothing: []string{},
|
|
Colors: []string{},
|
|
BodyTypes: []string{},
|
|
AgeCategories: []string{},
|
|
Ethnicities: []string{},
|
|
Settings: []string{},
|
|
Positions: []string{},
|
|
Production: []string{},
|
|
Requirements: []string{},
|
|
Preferences: []string{},
|
|
}
|
|
|
|
// Extract performer names (proper nouns, capitalized terms)
|
|
performerRegex := regexp.MustCompile(`\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b`)
|
|
matches := performerRegex.FindAllString(query, -1)
|
|
for _, match := range matches {
|
|
if len(match) > 2 { // Only consider names longer than 2 chars
|
|
sq.Performers = append(sq.Performers, match)
|
|
}
|
|
}
|
|
|
|
// Extract age-specific terms
|
|
if strings.Contains(query, "teen") || strings.Contains(query, "teenage") {
|
|
sq.AgeCategories = append(sq.AgeCategories, "teen")
|
|
}
|
|
if strings.Contains(query, "milf") {
|
|
sq.AgeCategories = append(sq.AgeCategories, "milf")
|
|
}
|
|
if strings.Contains(query, "mature") {
|
|
sq.AgeCategories = append(sq.AgeCategories, "mature")
|
|
}
|
|
|
|
// Extract sexual acts
|
|
sexualActs := []string{"creampie", "anal", "blowjob", "cumshot", "facial", "threesome", "gangbang"}
|
|
for _, act := range sexualActs {
|
|
if strings.Contains(query, act) {
|
|
sq.Actions = append(sq.Actions, act)
|
|
}
|
|
}
|
|
|
|
// Extract clothing items
|
|
clothingItems := []string{"thong", "panties", "bra", "lingerie", "heels", "stockings", "dress", "skirt"}
|
|
for _, item := range clothingItems {
|
|
if strings.Contains(query, item) {
|
|
sq.Clothing = append(sq.Clothing, item)
|
|
}
|
|
}
|
|
|
|
// Extract colors
|
|
colors := []string{"pink", "black", "red", "blue", "white", "yellow", "green", "purple"}
|
|
for _, color := range colors {
|
|
if strings.Contains(query, color) {
|
|
sq.Colors = append(sq.Colors, color)
|
|
}
|
|
}
|
|
|
|
// Extract body types
|
|
bodyTypes := []string{"big tit", "large breast", "slim", "curvy", "athletic", "bbw"}
|
|
for _, bodyType := range bodyTypes {
|
|
if strings.Contains(query, bodyType) {
|
|
sq.BodyTypes = append(sq.BodyTypes, bodyType)
|
|
}
|
|
}
|
|
|
|
// Extract settings
|
|
settings := []string{"couch", "bed", "bedroom", "office", "outdoor", "car", "shower"}
|
|
for _, setting := range settings {
|
|
if strings.Contains(query, setting) {
|
|
sq.Settings = append(sq.Settings, setting)
|
|
}
|
|
}
|
|
|
|
// All remaining terms become preferences/requirements
|
|
words := strings.Fields(query)
|
|
for _, word := range words {
|
|
if len(word) > 2 && !p.isCategorized(word, sq) {
|
|
// Check if it's preceded by "with" or similar requirement indicators
|
|
if strings.Contains(query, "with "+word) || strings.Contains(query, "has "+word) {
|
|
sq.Requirements = append(sq.Requirements, word)
|
|
} else {
|
|
sq.Preferences = append(sq.Preferences, word)
|
|
}
|
|
}
|
|
}
|
|
|
|
return sq
|
|
}
|
|
|
|
// initializeKeywords sets up the keyword mappings
|
|
func (p *Parser) initializeKeywords() {
|
|
// Sexual actions
|
|
for _, act := range []string{"creampie", "anal", "blowjob", "cumshot", "facial"} {
|
|
p.actions[act] = true
|
|
}
|
|
|
|
// Clothing
|
|
for _, item := range []string{"thong", "panties", "lingerie", "heels"} {
|
|
p.clothing[item] = true
|
|
}
|
|
|
|
// Colors
|
|
for _, color := range []string{"pink", "black", "red", "blue", "white"} {
|
|
p.colors[color] = true
|
|
}
|
|
|
|
// Body types
|
|
for _, bodyType := range []string{"big tit", "slim", "curvy"} {
|
|
p.bodyTypes[bodyType] = true
|
|
}
|
|
|
|
// Age categories
|
|
for _, age := range []string{"teen", "milf", "mature"} {
|
|
p.ageCategories[age] = true
|
|
}
|
|
|
|
// Settings
|
|
for _, setting := range []string{"couch", "bedroom", "office"} {
|
|
p.settings[setting] = true
|
|
}
|
|
}
|
|
|
|
// isCategorized checks if a word has already been categorized
|
|
func (p *Parser) isCategorized(word string, sq *SearchQuery) bool {
|
|
word = strings.ToLower(word)
|
|
|
|
for _, performer := range sq.Performers {
|
|
if strings.Contains(strings.ToLower(performer), word) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return p.actions[word] || p.clothing[word] || p.colors[word] ||
|
|
p.bodyTypes[word] || p.ageCategories[word] || p.settings[word]
|
|
}
|