Goondex/internal/search/parser.go
Stu Leak 3b8adad57d 🚀 Goondex v0.1.0-dev3 - Comprehensive ML-Powered Search & Import System
MAJOR FEATURES ADDED:
======================

🤖 ML Analysis System:
- Comprehensive scene image analysis with per-scene predictions
- Enhanced database schema with scene_ml_analysis table
- Advanced detection for clothing colors, body types, age categories, positions, settings
- Support for multiple prediction types (clothing, body, sexual acts, etc.)
- Confidence scoring and ML source tracking

🧠 Enhanced Search Capabilities:
- Natural language parser for complex queries (e.g., "Teenage Riley Reid creampie older man pink thong black heels red couch")
- Category-based search with confidence-weighted results
- ML-enhanced tag matching with automatic fallback to traditional search
- Support for "Money Shot: Creampie" vs "Cum in Open Mouth" detection

🗄️ Advanced Database Schema:
- Male detection: circumcised field (0/1)
- Pubic hair types: natural, shaved, trimmed, landing strip, bushy, hairy
- Scene ML analysis table for storing per-scene predictions
- Comprehensive seed tags for all detection categories

🏗️ Dual Scraper Architecture:
- Flexible import service supporting both TPDB and Adult Empire scrapers
- Bulk scraper implementation for Adult Empire using multiple search strategies
- Progress tracking with Server-Sent Events (SSE) for real-time updates
- Graceful fallback from Adult Empire to TPDB when needed

📝 Enhanced Import System:
- Individual bulk imports (performers, studios, scenes, movies)
- Combined "import all" operation
- Real-time progress tracking with job management
- Error handling and retry mechanisms
- Support for multiple import sources and strategies

🔧 Technical Improvements:
- Modular component architecture for maintainability
- Enhanced error handling and logging
- Performance-optimized database queries with proper indexing
- Configurable import limits and rate limiting
- Comprehensive testing framework

This commit establishes Goondex as a comprehensive adult content discovery platform with ML-powered analysis and advanced search capabilities, ready for integration with computer vision models for automated tagging and scene analysis.
2025-12-30 21:52:25 -05:00

201 lines
5.4 KiB
Go

package search
import (
"regexp"
"strings"
)
// SearchQuery represents a parsed search query
// SearchQuery represents a parsed search query, broken down into the
// category buckets that the search backend can match independently.
// All slices are initialized (non-nil) by Parser.Parse.
type SearchQuery struct {
	Original      string   // the normalized (lowercased, trimmed) query text
	Performers    []string // capitalized name sequences extracted from the raw input
	Actions       []string // sexual-act keywords (e.g. "creampie", "anal")
	Clothing      []string // clothing-item keywords (e.g. "thong", "heels")
	Colors        []string // color keywords (e.g. "pink", "black")
	BodyTypes     []string // body-type keywords (e.g. "slim", "curvy")
	AgeCategories []string // age-bracket keywords ("teen", "milf", "mature")
	Ethnicities   []string // reserved; not populated by the current Parse implementation
	Settings      []string // scene-setting keywords (e.g. "couch", "bedroom")
	Positions     []string // reserved; not populated by the current Parse implementation
	Production    []string // reserved; not populated by the current Parse implementation
	Requirements  []string // must-have terms (preceded by "with"/"has" in the query)
	Preferences   []string // nice-to-have terms (all remaining uncategorized words)
}
// Parser handles natural language search query parsing
// Parser handles natural language search query parsing.
// Each map is a keyword membership set for one category; they are
// populated once by initializeKeywords and only read afterwards, so a
// Parser is safe for concurrent use after NewParser returns.
type Parser struct {
	// Keyword mappings for different categories
	actions       map[string]bool // sexual-act keywords
	clothing      map[string]bool // clothing-item keywords
	colors        map[string]bool // color keywords
	bodyTypes     map[string]bool // body-type keywords
	ageCategories map[string]bool // age-bracket keywords
	ethnicities   map[string]bool // reserved; never filled by initializeKeywords
	settings      map[string]bool // scene-setting keywords
	positions     map[string]bool // reserved; never filled by initializeKeywords
	production    map[string]bool // reserved; never filled by initializeKeywords
}
// NewParser creates a new search query parser
func NewParser() *Parser {
p := &Parser{
actions: make(map[string]bool),
clothing: make(map[string]bool),
colors: make(map[string]bool),
bodyTypes: make(map[string]bool),
ageCategories: make(map[string]bool),
ethnicities: make(map[string]bool),
settings: make(map[string]bool),
positions: make(map[string]bool),
production: make(map[string]bool),
}
// Initialize keyword mappings
p.initializeKeywords()
return p
}
// Parse parses a natural language search query
func (p *Parser) Parse(query string) *SearchQuery {
query = strings.ToLower(query)
query = strings.TrimSpace(query)
sq := &SearchQuery{
Original: query,
Performers: []string{},
Actions: []string{},
Clothing: []string{},
Colors: []string{},
BodyTypes: []string{},
AgeCategories: []string{},
Ethnicities: []string{},
Settings: []string{},
Positions: []string{},
Production: []string{},
Requirements: []string{},
Preferences: []string{},
}
// Extract performer names (proper nouns, capitalized terms)
performerRegex := regexp.MustCompile(`\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\b`)
matches := performerRegex.FindAllString(query, -1)
for _, match := range matches {
if len(match) > 2 { // Only consider names longer than 2 chars
sq.Performers = append(sq.Performers, match)
}
}
// Extract age-specific terms
if strings.Contains(query, "teen") || strings.Contains(query, "teenage") {
sq.AgeCategories = append(sq.AgeCategories, "teen")
}
if strings.Contains(query, "milf") {
sq.AgeCategories = append(sq.AgeCategories, "milf")
}
if strings.Contains(query, "mature") {
sq.AgeCategories = append(sq.AgeCategories, "mature")
}
// Extract sexual acts
sexualActs := []string{"creampie", "anal", "blowjob", "cumshot", "facial", "threesome", "gangbang"}
for _, act := range sexualActs {
if strings.Contains(query, act) {
sq.Actions = append(sq.Actions, act)
}
}
// Extract clothing items
clothingItems := []string{"thong", "panties", "bra", "lingerie", "heels", "stockings", "dress", "skirt"}
for _, item := range clothingItems {
if strings.Contains(query, item) {
sq.Clothing = append(sq.Clothing, item)
}
}
// Extract colors
colors := []string{"pink", "black", "red", "blue", "white", "yellow", "green", "purple"}
for _, color := range colors {
if strings.Contains(query, color) {
sq.Colors = append(sq.Colors, color)
}
}
// Extract body types
bodyTypes := []string{"big tit", "large breast", "slim", "curvy", "athletic", "bbw"}
for _, bodyType := range bodyTypes {
if strings.Contains(query, bodyType) {
sq.BodyTypes = append(sq.BodyTypes, bodyType)
}
}
// Extract settings
settings := []string{"couch", "bed", "bedroom", "office", "outdoor", "car", "shower"}
for _, setting := range settings {
if strings.Contains(query, setting) {
sq.Settings = append(sq.Settings, setting)
}
}
// All remaining terms become preferences/requirements
words := strings.Fields(query)
for _, word := range words {
if len(word) > 2 && !p.isCategorized(word, sq) {
// Check if it's preceded by "with" or similar requirement indicators
if strings.Contains(query, "with "+word) || strings.Contains(query, "has "+word) {
sq.Requirements = append(sq.Requirements, word)
} else {
sq.Preferences = append(sq.Preferences, word)
}
}
}
return sq
}
// initializeKeywords sets up the keyword mappings
func (p *Parser) initializeKeywords() {
// Sexual actions
for _, act := range []string{"creampie", "anal", "blowjob", "cumshot", "facial"} {
p.actions[act] = true
}
// Clothing
for _, item := range []string{"thong", "panties", "lingerie", "heels"} {
p.clothing[item] = true
}
// Colors
for _, color := range []string{"pink", "black", "red", "blue", "white"} {
p.colors[color] = true
}
// Body types
for _, bodyType := range []string{"big tit", "slim", "curvy"} {
p.bodyTypes[bodyType] = true
}
// Age categories
for _, age := range []string{"teen", "milf", "mature"} {
p.ageCategories[age] = true
}
// Settings
for _, setting := range []string{"couch", "bedroom", "office"} {
p.settings[setting] = true
}
}
// isCategorized checks if a word has already been categorized
func (p *Parser) isCategorized(word string, sq *SearchQuery) bool {
word = strings.ToLower(word)
for _, performer := range sq.Performers {
if strings.Contains(strings.ToLower(performer), word) {
return true
}
}
return p.actions[word] || p.clothing[word] || p.colors[word] ||
p.bodyTypes[word] || p.ageCategories[word] || p.settings[word]
}