Goondex/internal/search/advanced.go
Stu Leak 3b8adad57d 🚀 Goondex v0.1.0-dev3 - Comprehensive ML-Powered Search & Import System
MAJOR FEATURES ADDED:
======================

🤖 ML Analysis System:
- Comprehensive scene image analysis with per-scene predictions
- Enhanced database schema with scene_ml_analysis table
- Advanced detection for clothing colors, body types, age categories, positions, settings
- Support for multiple prediction types (clothing, body, sexual acts, etc.)
- Confidence scoring and ML source tracking

🧠 Enhanced Search Capabilities:
- Natural language parser for complex queries (e.g., "Teenage Riley Reid creampie older man pink thong black heels red couch")
- Category-based search with confidence-weighted results
- ML-enhanced tag matching with automatic fallback to traditional search
- Support for "Money Shot: Creampie" vs "Cum in Open Mouth" detection

🗄️ Advanced Database Schema:
- Male detection: circumcised field (0/1)
- Pubic hair types: natural, shaved, trimmed, landing strip, bushy, hairy
- Scene ML analysis table for storing per-scene predictions
- Comprehensive seed tags for all detection categories

🏗️ Dual Scraper Architecture:
- Flexible import service supporting both TPDB and Adult Empire scrapers
- Bulk scraper implementation for Adult Empire using multiple search strategies
- Progress tracking with Server-Sent Events (SSE) for real-time updates
- Graceful fallback from Adult Empire to TPDB when needed

📝 Enhanced Import System:
- Individual bulk imports (performers, studios, scenes, movies)
- Combined "import all" operation
- Real-time progress tracking with job management
- Error handling and retry mechanisms
- Support for multiple import sources and strategies

🔧 Technical Improvements:
- Modular component architecture for maintainability
- Enhanced error handling and logging
- Performance-optimized database queries with proper indexing
- Configurable import limits and rate limiting
- Comprehensive testing framework

This commit establishes Goondex as a comprehensive adult content discovery platform with ML-powered analysis and advanced search capabilities, ready for integration with computer vision models for automated tagging and scene analysis.
2025-12-30 21:52:25 -05:00

440 lines
11 KiB
Go

package search
import (
	"database/sql"
	"fmt"
	"math"
	"sort"
	"strings"
	"time"

	"git.leaktechnologies.dev/stu/Goondex/internal/db"
	"git.leaktechnologies.dev/stu/Goondex/internal/model"
)
// AdvancedSearch is the scene search service. It routes a free-text query
// either to a plain store-backed search or to a performer/tag based search,
// depending on what the parser extracts from the query.
type AdvancedSearch struct {
// db is the database handle used for the raw SQL queries issued by this type.
db *db.DB
// parser turns the free-text query into a structured SearchQuery.
parser *Parser
// sceneStore backs the basic (non-structured) search path.
sceneStore *db.SceneStore
// performerStore resolves performer names to performer records.
performerStore *db.PerformerStore
// tagStore is created by NewAdvancedSearch.
// NOTE(review): no method visible in this part of the file reads it.
tagStore *db.TagStore
}
// SearchResult is one scene returned by a search together with its relevance
// score and a description of why it matched.
type SearchResult struct {
// Scene is the matched scene record.
Scene model.Scene `json:"scene"`
// Score is the relevance used for ranking; higher is better. The producers
// in this file emit values no greater than 1.0.
Score float64 `json:"score"`
// MatchInfo records what part of the query produced this result.
MatchInfo MatchInfo `json:"match_info"`
// Related optionally lists scenes related to Scene; it is omitted from the
// JSON output when empty.
Related []model.Scene `json:"related,omitempty"`
}
// MatchInfo describes which parts of a query a search result matched.
type MatchInfo struct {
// PerformerMatch holds the performer names from the query that led to this
// result.
PerformerMatch []string `json:"performer_match"`
// TagMatches is meant to hold the matched tag names.
// NOTE(review): nothing visible in this part of the file populates it.
TagMatches []string `json:"tag_matches"`
// Confidence is the confidence attached to the match: a fixed 0.5 for basic
// title search, the result score for performer matches, and the average tag
// confidence for tag matches.
Confidence float64 `json:"confidence"`
}
// NewAdvancedSearch builds an AdvancedSearch on top of the given database
// handle. It wires up a fresh query parser plus scene, performer and tag
// stores that are all created from that same handle.
func NewAdvancedSearch(database *db.DB) *AdvancedSearch {
	service := new(AdvancedSearch)
	service.db = database
	service.parser = NewParser()
	service.sceneStore = db.NewSceneStore(database)
	service.performerStore = db.NewPerformerStore(database)
	service.tagStore = db.NewTagStore(database)
	return service
}
// Search is the public entry point. The raw query is run through the natural
// language parser; when the parser extracts structured criteria the
// performer/tag based search is used, otherwise the raw query is handed to the
// basic title search. limit is forwarded unchanged to whichever path runs.
func (as *AdvancedSearch) Search(query string, limit int) ([]SearchResult, error) {
	parsed := as.parser.Parse(query)

	// Structured criteria present: take the tag/performer route.
	if !as.isSimpleQuery(parsed) {
		return as.advancedSearch(parsed, limit)
	}

	// Nothing structured was recognised: search with the original text.
	return as.basicSearch(query, limit)
}
// isSimpleQuery reports whether the parsed query carries no structured
// criteria at all: no performers, actions, clothing, colors, age categories
// or settings. Such a query is served by the basic search path.
func (as *AdvancedSearch) isSimpleQuery(q *SearchQuery) bool {
	switch {
	case len(q.Performers) != 0,
		len(q.Actions) != 0,
		len(q.Clothing) != 0,
		len(q.Colors) != 0,
		len(q.AgeCategories) != 0,
		len(q.Settings) != 0:
		// At least one category was recognised, so the query is not simple.
		return false
	}
	return true
}
// basicSearch runs the raw query through the scene store and scores every
// returned scene by how well its title matches the query.
//
// Results are ordered by descending score; scenes with equal scores keep the
// order in which the store returned them. When limit is positive at most limit
// results are returned; a zero or negative limit returns every match, which is
// what this function did before it honoured limit.
//
// The error from the scene store is returned unchanged.
func (as *AdvancedSearch) basicSearch(query string, limit int) ([]SearchResult, error) {
	scenes, err := as.sceneStore.Search(query)
	if err != nil {
		return nil, err
	}

	results := make([]SearchResult, len(scenes))
	for i, scene := range scenes {
		results[i] = SearchResult{
			Scene: scene,
			Score: as.calculateTitleScore(scene.Title, query),
			MatchInfo: MatchInfo{
				// Plain title matches carry a fixed, middling confidence.
				Confidence: 0.5,
			},
		}
	}

	// Rank before truncating so the limit never discards a better match in
	// favour of a worse one.
	sort.SliceStable(results, func(i, j int) bool {
		return results[i].Score > results[j].Score
	})

	if limit > 0 && len(results) > limit {
		results = results[:limit]
	}
	return results, nil
}
// advancedSearch serves a structured query. Performer matches (when the query
// names performers) and tag matches are collected into one list, which is
// then de-duplicated, ranked and cut to limit. If anything is left, related
// scenes are attached before returning. The first error from either lookup
// aborts the search.
func (as *AdvancedSearch) advancedSearch(q *SearchQuery, limit int) ([]SearchResult, error) {
	var combined []SearchResult

	// Performer lookups only run when the query actually names performers.
	if len(q.Performers) != 0 {
		byPerformer, err := as.searchByPerformers(q.Performers, limit)
		if err != nil {
			return nil, err
		}
		combined = append(combined, byPerformer...)
	}

	// Tag lookups cover actions, clothing, colors, age categories and settings.
	byTag, err := as.searchByTags(q, limit)
	if err != nil {
		return nil, err
	}
	combined = append(combined, byTag...)

	ranked := as.deduplicateAndSort(combined, limit)
	if len(ranked) == 0 {
		return ranked, nil
	}
	return as.addRelatedContent(ranked), nil
}
// searchByPerformers resolves each requested name to performer records and
// returns one result per scene those performers appear in. A scene whose title
// contains the requested name (case-insensitively) scores 1.0, any other scene
// scores 0.8; the same value is reported as the match confidence.
//
// Lookups are best effort: a failing performer search or scene lookup is
// skipped and the remaining names/performers are still processed, so the
// returned error is always nil. limit is accepted but not applied here.
func (as *AdvancedSearch) searchByPerformers(performerNames []string, limit int) ([]SearchResult, error) {
	var matches []SearchResult

	for _, wanted := range performerNames {
		found, err := as.performerStore.Search(wanted)
		if err != nil {
			continue
		}

		// Lower-case the name once; it is compared against every scene title.
		needle := strings.ToLower(wanted)

		for _, p := range found {
			scenes, err := as.getScenesByPerformer(p.ID)
			if err != nil {
				continue
			}

			for _, sc := range scenes {
				relevance := 0.8 // performer appears in the scene
				if strings.Contains(strings.ToLower(sc.Title), needle) {
					relevance = 1.0 // ...and is also named in the title
				}
				matches = append(matches, SearchResult{
					Scene: sc,
					Score: relevance,
					MatchInfo: MatchInfo{
						PerformerMatch: []string{wanted},
						Confidence:     relevance,
					},
				})
			}
		}
	}

	return matches, nil
}
// searchByTags finds scenes tagged with any of the query's colors, clothing,
// actions, age categories or settings. Every term becomes a substring match
// ("%term%") against the tag name and the terms are OR-ed together. Scenes are
// ordered by the number of matching tag rows and then by average tag
// confidence, and up to limit*2 rows are fetched so the caller still has
// enough left after de-duplication.
//
// When the query has no terms in any of those categories an empty, non-nil
// slice is returned without touching the database. Query and row-iteration
// failures are returned wrapped as "tag search failed".
func (as *AdvancedSearch) searchByTags(q *SearchQuery, limit int) ([]SearchResult, error) {
	whereClauses := []string{}
	args := []interface{}{}

	// Clothing color tags.
	for _, color := range q.Colors {
		whereClauses = append(whereClauses, "t.name LIKE ?")
		args = append(args, "%"+color+"%")
	}
	// Clothing type tags.
	for _, clothing := range q.Clothing {
		whereClauses = append(whereClauses, "t.name LIKE ?")
		args = append(args, "%"+clothing+"%")
	}
	// Action tags.
	for _, action := range q.Actions {
		whereClauses = append(whereClauses, "t.name LIKE ?")
		args = append(args, "%"+action+"%")
	}
	// Age category tags.
	for _, age := range q.AgeCategories {
		whereClauses = append(whereClauses, "t.name LIKE ?")
		args = append(args, "%"+age+"%")
	}
	// Setting tags.
	for _, setting := range q.Settings {
		whereClauses = append(whereClauses, "t.name LIKE ?")
		args = append(args, "%"+setting+"%")
	}

	if len(whereClauses) == 0 {
		return []SearchResult{}, nil
	}

	// The column list is spelled out (rather than s.*) so it always lines up
	// with the 16 destinations scanSearchResults passes to Scan, regardless of
	// how many columns the scenes table has. Nullable columns are COALESCE'd
	// the same way as in the other scene queries in this file, so a NULL never
	// makes Scan fail and silently drop the row, and studio_id never comes
	// back as a nil pointer. GROUP BY s.id already yields one row per scene,
	// so no DISTINCT is needed.
	query := `
		SELECT s.id, s.title, COALESCE(s.code, ''), COALESCE(s.date, ''),
			COALESCE(s.studio_id, 0), COALESCE(s.description, ''),
			COALESCE(s.image_path, ''), COALESCE(s.image_url, ''),
			COALESCE(s.director, ''), COALESCE(s.url, ''),
			COALESCE(s.source, ''), COALESCE(s.source_id, ''),
			s.created_at, s.updated_at,
			COUNT(st.tag_id) AS match_count,
			COALESCE(AVG(st.confidence), 0) AS avg_confidence
		FROM scenes s
		INNER JOIN scene_tags st ON s.id = st.scene_id
		INNER JOIN tags t ON st.tag_id = t.id
		WHERE ` + strings.Join(whereClauses, " OR ") + `
		GROUP BY s.id
		ORDER BY match_count DESC, avg_confidence DESC
		LIMIT ?
	`
	args = append(args, limit*2) // Fetch extra rows; duplicates are removed later.

	rows, err := as.db.Conn().Query(query, args...)
	if err != nil {
		return nil, fmt.Errorf("tag search failed: %w", err)
	}
	defer rows.Close()

	results := as.scanSearchResults(rows)
	// scanSearchResults cannot report errors itself, so check here whether the
	// iteration stopped early because of a failure.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("tag search failed: %w", err)
	}
	return results, nil
}
// getScenesByPerformer loads every scene linked to the given performer through
// the scene_performers table, newest date first and then by title. Nullable
// columns are COALESCE'd to empty/zero values so they can be scanned directly.
// A query error is returned unchanged; decoding is delegated to scanScenes.
func (as *AdvancedSearch) getScenesByPerformer(performerID int64) ([]model.Scene, error) {
	const scenesForPerformer = `
SELECT s.id, s.title, COALESCE(s.code, ''), COALESCE(s.date, ''),
COALESCE(s.studio_id, 0), COALESCE(s.description, ''),
COALESCE(s.image_path, ''), COALESCE(s.image_url, ''),
COALESCE(s.director, ''), COALESCE(s.url, ''),
COALESCE(s.source, ''), COALESCE(s.source_id, ''),
s.created_at, s.updated_at
FROM scenes s
INNER JOIN scene_performers sp ON s.id = sp.scene_id
WHERE sp.performer_id = ?
ORDER BY s.date DESC, s.title
`

	rows, queryErr := as.db.Conn().Query(scenesForPerformer, performerID)
	if queryErr != nil {
		return nil, queryErr
	}
	defer rows.Close()

	return as.scanScenes(rows)
}
// calculateTitleScore rates how well a scene title matches a query, on a scale
// from 0.0 to 1.0. Both strings are compared case-insensitively with
// surrounding whitespace ignored:
//
//	1.0  title and query are equal
//	0.8  the title contains the query
//	0.6  the query contains the title
//	0.4 * (fraction of query words that also occur in the title) otherwise
//
// A blank title or a blank query scores 0.0. Without that guard the substring
// checks would treat the empty string as "contained" in everything and hand
// out 0.6/0.8 for inputs that carry no information.
func (as *AdvancedSearch) calculateTitleScore(title, query string) float64 {
	title = strings.ToLower(strings.TrimSpace(title))
	query = strings.ToLower(strings.TrimSpace(query))

	if title == "" || query == "" {
		return 0.0
	}

	switch {
	case title == query:
		return 1.0
	case strings.Contains(title, query):
		return 0.8
	case strings.Contains(query, title):
		return 0.6
	}

	// Word overlap: count the query words that appear as whole words in the
	// title. A set keeps this linear in the number of words.
	titleWords := make(map[string]struct{})
	for _, word := range strings.Fields(title) {
		titleWords[word] = struct{}{}
	}

	// query is non-blank here, so queryWords has at least one element and the
	// division below is safe.
	queryWords := strings.Fields(query)
	matches := 0
	for _, word := range queryWords {
		if _, ok := titleWords[word]; ok {
			matches++
		}
	}
	return float64(matches) / float64(len(queryWords)) * 0.4
}
// deduplicateAndSort collapses results that refer to the same scene, orders
// the survivors by descending score and truncates them to limit.
//
// When a scene occurs more than once, the first occurrence's Scene is kept
// but it takes over the highest score seen for that scene (together with the
// confidence that came with it), and the performer and tag match names of all
// occurrences are merged. Keeping only the first occurrence would let a weak
// match hide a stronger one found later for the same scene.
//
// Sorting is stable, so equally scored scenes stay in first-seen order. A
// negative limit is treated as zero instead of causing a slice-bounds panic.
// The returned slice is never nil.
func (as *AdvancedSearch) deduplicateAndSort(results []SearchResult, limit int) []SearchResult {
	position := make(map[int64]int, len(results)) // scene ID -> index in unique
	unique := make([]SearchResult, 0, len(results))

	for _, result := range results {
		idx, seen := position[result.Scene.ID]
		if !seen {
			position[result.Scene.ID] = len(unique)
			unique = append(unique, result)
			continue
		}

		kept := &unique[idx]
		kept.MatchInfo.PerformerMatch = mergeMatchNames(kept.MatchInfo.PerformerMatch, result.MatchInfo.PerformerMatch)
		kept.MatchInfo.TagMatches = mergeMatchNames(kept.MatchInfo.TagMatches, result.MatchInfo.TagMatches)
		if result.Score > kept.Score {
			kept.Score = result.Score
			kept.MatchInfo.Confidence = result.MatchInfo.Confidence
		}
	}

	sort.SliceStable(unique, func(i, j int) bool {
		return unique[i].Score > unique[j].Score
	})

	if limit < 0 {
		limit = 0
	}
	if len(unique) > limit {
		unique = unique[:limit]
	}
	return unique
}

// mergeMatchNames returns existing followed by every name from extra that is
// not already present. The result is a fresh slice whenever anything is added,
// so the backing arrays of the inputs are never written to.
func mergeMatchNames(existing, extra []string) []string {
	if len(extra) == 0 {
		return existing
	}
	merged := make([]string, 0, len(existing)+len(extra))
	merged = append(merged, existing...)
	for _, name := range extra {
		present := false
		for _, have := range merged {
			if have == name {
				present = true
				break
			}
		}
		if !present {
			merged = append(merged, name)
		}
	}
	return merged
}
// addRelatedContent attaches up to three related scenes to the first (top)
// result and returns the same slice. Related scenes are looked up by the top
// scene's ID and studio. The lookup is best effort: if it fails, results are
// returned unchanged.
//
// The top scene's StudioID is a pointer and may be nil (a scene row whose
// studio_id is NULL scans to a nil pointer). In that case studio ID 0 is
// passed on instead of dereferencing nil, so the lookup falls back to
// whatever the query finds through shared performers.
func (as *AdvancedSearch) addRelatedContent(results []SearchResult) []SearchResult {
	if len(results) == 0 {
		return results
	}

	top := &results[0]

	var studioID int64
	if top.Scene.StudioID != nil {
		studioID = *top.Scene.StudioID
	}

	related, err := as.findRelatedScenes(top.Scene.ID, studioID)
	if err != nil {
		return results
	}

	if len(related) > 3 {
		related = related[:3] // Keep the related list short.
	}
	top.Related = related
	return results
}
// findRelatedScenes returns up to ten scenes, newest date first, that are
// related to the scene with the given ID: a scene qualifies when it belongs to
// the given studio or shares at least one performer with the base scene. The
// base scene itself is never returned. A query error is returned unchanged;
// decoding is delegated to scanScenes.
func (as *AdvancedSearch) findRelatedScenes(sceneID, studioID int64) ([]model.Scene, error) {
	// The base scene ID is bound three times: once to find its performers and
	// twice to exclude the base scene (inside the subquery and at the top).
	const relatedScenesSQL = `
SELECT DISTINCT s.id, s.title, COALESCE(s.code, ''), COALESCE(s.date, ''),
COALESCE(s.studio_id, 0), COALESCE(s.description, ''),
COALESCE(s.image_path, ''), COALESCE(s.image_url, ''),
COALESCE(s.director, ''), COALESCE(s.url, ''),
COALESCE(s.source, ''), COALESCE(s.source_id, ''),
s.created_at, s.updated_at
FROM scenes s
WHERE (s.studio_id = ? OR s.id IN (
SELECT sp2.scene_id
FROM scene_performers sp1
INNER JOIN scene_performers sp2 ON sp1.performer_id = sp2.performer_id
WHERE sp1.scene_id = ? AND sp2.scene_id != ?
)) AND s.id != ?
ORDER BY s.date DESC
LIMIT 10
`

	rows, queryErr := as.db.Conn().Query(relatedScenesSQL, studioID, sceneID, sceneID, sceneID)
	if queryErr != nil {
		return nil, queryErr
	}
	defer rows.Close()

	return as.scanScenes(rows)
}
// scanSearchResults decodes tag-search rows into SearchResult values. Each row
// must carry the fourteen scene columns followed by the match count and the
// average tag confidence, in that order.
//
// Rows that fail to scan are skipped. Timestamps are parsed with the layout
// "2006-01-02 15:04:05"; a value that does not parse leaves the corresponding
// time at its zero value. The score is 0.7*avgConfidence + 0.3*matchCount,
// capped at 1.0, and the average confidence is reported as the match
// confidence.
func (as *AdvancedSearch) scanSearchResults(rows *sql.Rows) []SearchResult {
	const timestampLayout = "2006-01-02 15:04:05"

	var found []SearchResult
	for rows.Next() {
		var (
			sc               model.Scene
			created, updated string
			hits             int
			meanConfidence   float64
		)

		if scanErr := rows.Scan(
			&sc.ID, &sc.Title, &sc.Code, &sc.Date, &sc.StudioID,
			&sc.Description, &sc.ImagePath, &sc.ImageURL, &sc.Director,
			&sc.URL, &sc.Source, &sc.SourceID, &created, &updated,
			&hits, &meanConfidence,
		); scanErr != nil {
			continue
		}

		if ts, parseErr := time.Parse(timestampLayout, created); parseErr == nil {
			sc.CreatedAt = ts
		}
		if ts, parseErr := time.Parse(timestampLayout, updated); parseErr == nil {
			sc.UpdatedAt = ts
		}

		found = append(found, SearchResult{
			Scene: sc,
			// Blend confidence and match count, never exceeding 1.0.
			Score: math.Min(meanConfidence*0.7+float64(hits)*0.3, 1.0),
			MatchInfo: MatchInfo{
				Confidence: meanConfidence,
			},
		})
	}
	return found
}
// scanScenes decodes scene rows into model.Scene values. Each row must carry
// the fourteen scene columns in the order used by the scene queries in this
// file (id, title, code, date, studio_id, description, image_path, image_url,
// director, url, source, source_id, created_at, updated_at).
//
// Decoding is best effort per row: a row that fails to scan is skipped, and a
// timestamp that does not match "2006-01-02 15:04:05" leaves the corresponding
// time at its zero value. A failure of the row iteration itself (reported by
// rows.Err once Next returns false) is returned as an error instead of being
// mistaken for a short but complete result set.
func (as *AdvancedSearch) scanScenes(rows *sql.Rows) ([]model.Scene, error) {
	const timestampLayout = "2006-01-02 15:04:05"

	var scenes []model.Scene
	for rows.Next() {
		var scene model.Scene
		var createdAt, updatedAt string

		err := rows.Scan(
			&scene.ID, &scene.Title, &scene.Code, &scene.Date, &scene.StudioID,
			&scene.Description, &scene.ImagePath, &scene.ImageURL, &scene.Director,
			&scene.URL, &scene.Source, &scene.SourceID, &createdAt, &updatedAt,
		)
		if err != nil {
			// Skip rows that cannot be decoded rather than failing the batch.
			continue
		}

		if parsed, err := time.Parse(timestampLayout, createdAt); err == nil {
			scene.CreatedAt = parsed
		}
		if parsed, err := time.Parse(timestampLayout, updatedAt); err == nil {
			scene.UpdatedAt = parsed
		}

		scenes = append(scenes, scene)
	}

	// Next returns false both at the end of the result set and on failure;
	// Err tells the two apart.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterating scene rows: %w", err)
	}
	return scenes, nil
}