MAJOR FEATURES ADDED: ====================== 🤖 ML Analysis System: - Comprehensive scene image analysis with per-scene predictions - Enhanced database schema with scene_ml_analysis table - Advanced detection for clothing colors, body types, age categories, positions, settings - Support for multiple prediction types (clothing, body, sexual acts, etc.) - Confidence scoring and ML source tracking 🧠 Enhanced Search Capabilities: - Natural language parser for complex queries (e.g., "Teenage Riley Reid creampie older man pink thong black heels red couch") - Category-based search with confidence-weighted results - ML-enhanced tag matching with automatic fallback to traditional search - Support for "Money Shot: Creampie" vs "Cum in Open Mouth" detection 🗄️ Advanced Database Schema: - Male detection: circumcised field (0/1) - Pubic hair types: natural, shaved, trimmed, landing strip, bushy, hairy - Scene ML analysis table for storing per-scene predictions - Comprehensive seed tags for all detection categories 🏗️ Dual Scraper Architecture: - Flexible import service supporting both TPDB and Adult Empire scrapers - Bulk scraper implementation for Adult Empire using multiple search strategies - Progress tracking with Server-Sent Events (SSE) for real-time updates - Graceful fallback from Adult Empire to TPDB when needed 📝 Enhanced Import System: - Individual bulk imports (performers, studios, scenes, movies) - Combined "import all" operation - Real-time progress tracking with job management - Error handling and retry mechanisms - Support for multiple import sources and strategies 🔧 Technical Improvements: - Modular component architecture for maintainability - Enhanced error handling and logging - Performance-optimized database queries with proper indexing - Configurable import limits and rate limiting - Comprehensive testing framework This commit establishes Goondex as a comprehensive adult content discovery platform with ML-powered analysis and advanced search capabilities, 
ready for integration with computer vision models for automated tagging and scene analysis.
440 lines
11 KiB
Go
440 lines
11 KiB
Go
package search
|
|
|
|
import (
	"database/sql"
	"fmt"
	"math"
	"sort"
	"strings"
	"time"

	"git.leaktechnologies.dev/stu/Goondex/internal/db"
	"git.leaktechnologies.dev/stu/Goondex/internal/model"
)
|
|
|
|
// AdvancedSearch handles complex scene search with ML tag matching
|
|
type AdvancedSearch struct {
|
|
db *db.DB
|
|
parser *Parser
|
|
sceneStore *db.SceneStore
|
|
performerStore *db.PerformerStore
|
|
tagStore *db.TagStore
|
|
}
|
|
|
|
// SearchResult represents a scored search result
|
|
type SearchResult struct {
|
|
Scene model.Scene `json:"scene"`
|
|
Score float64 `json:"score"`
|
|
MatchInfo MatchInfo `json:"match_info"`
|
|
Related []model.Scene `json:"related,omitempty"`
|
|
}
|
|
|
|
// MatchInfo describes why a scene was returned for a query.
type MatchInfo struct {
	// PerformerMatch lists the queried performer names that matched.
	PerformerMatch []string `json:"performer_match"`
	// TagMatches lists the tags that matched.
	TagMatches []string `json:"tag_matches"`
	// Confidence is the confidence attached to the match.
	Confidence float64 `json:"confidence"`
}
|
|
|
|
// NewAdvancedSearch creates a new advanced search service
|
|
func NewAdvancedSearch(database *db.DB) *AdvancedSearch {
|
|
return &AdvancedSearch{
|
|
db: database,
|
|
parser: NewParser(),
|
|
sceneStore: db.NewSceneStore(database),
|
|
performerStore: db.NewPerformerStore(database),
|
|
tagStore: db.NewTagStore(database),
|
|
}
|
|
}
|
|
|
|
// Search performs advanced search with natural language parsing
|
|
func (as *AdvancedSearch) Search(query string, limit int) ([]SearchResult, error) {
|
|
// Parse the natural language query
|
|
parsedQuery := as.parser.Parse(query)
|
|
|
|
// If no specific criteria, fallback to basic title search
|
|
if as.isSimpleQuery(parsedQuery) {
|
|
return as.basicSearch(query, limit)
|
|
}
|
|
|
|
// Perform advanced tag-based search
|
|
return as.advancedSearch(parsedQuery, limit)
|
|
}
|
|
|
|
// isSimpleQuery checks if query has specific searchable criteria
|
|
func (as *AdvancedSearch) isSimpleQuery(q *SearchQuery) bool {
|
|
return len(q.Performers) == 0 && len(q.Actions) == 0 &&
|
|
len(q.Clothing) == 0 && len(q.Colors) == 0 &&
|
|
len(q.AgeCategories) == 0 && len(q.Settings) == 0
|
|
}
|
|
|
|
// basicSearch performs simple title-based search
|
|
func (as *AdvancedSearch) basicSearch(query string, limit int) ([]SearchResult, error) {
|
|
scenes, err := as.sceneStore.Search(query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
results := make([]SearchResult, len(scenes))
|
|
for i, scene := range scenes {
|
|
results[i] = SearchResult{
|
|
Scene: scene,
|
|
Score: as.calculateTitleScore(scene.Title, query),
|
|
MatchInfo: MatchInfo{
|
|
Confidence: 0.5,
|
|
},
|
|
}
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// advancedSearch performs complex tag-based search
|
|
func (as *AdvancedSearch) advancedSearch(q *SearchQuery, limit int) ([]SearchResult, error) {
|
|
var results []SearchResult
|
|
|
|
// Search by performer names first
|
|
if len(q.Performers) > 0 {
|
|
performerResults, err := as.searchByPerformers(q.Performers, limit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
results = append(results, performerResults...)
|
|
}
|
|
|
|
// Search by tags (actions, clothing, colors, etc.)
|
|
tagResults, err := as.searchByTags(q, limit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
results = append(results, tagResults...)
|
|
|
|
// Remove duplicates and sort by score
|
|
results = as.deduplicateAndSort(results, limit)
|
|
|
|
// Add related content if requested
|
|
if len(results) > 0 {
|
|
results = as.addRelatedContent(results)
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// searchByPerformers finds scenes with specific performers
|
|
func (as *AdvancedSearch) searchByPerformers(performerNames []string, limit int) ([]SearchResult, error) {
|
|
var results []SearchResult
|
|
|
|
for _, name := range performerNames {
|
|
performers, err := as.performerStore.Search(name)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
for _, performer := range performers {
|
|
scenes, err := as.getScenesByPerformer(performer.ID)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
for _, scene := range scenes {
|
|
score := 1.0 // Perfect match for performer
|
|
if !strings.Contains(strings.ToLower(scene.Title), strings.ToLower(name)) {
|
|
score = 0.8 // Scene exists but name not in title
|
|
}
|
|
|
|
results = append(results, SearchResult{
|
|
Scene: scene,
|
|
Score: score,
|
|
MatchInfo: MatchInfo{
|
|
PerformerMatch: []string{name},
|
|
Confidence: score,
|
|
},
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// searchByTags finds scenes matching various tag categories
|
|
func (as *AdvancedSearch) searchByTags(q *SearchQuery, limit int) ([]SearchResult, error) {
|
|
// Build complex SQL query for tag matching
|
|
whereClauses := []string{}
|
|
args := []interface{}{}
|
|
|
|
// Add clothing color tags
|
|
for _, color := range q.Colors {
|
|
whereClauses = append(whereClauses, "t.name LIKE ?")
|
|
args = append(args, "%"+color+"%")
|
|
}
|
|
|
|
// Add clothing type tags
|
|
for _, clothing := range q.Clothing {
|
|
whereClauses = append(whereClauses, "t.name LIKE ?")
|
|
args = append(args, "%"+clothing+"%")
|
|
}
|
|
|
|
// Add action tags
|
|
for _, action := range q.Actions {
|
|
whereClauses = append(whereClauses, "t.name LIKE ?")
|
|
args = append(args, "%"+action+"%")
|
|
}
|
|
|
|
// Add age category tags
|
|
for _, age := range q.AgeCategories {
|
|
whereClauses = append(whereClauses, "t.name LIKE ?")
|
|
args = append(args, "%"+age+"%")
|
|
}
|
|
|
|
// Add setting tags
|
|
for _, setting := range q.Settings {
|
|
whereClauses = append(whereClauses, "t.name LIKE ?")
|
|
args = append(args, "%"+setting+"%")
|
|
}
|
|
|
|
if len(whereClauses) == 0 {
|
|
return []SearchResult{}, nil
|
|
}
|
|
|
|
// Execute complex tag search query
|
|
query := `
|
|
SELECT DISTINCT s.*, COUNT(st.tag_id) as match_count, AVG(st.confidence) as avg_confidence
|
|
FROM scenes s
|
|
INNER JOIN scene_tags st ON s.id = st.scene_id
|
|
INNER JOIN tags t ON st.tag_id = t.id
|
|
WHERE ` + strings.Join(whereClauses, " OR ") + `
|
|
GROUP BY s.id
|
|
ORDER BY match_count DESC, avg_confidence DESC
|
|
LIMIT ?
|
|
`
|
|
|
|
args = append(args, limit*2) // Get more for deduplication
|
|
|
|
rows, err := as.db.Conn().Query(query, args...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("tag search failed: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
return as.scanSearchResults(rows), nil
|
|
}
|
|
|
|
// getScenesByPerformer retrieves scenes for a specific performer
|
|
func (as *AdvancedSearch) getScenesByPerformer(performerID int64) ([]model.Scene, error) {
|
|
rows, err := as.db.Conn().Query(`
|
|
SELECT s.id, s.title, COALESCE(s.code, ''), COALESCE(s.date, ''),
|
|
COALESCE(s.studio_id, 0), COALESCE(s.description, ''),
|
|
COALESCE(s.image_path, ''), COALESCE(s.image_url, ''),
|
|
COALESCE(s.director, ''), COALESCE(s.url, ''),
|
|
COALESCE(s.source, ''), COALESCE(s.source_id, ''),
|
|
s.created_at, s.updated_at
|
|
FROM scenes s
|
|
INNER JOIN scene_performers sp ON s.id = sp.scene_id
|
|
WHERE sp.performer_id = ?
|
|
ORDER BY s.date DESC, s.title
|
|
`, performerID)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
return as.scanScenes(rows)
|
|
}
|
|
|
|
// calculateTitleScore calculates relevance score for title matching
|
|
func (as *AdvancedSearch) calculateTitleScore(title, query string) float64 {
|
|
title = strings.ToLower(title)
|
|
query = strings.ToLower(query)
|
|
|
|
// Exact match
|
|
if title == query {
|
|
return 1.0
|
|
}
|
|
|
|
// Title contains query
|
|
if strings.Contains(title, query) {
|
|
return 0.8
|
|
}
|
|
|
|
// Query contains title
|
|
if strings.Contains(query, title) {
|
|
return 0.6
|
|
}
|
|
|
|
// Word overlap
|
|
titleWords := strings.Fields(title)
|
|
queryWords := strings.Fields(query)
|
|
matches := 0
|
|
|
|
for _, qWord := range queryWords {
|
|
for _, tWord := range titleWords {
|
|
if qWord == tWord {
|
|
matches++
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(queryWords) == 0 {
|
|
return 0.0
|
|
}
|
|
|
|
return float64(matches) / float64(len(queryWords)) * 0.4
|
|
}
|
|
|
|
// deduplicateAndSort removes duplicate scenes and sorts by score
|
|
func (as *AdvancedSearch) deduplicateAndSort(results []SearchResult, limit int) []SearchResult {
|
|
seen := make(map[int64]bool)
|
|
unique := []SearchResult{}
|
|
|
|
for _, result := range results {
|
|
if !seen[result.Scene.ID] {
|
|
seen[result.Scene.ID] = true
|
|
unique = append(unique, result)
|
|
}
|
|
}
|
|
|
|
// Sort by score (higher first)
|
|
for i := 0; i < len(unique); i++ {
|
|
for j := i + 1; j < len(unique); j++ {
|
|
if unique[j].Score > unique[i].Score {
|
|
unique[i], unique[j] = unique[j], unique[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
if len(unique) > limit {
|
|
unique = unique[:limit]
|
|
}
|
|
|
|
return unique
|
|
}
|
|
|
|
// addRelatedContent adds related scenes to search results
|
|
func (as *AdvancedSearch) addRelatedContent(results []SearchResult) []SearchResult {
|
|
if len(results) == 0 {
|
|
return results
|
|
}
|
|
|
|
// For now, add scenes from same studio or performers
|
|
baseScene := results[0].Scene
|
|
related, err := as.findRelatedScenes(baseScene.ID, *baseScene.StudioID)
|
|
if err != nil {
|
|
return results
|
|
}
|
|
|
|
if len(related) > 3 {
|
|
related = related[:3] // Limit related content
|
|
}
|
|
|
|
results[0].Related = related
|
|
return results
|
|
}
|
|
|
|
// findRelatedScenes finds scenes related to a base scene
|
|
func (as *AdvancedSearch) findRelatedScenes(sceneID, studioID int64) ([]model.Scene, error) {
|
|
// Find scenes with same studio or same performers
|
|
query := `
|
|
SELECT DISTINCT s.id, s.title, COALESCE(s.code, ''), COALESCE(s.date, ''),
|
|
COALESCE(s.studio_id, 0), COALESCE(s.description, ''),
|
|
COALESCE(s.image_path, ''), COALESCE(s.image_url, ''),
|
|
COALESCE(s.director, ''), COALESCE(s.url, ''),
|
|
COALESCE(s.source, ''), COALESCE(s.source_id, ''),
|
|
s.created_at, s.updated_at
|
|
FROM scenes s
|
|
WHERE (s.studio_id = ? OR s.id IN (
|
|
SELECT sp2.scene_id
|
|
FROM scene_performers sp1
|
|
INNER JOIN scene_performers sp2 ON sp1.performer_id = sp2.performer_id
|
|
WHERE sp1.scene_id = ? AND sp2.scene_id != ?
|
|
)) AND s.id != ?
|
|
ORDER BY s.date DESC
|
|
LIMIT 10
|
|
`
|
|
|
|
rows, err := as.db.Conn().Query(query, studioID, sceneID, sceneID, sceneID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
return as.scanScenes(rows)
|
|
}
|
|
|
|
// scanSearchResults converts SQL rows to SearchResult structs
|
|
func (as *AdvancedSearch) scanSearchResults(rows *sql.Rows) []SearchResult {
|
|
var results []SearchResult
|
|
|
|
for rows.Next() {
|
|
var scene model.Scene
|
|
var createdAt, updatedAt string
|
|
var matchCount int
|
|
var avgConfidence float64
|
|
|
|
err := rows.Scan(
|
|
&scene.ID, &scene.Title, &scene.Code, &scene.Date, &scene.StudioID,
|
|
&scene.Description, &scene.ImagePath, &scene.ImageURL, &scene.Director,
|
|
&scene.URL, &scene.Source, &scene.SourceID, &createdAt, &updatedAt,
|
|
&matchCount, &avgConfidence,
|
|
)
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
// Parse timestamps
|
|
if parsedTime, err := time.Parse("2006-01-02 15:04:05", createdAt); err == nil {
|
|
scene.CreatedAt = parsedTime
|
|
}
|
|
if parsedTime, err := time.Parse("2006-01-02 15:04:05", updatedAt); err == nil {
|
|
scene.UpdatedAt = parsedTime
|
|
}
|
|
|
|
// Calculate composite score
|
|
score := math.Min(avgConfidence*0.7+float64(matchCount)*0.3, 1.0)
|
|
|
|
results = append(results, SearchResult{
|
|
Scene: scene,
|
|
Score: score,
|
|
MatchInfo: MatchInfo{
|
|
Confidence: avgConfidence,
|
|
},
|
|
})
|
|
}
|
|
|
|
return results
|
|
}
|
|
|
|
// scanScenes converts SQL rows to Scene structs
|
|
func (as *AdvancedSearch) scanScenes(rows *sql.Rows) ([]model.Scene, error) {
|
|
var scenes []model.Scene
|
|
|
|
for rows.Next() {
|
|
var scene model.Scene
|
|
var createdAt, updatedAt string
|
|
|
|
err := rows.Scan(
|
|
&scene.ID, &scene.Title, &scene.Code, &scene.Date, &scene.StudioID,
|
|
&scene.Description, &scene.ImagePath, &scene.ImageURL, &scene.Director,
|
|
&scene.URL, &scene.Source, &scene.SourceID, &createdAt, &updatedAt,
|
|
)
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
// Parse timestamps
|
|
if parsedTime, err := time.Parse("2006-01-02 15:04:05", createdAt); err == nil {
|
|
scene.CreatedAt = parsedTime
|
|
}
|
|
if parsedTime, err := time.Parse("2006-01-02 15:04:05", updatedAt); err == nil {
|
|
scene.UpdatedAt = parsedTime
|
|
}
|
|
|
|
scenes = append(scenes, scene)
|
|
}
|
|
|
|
return scenes, nil
|
|
}
|