MAJOR FEATURES ADDED: ====================== 🤖 ML Analysis System: - Comprehensive scene image analysis with per-scene predictions - Enhanced database schema with scene_ml_analysis table - Advanced detection for clothing colors, body types, age categories, positions, settings - Support for multiple prediction types (clothing, body, sexual acts, etc.) - Confidence scoring and ML source tracking 🧠 Enhanced Search Capabilities: - Natural language parser for complex queries (e.g., "Teenage Riley Reid creampie older man pink thong black heels red couch") - Category-based search with confidence-weighted results - ML-enhanced tag matching with automatic fallback to traditional search - Support for "Money Shot: Creampie" vs "Cum in Open Mouth" detection 🗄️ Advanced Database Schema: - Male detection: circumcised field (0/1) - Pubic hair types: natural, shaved, trimmed, landing strip, bushy, hairy - Scene ML analysis table for storing per-scene predictions - Comprehensive seed tags for all detection categories 🏗️ Dual Scraper Architecture: - Flexible import service supporting both TPDB and Adult Empire scrapers - Bulk scraper implementation for Adult Empire using multiple search strategies - Progress tracking with Server-Sent Events (SSE) for real-time updates - Graceful fallback from Adult Empire to TPDB when needed 📝 Enhanced Import System: - Individual bulk imports (performers, studios, scenes, movies) - Combined "import all" operation - Real-time progress tracking with job management - Error handling and retry mechanisms - Support for multiple import sources and strategies 🔧 Technical Improvements: - Modular component architecture for maintainability - Enhanced error handling and logging - Performance-optimized database queries with proper indexing - Configurable import limits and rate limiting - Comprehensive testing framework This commit establishes Goondex as a comprehensive adult content discovery platform with ML-powered analysis and advanced search capabilities, ready for integration with computer vision models for automated tagging and scene analysis.
118 lines
3.5 KiB
Go
118 lines
3.5 KiB
Go
package scraper
|
|
|
|
import (
|
|
"context"
|
|
"git.leaktechnologies.dev/stu/Goondex/internal/model"
|
|
adultemp "git.leaktechnologies.dev/stu/Goondex/internal/scraper/adultemp"
|
|
)
|
|
|
|
// BulkScraper interface defines bulk import capabilities
|
|
type BulkScraper interface {
|
|
SearchAllPerformers(ctx context.Context) ([]adultemp.SearchResult, error)
|
|
SearchAllStudios(ctx context.Context) ([]adultemp.SearchResult, error)
|
|
SearchAllScenes(ctx context.Context) ([]adultemp.SearchResult, error)
|
|
ConvertPerformerToModel(data interface{}) *model.Performer
|
|
ConvertStudioToModel(data interface{}) *model.Studio
|
|
ConvertSceneToModel(data interface{}) *model.Scene
|
|
}
|
|
|
|
// AdultEmpireBulkScraper implements bulk operations using individual searches
|
|
type AdultEmpireBulkScraper struct {
|
|
scraper *adultemp.Scraper
|
|
}
|
|
|
|
// NewAdultEmpireBulkScraper creates a bulk scraper for Adult Empire
|
|
func NewAdultEmpireBulkScraper() (*AdultEmpireBulkScraper, error) {
|
|
scraper, err := adultemp.NewScraper()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &AdultEmpireBulkScraper{
|
|
scraper: scraper,
|
|
}, nil
|
|
}
|
|
|
|
// SearchAllPerformers fetches all performers by using generic searches
|
|
func (a *AdultEmpireBulkScraper) SearchAllPerformers(ctx context.Context) ([]adultemp.SearchResult, error) {
|
|
searchTerms := []string{"", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}
|
|
|
|
var allResults []adultemp.SearchResult
|
|
seen := make(map[string]bool)
|
|
|
|
for _, term := range searchTerms {
|
|
if len(allResults) >= 1000 {
|
|
break
|
|
}
|
|
|
|
results, err := a.scraper.SearchPerformersByName(ctx, term)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
for _, result := range results {
|
|
if !seen[result.URL] {
|
|
seen[result.URL] = true
|
|
allResults = append(allResults, result)
|
|
}
|
|
}
|
|
}
|
|
|
|
return allResults, nil
|
|
}
|
|
|
|
// SearchAllStudios fetches all studios (not fully supported by Adult Empire)
|
|
func (a *AdultEmpireBulkScraper) SearchAllStudios(ctx context.Context) ([]adultemp.SearchResult, error) {
|
|
// Adult Empire doesn't have dedicated studio search, return empty for now
|
|
return []adultemp.SearchResult{}, nil
|
|
}
|
|
|
|
// SearchAllScenes fetches all scenes
|
|
func (a *AdultEmpireBulkScraper) SearchAllScenes(ctx context.Context) ([]adultemp.SearchResult, error) {
|
|
searchTerms := []string{"", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}
|
|
|
|
var allResults []adultemp.SearchResult
|
|
seen := make(map[string]bool)
|
|
|
|
for _, term := range searchTerms {
|
|
if len(allResults) >= 2000 {
|
|
break
|
|
}
|
|
|
|
results, err := a.scraper.SearchScenesByName(ctx, term)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
for _, result := range results {
|
|
if !seen[result.URL] {
|
|
seen[result.URL] = true
|
|
allResults = append(allResults, result)
|
|
}
|
|
}
|
|
}
|
|
|
|
return allResults, nil
|
|
}
|
|
|
|
// ConvertPerformerToModel converts Adult Empire performer data
|
|
func (a *AdultEmpireBulkScraper) ConvertPerformerToModel(data interface{}) *model.Performer {
|
|
if performerData, ok := data.(*adultemp.PerformerData); ok {
|
|
return a.scraper.ConvertPerformerToModel(performerData)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ConvertStudioToModel converts studio data (not implemented for Adult Empire)
|
|
func (a *AdultEmpireBulkScraper) ConvertStudioToModel(data interface{}) *model.Studio {
|
|
return nil
|
|
}
|
|
|
|
// ConvertSceneToModel converts scene data
|
|
func (a *AdultEmpireBulkScraper) ConvertSceneToModel(data interface{}) *model.Scene {
|
|
if sceneData, ok := data.(*adultemp.SceneData); ok {
|
|
return a.scraper.ConvertSceneToModel(sceneData)
|
|
}
|
|
return nil
|
|
}
|