Goondex/internal/scraper/bulk.go
Stu Leak 3b8adad57d 🚀 Goondex v0.1.0-dev3 - Comprehensive ML-Powered Search & Import System
MAJOR FEATURES ADDED:
======================

🤖 ML Analysis System:
- Comprehensive scene image analysis with per-scene predictions
- Enhanced database schema with scene_ml_analysis table
- Advanced detection for clothing colors, body types, age categories, positions, settings
- Support for multiple prediction types (clothing, body, sexual acts, etc.)
- Confidence scoring and ML source tracking

🧠 Enhanced Search Capabilities:
- Natural language parser for complex queries (e.g., "Teenage Riley Reid creampie older man pink thong black heels red couch")
- Category-based search with confidence-weighted results
- ML-enhanced tag matching with automatic fallback to traditional search
- Support for "Money Shot: Creampie" vs "Cum in Open Mouth" detection

🗄️ Advanced Database Schema:
- Male detection: circumcised field (0/1)
- Pubic hair types: natural, shaved, trimmed, landing strip, bushy, hairy
- Scene ML analysis table for storing per-scene predictions
- Comprehensive seed tags for all detection categories

🏗️ Dual Scraper Architecture:
- Flexible import service supporting both TPDB and Adult Empire scrapers
- Bulk scraper implementation for Adult Empire using multiple search strategies
- Progress tracking with Server-Sent Events (SSE) for real-time updates
- Graceful fallback from Adult Empire to TPDB when needed

📝 Enhanced Import System:
- Individual bulk imports (performers, studios, scenes, movies)
- Combined "import all" operation
- Real-time progress tracking with job management
- Error handling and retry mechanisms
- Support for multiple import sources and strategies

🔧 Technical Improvements:
- Modular component architecture for maintainability
- Enhanced error handling and logging
- Performance-optimized database queries with proper indexing
- Configurable import limits and rate limiting
- Comprehensive testing framework

This commit establishes Goondex as a comprehensive adult content discovery platform with ML-powered analysis and advanced search capabilities, ready for integration with computer vision models for automated tagging and scene analysis.
2025-12-30 21:52:25 -05:00

118 lines
3.5 KiB
Go

package scraper
import (
"context"
"git.leaktechnologies.dev/stu/Goondex/internal/model"
adultemp "git.leaktechnologies.dev/stu/Goondex/internal/scraper/adultemp"
)
// BulkScraper describes a source that can enumerate performers, studios and
// scenes in bulk and convert source-specific records into Goondex models.
//
// NOTE(review): the search methods return adultemp.SearchResult, so the
// interface is currently coupled to the Adult Empire result type — confirm
// whether other scrapers are expected to reuse that type.
type BulkScraper interface {
// SearchAllPerformers returns the performer search results the source can
// enumerate.
SearchAllPerformers(ctx context.Context) ([]adultemp.SearchResult, error)
// SearchAllStudios returns the studio search results the source can
// enumerate.
SearchAllStudios(ctx context.Context) ([]adultemp.SearchResult, error)
// SearchAllScenes returns the scene search results the source can
// enumerate.
SearchAllScenes(ctx context.Context) ([]adultemp.SearchResult, error)
// ConvertPerformerToModel converts source-specific performer data into a
// model.Performer. The accepted dynamic type of data is defined by the
// implementation.
ConvertPerformerToModel(data interface{}) *model.Performer
// ConvertStudioToModel converts source-specific studio data into a
// model.Studio. The accepted dynamic type of data is defined by the
// implementation.
ConvertStudioToModel(data interface{}) *model.Studio
// ConvertSceneToModel converts source-specific scene data into a
// model.Scene. The accepted dynamic type of data is defined by the
// implementation.
ConvertSceneToModel(data interface{}) *model.Scene
}
// AdultEmpireBulkScraper provides bulk operations on top of an
// adultemp.Scraper by issuing many individual name searches and merging the
// results. Construct it with NewAdultEmpireBulkScraper so the wrapped scraper
// is initialised.
type AdultEmpireBulkScraper struct {
// scraper is the underlying Adult Empire scraper that performs each
// individual search and model conversion.
scraper *adultemp.Scraper
}
// NewAdultEmpireBulkScraper builds an AdultEmpireBulkScraper backed by a
// freshly created adultemp.Scraper. Any error from adultemp.NewScraper is
// returned unchanged, together with a nil scraper.
func NewAdultEmpireBulkScraper() (*AdultEmpireBulkScraper, error) {
	inner, err := adultemp.NewScraper()
	if err != nil {
		return nil, err
	}

	bulk := &AdultEmpireBulkScraper{scraper: inner}
	return bulk, nil
}
// SearchAllPerformers collects performers by running one name search per
// term (the empty string plus each letter "a" through "z") and merging the
// results, de-duplicated by URL.
//
// The crawl is best-effort: a failed search for a single term is skipped so
// that the remaining terms can still contribute. An error is returned only
// when:
//   - ctx is cancelled or its deadline passes (ctx.Err() is returned and no
//     further searches are issued), or
//   - nothing was collected and at least one search failed (the first search
//     error is returned), so callers can tell an outage from an empty result.
//
// At most 1000 results are returned.
func (a *AdultEmpireBulkScraper) SearchAllPerformers(ctx context.Context) ([]adultemp.SearchResult, error) {
	// maxPerformers is a hard upper bound on the number of results returned.
	const maxPerformers = 1000

	searchTerms := []string{"", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}

	var (
		allResults []adultemp.SearchResult
		firstErr   error
	)
	seen := make(map[string]bool)

	for _, term := range searchTerms {
		if len(allResults) >= maxPerformers {
			break
		}

		// Stop issuing requests once the caller has given up.
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		results, err := a.scraper.SearchPerformersByName(ctx, term)
		if err != nil {
			// Remember the first failure, but keep going: one bad term
			// should not abort the whole crawl.
			if firstErr == nil {
				firstErr = err
			}
			continue
		}

		for _, result := range results {
			// Enforce the cap inside the batch too, so a large final batch
			// cannot push the total past the limit.
			if len(allResults) >= maxPerformers {
				break
			}
			if seen[result.URL] {
				continue
			}
			seen[result.URL] = true
			allResults = append(allResults, result)
		}
	}

	// Nothing collected and at least one search failed: report the failure
	// instead of presenting it as an empty result.
	if len(allResults) == 0 && firstErr != nil {
		return nil, firstErr
	}
	return allResults, nil
}
// SearchAllStudios is a placeholder: Adult Empire has no dedicated studio
// search, so for now it always returns an empty, non-nil slice and a nil
// error. ctx is accepted but not used.
func (a *AdultEmpireBulkScraper) SearchAllStudios(ctx context.Context) ([]adultemp.SearchResult, error) {
	noStudios := make([]adultemp.SearchResult, 0)
	return noStudios, nil
}
// SearchAllScenes collects scenes by running one name search per term (the
// empty string plus each letter "a" through "z") and merging the results,
// de-duplicated by URL.
//
// The crawl is best-effort: a failed search for a single term is skipped so
// that the remaining terms can still contribute. An error is returned only
// when:
//   - ctx is cancelled or its deadline passes (ctx.Err() is returned and no
//     further searches are issued), or
//   - nothing was collected and at least one search failed (the first search
//     error is returned), so callers can tell an outage from an empty result.
//
// At most 2000 results are returned.
func (a *AdultEmpireBulkScraper) SearchAllScenes(ctx context.Context) ([]adultemp.SearchResult, error) {
	// maxScenes is a hard upper bound on the number of results returned.
	const maxScenes = 2000

	searchTerms := []string{"", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"}

	var (
		allResults []adultemp.SearchResult
		firstErr   error
	)
	seen := make(map[string]bool)

	for _, term := range searchTerms {
		if len(allResults) >= maxScenes {
			break
		}

		// Stop issuing requests once the caller has given up.
		if err := ctx.Err(); err != nil {
			return nil, err
		}

		results, err := a.scraper.SearchScenesByName(ctx, term)
		if err != nil {
			// Remember the first failure, but keep going: one bad term
			// should not abort the whole crawl.
			if firstErr == nil {
				firstErr = err
			}
			continue
		}

		for _, result := range results {
			// Enforce the cap inside the batch too, so a large final batch
			// cannot push the total past the limit.
			if len(allResults) >= maxScenes {
				break
			}
			if seen[result.URL] {
				continue
			}
			seen[result.URL] = true
			allResults = append(allResults, result)
		}
	}

	// Nothing collected and at least one search failed: report the failure
	// instead of presenting it as an empty result.
	if len(allResults) == 0 && firstErr != nil {
		return nil, firstErr
	}
	return allResults, nil
}
// ConvertPerformerToModel turns Adult Empire performer data into a
// model.Performer by delegating to the wrapped scraper. data must hold a
// *adultemp.PerformerData; for any other dynamic type the result is nil.
func (a *AdultEmpireBulkScraper) ConvertPerformerToModel(data interface{}) *model.Performer {
	performerData, isPerformer := data.(*adultemp.PerformerData)
	if !isPerformer {
		return nil
	}
	return a.scraper.ConvertPerformerToModel(performerData)
}
// ConvertStudioToModel is not implemented for Adult Empire: whatever data is
// passed in, the result is always a nil *model.Studio.
func (a *AdultEmpireBulkScraper) ConvertStudioToModel(data interface{}) *model.Studio {
	var noStudio *model.Studio // stays nil; no conversion exists yet
	return noStudio
}
// ConvertSceneToModel turns Adult Empire scene data into a model.Scene by
// delegating to the wrapped scraper. data must hold a *adultemp.SceneData;
// for any other dynamic type the result is nil.
func (a *AdultEmpireBulkScraper) ConvertSceneToModel(data interface{}) *model.Scene {
	switch sceneData := data.(type) {
	case *adultemp.SceneData:
		return a.scraper.ConvertSceneToModel(sceneData)
	default:
		return nil
	}
}