Goondex/internal/scraper/merger/performer_merger.go
Stu Leak 16fb407a3c v0.1.0-dev4: Add web frontend with UI component library
- Implement full web interface with Go html/template server
- Add GX component library (buttons, dialogs, tables, forms, etc.)
- Create scene/performer/studio/movie detail and listing pages
- Add Adult Empire scraper for additional metadata sources
- Implement movie support with database schema
- Add import and sync services for data management
- Include comprehensive API and frontend documentation
- Add custom color scheme and responsive layout

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-17 10:47:30 -05:00

130 lines
3.8 KiB
Go

package merger
import (
"fmt"
"strings"
"git.leaktechnologies.dev/stu/Goondex/internal/model"
"git.leaktechnologies.dev/stu/Goondex/internal/scraper/adultemp"
)
// MergePerformerData combines performer data from TPDB and Adult Empire.
//
// TPDB data is primary: an Adult Empire value is only used to fill a field
// that is empty (or zero) on the TPDB record. Two fields are combined rather
// than filled: the biography, which gets the Adult Empire text appended when
// it is not already contained in the TPDB bio, and the aliases, which are
// merged into one comma-separated list without duplicates.
//
// tpdbPerformer is modified in place and the same pointer is returned. If
// tpdbPerformer is nil the result is nil; if adultempData is nil,
// tpdbPerformer is returned unchanged.
func MergePerformerData(tpdbPerformer *model.Performer, adultempData *adultemp.PerformerData) *model.Performer {
	if tpdbPerformer == nil {
		return nil
	}
	if adultempData == nil {
		return tpdbPerformer
	}

	merged := tpdbPerformer

	// Fill in missing fields from Adult Empire. Copying an empty source value
	// over an empty target is a no-op, so only the target needs checking.
	if merged.Birthday == "" {
		merged.Birthday = adultempData.Birthdate
	}
	if merged.Ethnicity == "" {
		merged.Ethnicity = adultempData.Ethnicity
	}
	if merged.Country == "" {
		merged.Country = adultempData.Country
	}
	if merged.HairColor == "" {
		merged.HairColor = adultempData.HairColor
	}
	if merged.EyeColor == "" {
		merged.EyeColor = adultempData.EyeColor
	}
	if merged.Measurements == "" {
		merged.Measurements = adultempData.Measurements
	}

	// Height: prefer TPDB if available, otherwise parse the numeric value out
	// of the Adult Empire string, which is expected to look like "168 cm".
	// A value that does not match that format is ignored on purpose; the
	// height simply stays unset.
	if merged.Height == 0 && adultempData.Height != "" {
		var height int
		if _, err := fmt.Sscanf(adultempData.Height, "%d cm", &height); err == nil {
			merged.Height = height
		}
	}

	// Bio: use whichever exists; if both exist and the Adult Empire text is
	// not already part of the bio, append it under a source label. The
	// Contains check also keeps repeated merges from appending it twice.
	switch {
	case adultempData.Biography == "":
		// Nothing to add.
	case merged.Bio == "":
		merged.Bio = adultempData.Biography
	case !strings.Contains(merged.Bio, adultempData.Biography):
		merged.Bio += "\n\n[Adult Empire]: " + adultempData.Biography
	}

	// Aliases: append Adult Empire aliases that are not already present.
	// Comparison ignores case and surrounding whitespace, blank entries are
	// skipped, and every alias added is recorded so duplicates inside the
	// Adult Empire list itself are not appended twice.
	if len(adultempData.Aliases) > 0 {
		seen := make(map[string]struct{})
		for _, alias := range strings.Split(merged.Aliases, ",") {
			if key := strings.ToLower(strings.TrimSpace(alias)); key != "" {
				seen[key] = struct{}{}
			}
		}

		var b strings.Builder
		b.WriteString(merged.Aliases)
		for _, alias := range adultempData.Aliases {
			trimmed := strings.TrimSpace(alias)
			key := strings.ToLower(trimmed)
			if key == "" {
				continue
			}
			if _, dup := seen[key]; dup {
				continue
			}
			seen[key] = struct{}{}
			if b.Len() > 0 {
				b.WriteString(", ")
			}
			b.WriteString(trimmed)
		}
		merged.Aliases = b.String()
	}

	// Image URL: a TPDB image is never overridden; the Adult Empire image is
	// only used as a fallback when TPDB has none.
	if merged.ImageURL == "" {
		merged.ImageURL = adultempData.Image
	}

	return merged
}
// ShouldMerge reports whether two performer names are likely the same person.
//
// Names are compared case-insensitively as collections of words, so word
// order does not matter. Commas are treated as separators, which lets
// "Riley Reid" match "Reid, Riley". Each word in one name can be matched by
// at most one word in the other, so a repeated word is not counted twice.
//
// The names match when at least 70% of the words of the longer name are
// matched; "Riley Reid" and "Riley Red" (1 of 2 words) therefore do not
// match. A name with no words (empty, whitespace or commas only) never
// matches anything, including another empty name.
func ShouldMerge(performer1Name, performer2Name string) bool {
	// Minimum fraction of the longer name's words that must be matched.
	const threshold = 0.7

	// normalize lower-cases the name and splits it into words, treating
	// commas like whitespace so "Reid," and "Reid" compare equal.
	normalize := func(name string) []string {
		return strings.Fields(strings.ReplaceAll(strings.ToLower(name), ",", " "))
	}

	words1 := normalize(performer1Name)
	words2 := normalize(performer2Name)

	// An empty name carries no identity information; never merge on it.
	if len(words1) == 0 || len(words2) == 0 {
		return false
	}

	// Count the words of the second name so each one can be consumed by at
	// most one word of the first name.
	remaining := make(map[string]int, len(words2))
	for _, w := range words2 {
		remaining[w]++
	}

	matchCount := 0
	for _, w := range words1 {
		if remaining[w] > 0 {
			remaining[w]--
			matchCount++
		}
	}

	// Measure against the longer name so a short name cannot trivially
	// match a much longer one.
	maxWords := len(words1)
	if len(words2) > maxWords {
		maxWords = len(words2)
	}

	return float64(matchCount)/float64(maxWords) >= threshold
}