// Package merger combines performer records obtained from different sources
// into a single model.Performer and decides whether two names refer to the
// same performer.
package merger

import (
	"fmt"
	"strings"

	"git.leaktechnologies.dev/stu/Goondex/internal/model"
	"git.leaktechnologies.dev/stu/Goondex/internal/scraper/adultemp"
)

// MergePerformerData combines data from multiple sources.
//
// Priority: TPDB data is primary; Adult Empire only fills in fields that TPDB
// left empty, with two exceptions: a differing Adult Empire biography is
// appended to the TPDB one, and Adult Empire aliases that are not already
// present are appended to the TPDB alias list.
//
// tpdbPerformer is modified in place and the same pointer is returned. If
// either argument is nil there is nothing to merge and tpdbPerformer is
// returned unchanged (which may itself be nil).
func MergePerformerData(tpdbPerformer *model.Performer, adultempData *adultemp.PerformerData) *model.Performer {
	if tpdbPerformer == nil || adultempData == nil {
		return tpdbPerformer
	}

	// merged aliases the caller's record; every assignment below mutates it.
	merged := tpdbPerformer

	// Fill in missing fields from Adult Empire.
	if merged.Birthday == "" && adultempData.Birthdate != "" {
		merged.Birthday = adultempData.Birthdate
	}
	if merged.Ethnicity == "" && adultempData.Ethnicity != "" {
		merged.Ethnicity = adultempData.Ethnicity
	}
	if merged.Country == "" && adultempData.Country != "" {
		merged.Country = adultempData.Country
	}
	if merged.HairColor == "" && adultempData.HairColor != "" {
		merged.HairColor = adultempData.HairColor
	}
	if merged.EyeColor == "" && adultempData.EyeColor != "" {
		merged.EyeColor = adultempData.EyeColor
	}
	if merged.Measurements == "" && adultempData.Measurements != "" {
		merged.Measurements = adultempData.Measurements
	}

	// Height: prefer TPDB if available, otherwise use Adult Empire.
	if merged.Height == 0 && adultempData.Height != "" {
		if height, ok := parseHeightCM(adultempData.Height); ok {
			merged.Height = height
		}
	}

	// Bio: use whichever exists; if both exist and the Adult Empire text is
	// not already contained in the TPDB text, append it under a label.
	switch {
	case adultempData.Biography == "":
		// Nothing to add.
	case merged.Bio == "":
		merged.Bio = adultempData.Biography
	case !strings.Contains(merged.Bio, adultempData.Biography):
		merged.Bio = merged.Bio + "\n\n[Adult Empire]: " + adultempData.Biography
	}

	// Aliases: append Adult Empire aliases that are not already listed.
	if len(adultempData.Aliases) > 0 {
		merged.Aliases = mergeAliasList(merged.Aliases, adultempData.Aliases)
	}

	// Image URL: never override a TPDB image; Adult Empire is only a fallback.
	if merged.ImageURL == "" && adultempData.Image != "" {
		merged.ImageURL = adultempData.Image
	}

	return merged
}

// parseHeightCM extracts the numeric value from a height string of the form
// "168 cm". It reports false when the string does not match that format or
// when the parsed value is not positive.
func parseHeightCM(s string) (int, bool) {
	var height int
	if _, err := fmt.Sscanf(s, "%d cm", &height); err != nil {
		return 0, false
	}
	if height <= 0 {
		return 0, false
	}
	return height, true
}

// mergeAliasList appends to the comma-separated list existing every alias in
// incoming that is not already present, and returns the resulting list.
//
// The existing text is kept verbatim. Incoming aliases are trimmed, blank
// entries are skipped, and duplicates are detected case-insensitively both
// against the existing list and against aliases appended earlier in the same
// call.
func mergeAliasList(existing string, incoming []string) string {
	seen := make(map[string]struct{}, len(incoming))
	for _, alias := range strings.Split(existing, ",") {
		if key := strings.ToLower(strings.TrimSpace(alias)); key != "" {
			seen[key] = struct{}{}
		}
	}

	var b strings.Builder
	b.WriteString(existing)
	for _, alias := range incoming {
		trimmed := strings.TrimSpace(alias)
		key := strings.ToLower(trimmed)
		if key == "" {
			continue
		}
		if _, dup := seen[key]; dup {
			continue
		}
		// Record the alias so a repeated incoming entry is not appended twice.
		seen[key] = struct{}{}
		if b.Len() > 0 {
			b.WriteString(", ")
		}
		b.WriteString(trimmed)
	}
	return b.String()
}

// ShouldMerge determines if two performers are likely the same person.
//
// Names are compared case-insensitively as bags of words, with commas treated
// as separators so that "Riley Reid" and "Reid, Riley" match. The result is
// true when the number of shared words, divided by the word count of the
// longer name, is at least 0.7. Each word of one name can be matched by at
// most one word of the other, so "Riley Reid" and "Riley Red" do not match.
// A name that contains no words never matches anything.
func ShouldMerge(performer1Name, performer2Name string) bool {
	// At least 70% of words must match.
	const threshold = 0.7

	words1 := normalizedNameWords(performer1Name)
	words2 := normalizedNameWords(performer2Name)
	if len(words1) == 0 || len(words2) == 0 {
		return false
	}

	// Count the words of the second name so that each occurrence can satisfy
	// only one word of the first name.
	remaining := make(map[string]int, len(words2))
	for _, word := range words2 {
		remaining[word]++
	}

	matchCount := 0
	for _, word := range words1 {
		if remaining[word] > 0 {
			remaining[word]--
			matchCount++
		}
	}

	maxWords := len(words1)
	if len(words2) > maxWords {
		maxWords = len(words2)
	}

	return float64(matchCount)/float64(maxWords) >= threshold
}

// normalizedNameWords lower-cases name and splits it into words on white
// space and commas.
func normalizedNameWords(name string) []string {
	return strings.Fields(strings.ReplaceAll(strings.ToLower(name), ",", " "))
}