- Implement full web interface with Go html/template server - Add GX component library (buttons, dialogs, tables, forms, etc.) - Create scene/performer/studio/movie detail and listing pages - Add Adult Empire scraper for additional metadata sources - Implement movie support with database schema - Add import and sync services for data management - Include comprehensive API and frontend documentation - Add custom color scheme and responsive layout 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
310 lines
7.8 KiB
Go
310 lines
7.8 KiB
Go
package adultemp
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"git.leaktechnologies.dev/stu/Goondex/internal/model"
|
|
)
|
|
|
|
// Scraper implements Adult Empire scraping functionality
type Scraper struct {
	client *Client // HTTP/HTML client used for all Adult Empire requests
}
|
|
|
|
// NewScraper creates a new Adult Empire scraper
|
|
func NewScraper() (*Scraper, error) {
|
|
client, err := NewClient()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &Scraper{
|
|
client: client,
|
|
}, nil
|
|
}
|
|
|
|
// SetAuthToken sets the authentication token for the scraper
// by delegating to the underlying client; the returned error is
// whatever the client reports for an invalid/unusable token.
func (s *Scraper) SetAuthToken(etoken string) error {
	return s.client.SetAuthToken(etoken)
}
|
|
|
|
// ScrapeSceneByURL scrapes a scene from its Adult Empire URL
|
|
func (s *Scraper) ScrapeSceneByURL(ctx context.Context, url string) (*SceneData, error) {
|
|
html, err := s.client.GetSceneByURL(ctx, url)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to fetch scene: %w", err)
|
|
}
|
|
|
|
parser, err := NewXPathParser(html)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse HTML: %w", err)
|
|
}
|
|
|
|
scene := &SceneData{
|
|
URL: url,
|
|
}
|
|
|
|
// Extract title
|
|
scene.Title = parser.QueryString("//h1[@class='title']")
|
|
|
|
// Extract date
|
|
dateStr := parser.QueryString("//div[@class='release-date']/text()")
|
|
scene.Date = ParseDate(dateStr)
|
|
|
|
// Extract studio
|
|
scene.Studio = parser.QueryString("//a[contains(@href, '/studio/')]/text()")
|
|
|
|
// Extract cover image
|
|
scene.Image = ExtractURL(
|
|
parser.QueryAttr("//div[@class='item-image']//img", "src"),
|
|
s.client.baseURL,
|
|
)
|
|
|
|
// Extract description
|
|
desc := parser.QueryString("//div[@class='synopsis']")
|
|
scene.Description = CleanText(desc)
|
|
|
|
// Extract performers
|
|
scene.Performers = parser.QueryStrings("//a[contains(@href, '/performer/')]/text()")
|
|
|
|
// Extract tags/categories
|
|
scene.Tags = parser.QueryStrings("//a[contains(@href, '/category/')]/text()")
|
|
|
|
// Extract code/SKU
|
|
scene.Code = parser.QueryString("//span[@class='sku']/text()")
|
|
|
|
// Extract director
|
|
scene.Director = parser.QueryString("//a[contains(@href, '/director/')]/text()")
|
|
|
|
return scene, nil
|
|
}
|
|
|
|
// SearchScenesByName searches for scenes by title
|
|
func (s *Scraper) SearchScenesByName(ctx context.Context, query string) ([]SearchResult, error) {
|
|
html, err := s.client.SearchScenes(ctx, query)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to search scenes: %w", err)
|
|
}
|
|
|
|
parser, err := NewXPathParser(html)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse HTML: %w", err)
|
|
}
|
|
|
|
var results []SearchResult
|
|
|
|
// Extract search result items using official Stash scraper XPath
|
|
// Title: //a[@class="boxcover"]/img/@title
|
|
// URL: //a[@class="boxcover"]/@href
|
|
// Image: //a[@class="boxcover"]/img/@src
|
|
titles := parser.QueryAttrs("//a[@class='boxcover']/img", "title")
|
|
urls := parser.QueryAttrs("//a[@class='boxcover']", "href")
|
|
images := parser.QueryAttrs("//a[@class='boxcover']/img", "src")
|
|
|
|
for i := range titles {
|
|
result := SearchResult{
|
|
Title: titles[i],
|
|
}
|
|
|
|
if i < len(urls) {
|
|
result.URL = ExtractURL(urls[i], s.client.baseURL)
|
|
}
|
|
|
|
if i < len(images) {
|
|
result.Image = ExtractURL(images[i], s.client.baseURL)
|
|
}
|
|
|
|
results = append(results, result)
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// ScrapePerformerByURL scrapes a performer from their Adult Empire URL
|
|
func (s *Scraper) ScrapePerformerByURL(ctx context.Context, url string) (*PerformerData, error) {
|
|
html, err := s.client.GetPerformerByURL(ctx, url)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to fetch performer: %w", err)
|
|
}
|
|
|
|
parser, err := NewXPathParser(html)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse HTML: %w", err)
|
|
}
|
|
|
|
performer := &PerformerData{
|
|
URL: url,
|
|
}
|
|
|
|
// Extract name
|
|
performer.Name = parser.QueryString("//h1[@class='performer-name']")
|
|
|
|
// Extract image
|
|
performer.Image = ExtractURL(
|
|
parser.QueryAttr("//div[@class='performer-image']//img", "src"),
|
|
s.client.baseURL,
|
|
)
|
|
|
|
// Extract birthdate
|
|
performer.Birthdate = parser.QueryString("//span[@class='birthdate']/text()")
|
|
|
|
// Extract ethnicity
|
|
performer.Ethnicity = parser.QueryString("//span[@class='ethnicity']/text()")
|
|
|
|
// Extract country
|
|
performer.Country = parser.QueryString("//span[@class='country']/text()")
|
|
|
|
// Extract height
|
|
heightStr := parser.QueryString("//span[@class='height']/text()")
|
|
if heightStr != "" {
|
|
height := ParseHeight(heightStr)
|
|
if height > 0 {
|
|
performer.Height = fmt.Sprintf("%d cm", height)
|
|
}
|
|
}
|
|
|
|
// Extract measurements
|
|
performer.Measurements = parser.QueryString("//span[@class='measurements']/text()")
|
|
|
|
// Extract hair color
|
|
performer.HairColor = parser.QueryString("//span[@class='hair-color']/text()")
|
|
|
|
// Extract eye color
|
|
performer.EyeColor = parser.QueryString("//span[@class='eye-color']/text()")
|
|
|
|
// Extract biography
|
|
bio := parser.QueryString("//div[@class='bio']")
|
|
performer.Biography = CleanText(bio)
|
|
|
|
// Extract aliases
|
|
aliasStr := parser.QueryString("//span[@class='aliases']/text()")
|
|
if aliasStr != "" {
|
|
// Split by comma
|
|
for _, alias := range splitByComma(aliasStr) {
|
|
performer.Aliases = append(performer.Aliases, alias)
|
|
}
|
|
}
|
|
|
|
return performer, nil
|
|
}
|
|
|
|
// SearchPerformersByName searches for performers by name
|
|
func (s *Scraper) SearchPerformersByName(ctx context.Context, name string) ([]SearchResult, error) {
|
|
html, err := s.client.SearchPerformers(ctx, name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to search performers: %w", err)
|
|
}
|
|
|
|
parser, err := NewXPathParser(html)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to parse HTML: %w", err)
|
|
}
|
|
|
|
var results []SearchResult
|
|
|
|
// Extract performer search results using official Stash scraper XPath
|
|
// Root: //div[@id="performerlist"]//a
|
|
// Name: @label attribute
|
|
// URL: @href attribute
|
|
names := parser.QueryAttrs("//div[@id='performerlist']//a", "label")
|
|
urls := parser.QueryAttrs("//div[@id='performerlist']//a", "href")
|
|
images := parser.QueryAttrs("//div[@id='performerlist']//a//img", "src")
|
|
|
|
for i := range names {
|
|
result := SearchResult{
|
|
Title: names[i],
|
|
}
|
|
|
|
if i < len(urls) {
|
|
result.URL = ExtractURL(urls[i], s.client.baseURL)
|
|
}
|
|
|
|
if i < len(images) {
|
|
result.Image = ExtractURL(images[i], s.client.baseURL)
|
|
}
|
|
|
|
results = append(results, result)
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// ConvertSceneToModel converts SceneData to Goondex model.Scene
|
|
func (s *Scraper) ConvertSceneToModel(data *SceneData) *model.Scene {
|
|
scene := &model.Scene{
|
|
Title: data.Title,
|
|
URL: data.URL,
|
|
Date: data.Date,
|
|
Description: data.Description,
|
|
ImageURL: data.Image,
|
|
Code: data.Code,
|
|
Director: data.Director,
|
|
Source: "adultemp",
|
|
SourceID: ExtractID(data.URL),
|
|
}
|
|
|
|
// Studio will need to be looked up/created separately
|
|
// Performers will need to be looked up/created separately
|
|
// Tags will need to be looked up/created separately
|
|
|
|
return scene
|
|
}
|
|
|
|
// ConvertPerformerToModel converts PerformerData to Goondex model.Performer
|
|
func (s *Scraper) ConvertPerformerToModel(data *PerformerData) *model.Performer {
|
|
performer := &model.Performer{
|
|
Name: data.Name,
|
|
ImageURL: data.Image,
|
|
Birthday: data.Birthdate,
|
|
Ethnicity: data.Ethnicity,
|
|
Country: data.Country,
|
|
Measurements: data.Measurements,
|
|
HairColor: data.HairColor,
|
|
EyeColor: data.EyeColor,
|
|
Bio: data.Biography,
|
|
Source: "adultemp",
|
|
SourceID: ExtractID(data.URL),
|
|
}
|
|
|
|
// Parse height if available
|
|
if data.Height != "" {
|
|
height := ParseHeight(data.Height)
|
|
if height > 0 {
|
|
performer.Height = height
|
|
}
|
|
}
|
|
|
|
// Join aliases
|
|
if len(data.Aliases) > 0 {
|
|
performer.Aliases = joinStrings(data.Aliases, ", ")
|
|
}
|
|
|
|
return performer
|
|
}
|
|
|
|
// Helper functions
|
|
|
|
// splitByComma splits s on commas, trims surrounding whitespace from
// each piece, and drops empty pieces. Returns nil when nothing remains.
func splitByComma(s string) []string {
	var out []string
	for _, piece := range strings.Split(s, ",") {
		if t := strings.TrimSpace(piece); t != "" {
			out = append(out, t)
		}
	}
	return out
}
|
|
|
|
// joinStrings joins the non-empty elements of strs with sep, skipping
// empty strings entirely (no doubled separators).
func joinStrings(strs []string, sep string) string {
	var b strings.Builder
	for _, s := range strs {
		if s == "" {
			continue
		}
		if b.Len() > 0 {
			b.WriteString(sep)
		}
		b.WriteString(s)
	}
	return b.String()
}
|