Goondex/internal/scraper/tpdb/scraper.go
Team Goon f7d82cd207 v0.1.0-dev2: Full TPDB integration with auto-fetch and comprehensive docs
Major Features:
- Complete TPDB scraper implementation with real API calls
- Auto-fetch on cache miss: search commands now automatically import from TPDB when nothing is found locally
- Comprehensive documentation (5 markdown files for Bookstack)
- Import commands for performers, studios, and scenes
- Fixed JSON type mismatches (aliases array, studio numeric IDs)

Changes:
1. TPDB Scraper (internal/scraper/tpdb/):
   - types.go: Full API response structures with correct types
     - PerformerResponse.Aliases: string → []string (TPDB returns array)
     - StudioResponse.ID: string → int (TPDB returns numeric IDs)
     - SiteInfo.ID: string → int (scenes reference studios by number)
   - mapper.go: Maps TPDB responses to internal models
     - Converts aliases array to comma-separated string
     - Converts numeric studio IDs to strings using strconv.Itoa()
   - scraper.go: Real HTTP client with Bearer token auth
     - SearchPerformers, SearchStudios, SearchScenes implemented
     - GetPerformerByID, GetStudioByID, GetSceneByID implemented

2. CLI Auto-Fetch (cmd/goondex/main.go):
   - performer-search: Auto-fetches from TPDB if the local DB has no match
   - studio-search: Auto-fetches from TPDB if the local DB has no match
   - scene-search: Auto-fetches basic metadata only (no relationships)
   - Graceful handling of missing TPDB_API_KEY
   - Import → search again to get local IDs

3. Documentation (docs/):
   - INDEX.md: Documentation overview and navigation
   - ARCHITECTURE.md: System design, data flow, component diagrams
   - DATABASE_SCHEMA.md: Complete schema with relationships and indexes
   - CLI_REFERENCE.md: All commands with examples
   - TPDB_INTEGRATION.md: API guide, data mapping, best practices

4. Fixes:
   - .gitignore: Fixed pattern to allow cmd/goondex/* and cmd/goondexd/*
   - README: Updated to reflect TPDB integration and auto-fetch

Testing:
- performer-search "Riley Reid" - auto-fetched 2 performers, cached
- studio-search "Brazzers" - auto-fetched 12 studios, cached
- Aliases now display correctly as a comma-separated list
- Studio IDs are properly converted from numeric to string

API Integration:
- Base URL: https://api.theporndb.net
- Authentication: Bearer token via TPDB_API_KEY env var
- Endpoints: /performers, /sites, /scenes
- Rate limiting handled with warnings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-14 22:04:23 -05:00

175 lines
4.8 KiB
Go

package tpdb
import (
"context"
"encoding/json"
"fmt"
"net/url"
"git.leaktechnologies.dev/stu/Goondex/internal/model"
)
// Scraper is the TPDB-backed scraper. Its methods issue requests through the
// wrapped Client and convert the decoded API responses into internal models.
type Scraper struct {
	client *Client // performs the requests issued by the methods of Scraper
}
// NewScraper returns a Scraper whose client is built by NewClient from the
// given baseURL and apiKey.
func NewScraper(baseURL, apiKey string) *Scraper {
	sc := new(Scraper)
	sc.client = NewClient(baseURL, apiKey)
	return sc
}
// Name reports the fixed identifier of this scraper, "tpdb".
func (s *Scraper) Name() string {
	const scraperName = "tpdb"
	return scraperName
}
// SearchPerformers requests the "/performers" endpoint with query as the "q"
// parameter and returns the matches converted to internal performer models,
// in the order the API returned them.
//
// An error is returned when the request fails, when the response envelope
// cannot be decoded, or when its data payload is not a list of performers.
func (s *Scraper) SearchPerformers(ctx context.Context, query string) ([]model.Performer, error) {
	raw, err := s.client.get(ctx, "/performers", url.Values{"q": {query}})
	if err != nil {
		return nil, fmt.Errorf("failed to search performers: %w", err)
	}

	// Decode the outer envelope first; the payload is decoded separately below.
	var envelope APIResponse
	err = json.Unmarshal(raw, &envelope)
	if err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	var found []PerformerResponse
	err = json.Unmarshal(envelope.Data, &found)
	if err != nil {
		return nil, fmt.Errorf("failed to parse performers: %w", err)
	}

	// Map every API record onto the internal model, one slot per record.
	results := make([]model.Performer, len(found))
	for i := range found {
		results[i] = mapPerformer(found[i])
	}
	return results, nil
}
// SearchStudios requests the "/sites" endpoint with query as the "q"
// parameter and returns the matches converted to internal studio models, in
// the order the API returned them.
//
// An error is returned when the request fails, when the response envelope
// cannot be decoded, or when its data payload is not a list of studios.
func (s *Scraper) SearchStudios(ctx context.Context, query string) ([]model.Studio, error) {
	raw, err := s.client.get(ctx, "/sites", url.Values{"q": {query}})
	if err != nil {
		return nil, fmt.Errorf("failed to search studios: %w", err)
	}

	// Decode the outer envelope first; the payload is decoded separately below.
	var envelope APIResponse
	err = json.Unmarshal(raw, &envelope)
	if err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	var found []StudioResponse
	err = json.Unmarshal(envelope.Data, &found)
	if err != nil {
		return nil, fmt.Errorf("failed to parse studios: %w", err)
	}

	// Map every API record onto the internal model, one slot per record.
	results := make([]model.Studio, len(found))
	for i := range found {
		results[i] = mapStudio(found[i])
	}
	return results, nil
}
// SearchScenes requests the "/scenes" endpoint with query as the "q"
// parameter and returns the matches converted to internal scene models, in
// the order the API returned them.
//
// An error is returned when the request fails, when the response envelope
// cannot be decoded, or when its data payload is not a list of scenes.
func (s *Scraper) SearchScenes(ctx context.Context, query string) ([]model.Scene, error) {
	raw, err := s.client.get(ctx, "/scenes", url.Values{"q": {query}})
	if err != nil {
		return nil, fmt.Errorf("failed to search scenes: %w", err)
	}

	// Decode the outer envelope first; the payload is decoded separately below.
	var envelope APIResponse
	err = json.Unmarshal(raw, &envelope)
	if err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	var found []SceneResponse
	err = json.Unmarshal(envelope.Data, &found)
	if err != nil {
		return nil, fmt.Errorf("failed to parse scenes: %w", err)
	}

	// Map every API record onto the internal model, one slot per record.
	results := make([]model.Scene, len(found))
	for i := range found {
		results[i] = mapScene(found[i])
	}
	return results, nil
}
// GetSceneByID fetches a single scene from the "/scenes/{id}" endpoint and
// converts it to the internal scene model.
//
// remoteID must be non-empty. It is path-escaped before being appended to the
// endpoint so that characters such as "/", "?" or "#" cannot change which
// resource is requested.
//
// An error is returned for an empty ID, a failed request, an undecodable
// response envelope, or a data payload that is not a single scene.
func (s *Scraper) GetSceneByID(ctx context.Context, remoteID string) (*model.Scene, error) {
	// Without this guard the request would target the collection endpoint
	// "/scenes/" instead of a single scene.
	if remoteID == "" {
		return nil, fmt.Errorf("failed to get scene: empty remote ID")
	}

	// NOTE(review): assumes client.get treats the path as already-escaped URL
	// text (e.g. via url.Parse) — confirm against the Client implementation.
	body, err := s.client.get(ctx, "/scenes/"+url.PathEscape(remoteID), nil)
	if err != nil {
		return nil, fmt.Errorf("failed to get scene: %w", err)
	}

	var apiResp APIResponse
	if err := json.Unmarshal(body, &apiResp); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	var tpdbScene SceneResponse
	if err := json.Unmarshal(apiResp.Data, &tpdbScene); err != nil {
		return nil, fmt.Errorf("failed to parse scene: %w", err)
	}

	scene := mapScene(tpdbScene)
	return &scene, nil
}
// GetPerformerByID fetches a single performer from the "/performers/{id}"
// endpoint and converts it to the internal performer model.
//
// remoteID must be non-empty. It is path-escaped before being appended to the
// endpoint so that characters such as "/", "?" or "#" cannot change which
// resource is requested.
//
// An error is returned for an empty ID, a failed request, an undecodable
// response envelope, or a data payload that is not a single performer.
func (s *Scraper) GetPerformerByID(ctx context.Context, remoteID string) (*model.Performer, error) {
	// Without this guard the request would target the collection endpoint
	// "/performers/" instead of a single performer.
	if remoteID == "" {
		return nil, fmt.Errorf("failed to get performer: empty remote ID")
	}

	// NOTE(review): assumes client.get treats the path as already-escaped URL
	// text (e.g. via url.Parse) — confirm against the Client implementation.
	body, err := s.client.get(ctx, "/performers/"+url.PathEscape(remoteID), nil)
	if err != nil {
		return nil, fmt.Errorf("failed to get performer: %w", err)
	}

	var apiResp APIResponse
	if err := json.Unmarshal(body, &apiResp); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	var tpdbPerformer PerformerResponse
	if err := json.Unmarshal(apiResp.Data, &tpdbPerformer); err != nil {
		return nil, fmt.Errorf("failed to parse performer: %w", err)
	}

	performer := mapPerformer(tpdbPerformer)
	return &performer, nil
}
// GetStudioByID fetches a single studio from the "/sites/{id}" endpoint and
// converts it to the internal studio model.
//
// remoteID must be non-empty. It is path-escaped before being appended to the
// endpoint so that characters such as "/", "?" or "#" cannot change which
// resource is requested.
//
// An error is returned for an empty ID, a failed request, an undecodable
// response envelope, or a data payload that is not a single studio.
func (s *Scraper) GetStudioByID(ctx context.Context, remoteID string) (*model.Studio, error) {
	// Without this guard the request would target the collection endpoint
	// "/sites/" instead of a single studio.
	if remoteID == "" {
		return nil, fmt.Errorf("failed to get studio: empty remote ID")
	}

	// NOTE(review): assumes client.get treats the path as already-escaped URL
	// text (e.g. via url.Parse) — confirm against the Client implementation.
	body, err := s.client.get(ctx, "/sites/"+url.PathEscape(remoteID), nil)
	if err != nil {
		return nil, fmt.Errorf("failed to get studio: %w", err)
	}

	var apiResp APIResponse
	if err := json.Unmarshal(body, &apiResp); err != nil {
		return nil, fmt.Errorf("failed to parse response: %w", err)
	}

	var tpdbStudio StudioResponse
	if err := json.Unmarshal(apiResp.Data, &tpdbStudio); err != nil {
		return nil, fmt.Errorf("failed to parse studio: %w", err)
	}

	studio := mapStudio(tpdbStudio)
	return &studio, nil
}