fix: stabilize import paths and CLI alias system

This commit is contained in:
Team Goon 2025-11-06 11:14:26 -05:00
parent 24c56d2221
commit 1aa4818ed4
13 changed files with 197 additions and 691 deletions

74
.gitignore vendored Normal file
View File

@ -0,0 +1,74 @@
# ============================================================
# Goondex: Large / Sensitive Data Exclusions
# ============================================================
# ============================================================
# 🖼️ Gallery Assets
# ------------------------------------------------------------
# Exclude all image and video content from galleries.
# Keep only metadata.json and tag YAMLs for database integrity.
# ============================================================
Galleries/**/*.jpg
Galleries/**/*.jpeg
Galleries/**/*.png
Galleries/**/*.gif
Galleries/**/*.webp
Galleries/**/*.mp4
Galleries/**/*.mov
Galleries/**/*.avi
Galleries/**/*.mkv
Galleries/**/*.zip
Galleries/**/*.tar
# Keep metadata and tag files
!Galleries/**/metadata.json
!Galleries/**/tags.yaml
!Galleries/README.md
# ============================================================
# 🧠 Machine Learning Artifacts
# ------------------------------------------------------------
# Model weights, generated outputs, and cached intermediate data.
# ============================================================
src/ml/models/*.pth
src/ml/outputs/**
src/ml/cache/**
src/ml/__pycache__/
src/ml/__pycache__/*
# ============================================================
# 🧰 Build / Environment Files
# ------------------------------------------------------------
# Prevent committing environment-specific or temporary files.
# ============================================================
__pycache__/
*.pyc
*.pyo
*.pyd
*.log
*.tmp
*.bak
*.swp
# Virtual environments
.venv/
env/
venv/
# ============================================================
# 🧱 Project-Level Exclusions
# ------------------------------------------------------------
# Prevent committing local configurations or system files.
# ============================================================
.DS_Store
Thumbs.db
.idea/
.vscode/
*.iml
# ============================================================
# 🧾 Dependency and Build Outputs
# ============================================================
*.egg-info/
build/
dist/

View File

@ -1,324 +0,0 @@
SUMMARY OF LESS COMMANDS
Commands marked with * may be preceded by a number, N.
Notes in parentheses indicate the behavior if N is given.
A key preceded by a caret indicates the Ctrl key; thus ^K is ctrl-K.
h H Display this help.
q :q Q :Q ZZ Exit.
---------------------------------------------------------------------------
MOVING
e ^E j ^N CR * Forward one line (or _N lines).
y ^Y k ^K ^P * Backward one line (or _N lines).
ESC-j * Forward one file line (or _N file lines).
ESC-k * Backward one file line (or _N file lines).
f ^F ^V SPACE * Forward one window (or _N lines).
b ^B ESC-v * Backward one window (or _N lines).
z * Forward one window (and set window to _N).
w * Backward one window (and set window to _N).
ESC-SPACE * Forward one window, but don't stop at end-of-file.
ESC-b * Backward one window, but don't stop at beginning-of-file.
d ^D * Forward one half-window (and set half-window to _N).
u ^U * Backward one half-window (and set half-window to _N).
ESC-) RightArrow * Right one half screen width (or _N positions).
ESC-( LeftArrow * Left one half screen width (or _N positions).
ESC-} ^RightArrow Right to last column displayed.
ESC-{ ^LeftArrow Left to first column.
F Forward forever; like "tail -f".
ESC-F Like F but stop when search pattern is found.
r ^R ^L Repaint screen.
R Repaint screen, discarding buffered input.
---------------------------------------------------
Default "window" is the screen height.
Default "half-window" is half of the screen height.
---------------------------------------------------------------------------
SEARCHING
/_p_a_t_t_e_r_n * Search forward for (_N-th) matching line.
?_p_a_t_t_e_r_n * Search backward for (_N-th) matching line.
n * Repeat previous search (for _N-th occurrence).
N * Repeat previous search in reverse direction.
ESC-n * Repeat previous search, spanning files.
ESC-N * Repeat previous search, reverse dir. & spanning files.
^O^N ^On * Search forward for (_N-th) OSC8 hyperlink.
^O^P ^Op * Search backward for (_N-th) OSC8 hyperlink.
^O^L ^Ol Jump to the currently selected OSC8 hyperlink.
ESC-u Undo (toggle) search highlighting.
ESC-U Clear search highlighting.
&_p_a_t_t_e_r_n * Display only matching lines.
---------------------------------------------------
Search is case-sensitive unless changed with -i or -I.
A search pattern may begin with one or more of:
^N or ! Search for NON-matching lines.
^E or * Search multiple files (pass thru END OF FILE).
^F or @ Start search at FIRST file (for /) or last file (for ?).
^K Highlight matches, but don't move (KEEP position).
^R Don't use REGULAR EXPRESSIONS.
^S _n Search for match in _n-th parenthesized subpattern.
^W WRAP search if no match found.
^L Enter next character literally into pattern.
---------------------------------------------------------------------------
JUMPING
g < ESC-< * Go to first line in file (or line _N).
G > ESC-> * Go to last line in file (or line _N).
p % * Go to beginning of file (or _N percent into file).
t * Go to the (_N-th) next tag.
T * Go to the (_N-th) previous tag.
{ ( [ * Find close bracket } ) ].
} ) ] * Find open bracket { ( [.
ESC-^F _<_c_1_> _<_c_2_> * Find close bracket _<_c_2_>.
ESC-^B _<_c_1_> _<_c_2_> * Find open bracket _<_c_1_>.
---------------------------------------------------
Each "find close bracket" command goes forward to the close bracket
matching the (_N-th) open bracket in the top line.
Each "find open bracket" command goes backward to the open bracket
matching the (_N-th) close bracket in the bottom line.
m_<_l_e_t_t_e_r_> Mark the current top line with <letter>.
M_<_l_e_t_t_e_r_> Mark the current bottom line with <letter>.
'_<_l_e_t_t_e_r_> Go to a previously marked position.
'' Go to the previous position.
^X^X Same as '.
ESC-m_<_l_e_t_t_e_r_> Clear a mark.
---------------------------------------------------
A mark is any upper-case or lower-case letter.
Certain marks are predefined:
^ means beginning of the file
$ means end of the file
---------------------------------------------------------------------------
CHANGING FILES
:e [_f_i_l_e] Examine a new file.
^X^V Same as :e.
:n * Examine the (_N-th) next file from the command line.
:p * Examine the (_N-th) previous file from the command line.
:x * Examine the first (or _N-th) file from the command line.
^O^O Open the currently selected OSC8 hyperlink.
:d Delete the current file from the command line list.
= ^G :f Print current file name.
---------------------------------------------------------------------------
MISCELLANEOUS COMMANDS
-_<_f_l_a_g_> Toggle a command line option [see OPTIONS below].
--_<_n_a_m_e_> Toggle a command line option, by name.
__<_f_l_a_g_> Display the setting of a command line option.
___<_n_a_m_e_> Display the setting of an option, by name.
+_c_m_d Execute the less cmd each time a new file is examined.
!_c_o_m_m_a_n_d Execute the shell command with $SHELL.
#_c_o_m_m_a_n_d Execute the shell command, expanded like a prompt.
|XX_c_o_m_m_a_n_d Pipe file between current pos & mark XX to shell command.
s _f_i_l_e Save input to a file.
v Edit the current file with $VISUAL or $EDITOR.
V Print version number of "less".
---------------------------------------------------------------------------
OPTIONS
Most options may be changed either on the command line,
or from within less by using the - or -- command.
Options may be given in one of two forms: either a single
character preceded by a -, or a name preceded by --.
-? ........ --help
Display help (from command line).
-a ........ --search-skip-screen
Search skips current screen.
-A ........ --SEARCH-SKIP-SCREEN
Search starts just after target line.
-b [_N] .... --buffers=[_N]
Number of buffers.
-B ........ --auto-buffers
Don't automatically allocate buffers for pipes.
-c ........ --clear-screen
Repaint by clearing rather than scrolling.
-d ........ --dumb
Dumb terminal.
-D xx_c_o_l_o_r . --color=xx_c_o_l_o_r
Set screen colors.
-e -E .... --quit-at-eof --QUIT-AT-EOF
Quit at end of file.
-f ........ --force
Force open non-regular files.
-F ........ --quit-if-one-screen
Quit if entire file fits on first screen.
-g ........ --hilite-search
Highlight only last match for searches.
-G ........ --HILITE-SEARCH
Don't highlight any matches for searches.
-h [_N] .... --max-back-scroll=[_N]
Backward scroll limit.
-i ........ --ignore-case
Ignore case in searches that do not contain uppercase.
-I ........ --IGNORE-CASE
Ignore case in all searches.
-j [_N] .... --jump-target=[_N]
Screen position of target lines.
-J ........ --status-column
Display a status column at left edge of screen.
-k _f_i_l_e ... --lesskey-file=_f_i_l_e
Use a compiled lesskey file.
-K ........ --quit-on-intr
Exit less in response to ctrl-C.
-L ........ --no-lessopen
Ignore the LESSOPEN environment variable.
-m -M .... --long-prompt --LONG-PROMPT
Set prompt style.
-n ......... --line-numbers
Suppress line numbers in prompts and messages.
-N ......... --LINE-NUMBERS
Display line number at start of each line.
-o [_f_i_l_e] .. --log-file=[_f_i_l_e]
Copy to log file (standard input only).
-O [_f_i_l_e] .. --LOG-FILE=[_f_i_l_e]
Copy to log file (unconditionally overwrite).
-p _p_a_t_t_e_r_n . --pattern=[_p_a_t_t_e_r_n]
Start at pattern (from command line).
-P [_p_r_o_m_p_t] --prompt=[_p_r_o_m_p_t]
Define new prompt.
-q -Q .... --quiet --QUIET --silent --SILENT
Quiet the terminal bell.
-r -R .... --raw-control-chars --RAW-CONTROL-CHARS
Output "raw" control characters.
-s ........ --squeeze-blank-lines
Squeeze multiple blank lines.
-S ........ --chop-long-lines
Chop (truncate) long lines rather than wrapping.
-t _t_a_g .... --tag=[_t_a_g]
Find a tag.
-T [_t_a_g_s_f_i_l_e] --tag-file=[_t_a_g_s_f_i_l_e]
Use an alternate tags file.
-u -U .... --underline-special --UNDERLINE-SPECIAL
Change handling of backspaces, tabs and carriage returns.
-V ........ --version
Display the version number of "less".
-w ........ --hilite-unread
Highlight first new line after forward-screen.
-W ........ --HILITE-UNREAD
Highlight first new line after any forward movement.
-x [_N[,...]] --tabs=[_N[,...]]
Set tab stops.
-X ........ --no-init
Don't use termcap init/deinit strings.
-y [_N] .... --max-forw-scroll=[_N]
Forward scroll limit.
-z [_N] .... --window=[_N]
Set size of window.
-" [_c[_c]] . --quotes=[_c[_c]]
Set shell quote characters.
-~ ........ --tilde
Don't display tildes after end of file.
-# [_N] .... --shift=[_N]
Set horizontal scroll amount (0 = one half screen width).
--exit-follow-on-close
Exit F command on a pipe when writer closes pipe.
--file-size
Automatically determine the size of the input file.
--follow-name
The F command changes files if the input file is renamed.
--form-feed
Stop scrolling when a form feed character is reached.
--header=[_L[,_C[,_N]]]
Use _L lines (starting at line _N) and _C columns as headers.
--incsearch
Search file as each pattern character is typed in.
--intr=[_C]
Use _C instead of ^X to interrupt a read.
--lesskey-context=_t_e_x_t
Use lesskey source file contents.
--lesskey-src=_f_i_l_e
Use a lesskey source file.
--line-num-width=[_N]
Set the width of the -N line number field to _N characters.
--match-shift=[_N]
Show at least _N characters to the left of a search match.
--modelines=[_N]
Read _N lines from the input file and look for vim modelines.
--mouse
Enable mouse input.
--no-edit-warn
Don't warn when using v command on a file opened via LESSOPEN.
--no-keypad
Don't send termcap keypad init/deinit strings.
--no-histdups
Remove duplicates from command history.
--no-number-headers
Don't give line numbers to header lines.
--no-paste
Ignore pasted input.
--no-search-header-lines
Searches do not include header lines.
--no-search-header-columns
Searches do not include header columns.
--no-search-headers
Searches do not include header lines or columns.
--no-vbell
Disable the terminal's visual bell.
--redraw-on-quit
Redraw final screen when quitting.
--rscroll=[_C]
Set the character used to mark truncated lines.
--save-marks
Retain marks across invocations of less.
--search-options=[EFKNRW-]
Set default options for every search.
--show-preproc-errors
Display a message if preprocessor exits with an error status.
--proc-backspace
Process backspaces for bold/underline.
--PROC-BACKSPACE
Treat backspaces as control characters.
--proc-return
Delete carriage returns before newline.
--PROC-RETURN
Treat carriage returns as control characters.
--proc-tab
Expand tabs to spaces.
--PROC-TAB
Treat tabs as control characters.
--status-col-width=[_N]
Set the width of the -J status column to _N characters.
--status-line
Highlight or color the entire line containing a mark.
--use-backslash
Subsequent options use backslash as escape char.
--use-color
Enables colored text.
--wheel-lines=[_N]
Each click of the mouse wheel moves _N lines.
--wordwrap
Wrap lines at spaces.
---------------------------------------------------------------------------
LINE EDITING
These keys can be used to edit text being entered
on the "command line" at the bottom of the screen.
RightArrow ..................... ESC-l ... Move cursor right one character.
LeftArrow ...................... ESC-h ... Move cursor left one character.
ctrl-RightArrow ESC-RightArrow ESC-w ... Move cursor right one word.
ctrl-LeftArrow ESC-LeftArrow ESC-b ... Move cursor left one word.
HOME ........................... ESC-0 ... Move cursor to start of line.
END ............................ ESC-$ ... Move cursor to end of line.
BACKSPACE ................................ Delete char to left of cursor.
DELETE ......................... ESC-x ... Delete char under cursor.
ctrl-BACKSPACE ESC-BACKSPACE ........... Delete word to left of cursor.
ctrl-DELETE .... ESC-DELETE .... ESC-X ... Delete word under cursor.
ctrl-U ......... ESC (MS-DOS only) ....... Delete entire line.
UpArrow ........................ ESC-k ... Retrieve previous command line.
DownArrow ...................... ESC-j ... Retrieve next command line.
TAB ...................................... Complete filename & cycle.
SHIFT-TAB ...................... ESC-TAB Complete filename & reverse cycle.
ctrl-L ................................... Complete filename, list all.

View File

@ -1,309 +0,0 @@
# 📜 Porndex Importer — Full Changelog
> **Project:** Porndex_PornpicsImporter
> **Repository:** Leak Technologies
> **Branch:** main
> **Version Line:** v0.3.x Development Cycle
---
## [v0.3.0] — Modular Tagging Framework Foundation (2025-10-18)
### ✨ Added
- Introduced **YAML-based Tag Dictionaries** stored under `/src/importer/tagging/` for modular, human-readable tag definitions.
- Implemented initial **`refresh-all`** and **`refresh-one`** commands for reapplying tag inference to galleries.
- Added **persistent `inferred_tags` field** in `metadata.json` to differentiate between automated and manual tags.
- Implemented **automatic source inference** for known networks (e.g., Brazzers, FTV Girls, PornPics).
- Enhanced CLI output with colorized progress indicators and summary totals.
### 🛠 Changed
- Refactored `tag_gallery.py` for modular tagging architecture.
- Centralized configuration paths to `/src/importer/config/` for easier project-wide access.
### 🧹 Maintenance
- Improved exception handling for missing or malformed tag dictionaries.
- Added consistent emoji/logging system across CLI commands.
---
## [v0.3.1] — CLI Polishing & Dictionary Improvements (2025-10-19)
### ✨ Added
- Introduced **CLI argument parsing** with `argparse` for a unified user interface.
- Added `--verbose` flag for detailed debugging output.
- Added **metadata validation** to ensure all tag dictionaries contain unique keywords.
### 🛠 Changed
- Adjusted internal path resolution to work from both installed and development environments.
- Improved `load_all_tag_maps()` with caching and better error resilience.
### 🧹 Maintenance
- Cleaned duplicate mappings within YAML files.
- Improved documentation and inline docstrings throughout importer modules.
---
## [v0.3.2] — TPDB Bridge Integration (2025-10-21)
### ✨ Added
- Introduced **`tpdb_bridge.py`** for importing performer data from *ThePornDB* API.
- Added local **SQLite performer database** under `/src/importer/db/performers.db`.
- Added commands:
- `fetch` — Import performers in a single batch.
- `fill-index` — Continuously pull until a limit is reached.
- `enrich` — Fetch and merge extended performer metadata.
- `sync-all` — Hybrid incremental fetch + enrich loop.
- Introduced **local API key management** using `tpdb_api_key.txt` under `/secrets/`.
### 🧹 Maintenance
- Verified importer against TPDB rate limits and ensured safe error recovery.
- Added initial test data exports to `/src/importer/reports/`.
---
## [v0.3.3] — YAML Tag Inference Update (2025-10-20)
### ✨ Added
- Dynamic **YAML tag dictionary loader** for modular tag categories.
- Introduced **automatic source inference** for common networks.
- Added **`refresh-all`** bulk operation to reapply tag inference globally.
### 🛠 Changed
- Refactored `infer_tags()` to merge results from multiple YAML files dynamically.
- Enhanced progress and summary reporting for tag inference.
### 🧹 Maintenance
- Fixed `AttributeError: 'int' object has no attribute 'lower'` when parsing numeric YAML values.
- Standardized internal naming conventions.
---
## [v0.3.4] — Tag Dictionary Validation & Cleanup (2025-10-20)
### ✨ Added
- **`validate-tags`** CLI command for verifying YAML tag dictionaries.
- Detects duplicates, empty entries, and conflicting keywords.
- Outputs detailed summaries with per-keyword conflict listings.
### 🛠 Changed
- Standardized YAML structure enforcement (consistent key capitalization and layout).
- Added human-readable validation summaries.
### 🧹 Maintenance
- General code cleanup and consistent logging system updates.
---
## [v0.3.5] — Tag Statistics & Unified CLI Update (2025-10-20)
### ✨ Added
- **Tag Statistics System**
- Introduced `tag-stats` command to generate frequency analytics across all gallery metadata.
- Produces both console summaries and saved reports:
- `reports/tag_stats.json` — JSON-formatted tag counts.
- `reports/tag_stats_sorted.txt` — human-readable ranked list.
- **Unified CLI Interface (`cli.py`)**
- Consolidated all tagging and maintenance operations into a single entrypoint:
- `refresh-all`, `refresh-one`, `validate-tags`, `tag-stats`, `list`, `list-tags`, `add`, `remove`, `add-multi`, `show-metadata`, `source`
- Standardized command syntax and output formatting across all operations.
### 🛠 Changed
- Centralized tag frequency logic into `tag_gallery.py`.
- Refactored CLI dispatch system for scalability and better error handling.
- Standardized output style (headers, dividers, alignment).
### 🧹 Maintenance
- Automatic creation of `/src/importer/reports/` when missing.
- Verified all tag operations across 60+ galleries.
- Unified terminology and capitalization across CLI help text and docstrings.
### 🧭 Next Steps
- Add color-coded CLI output for readability.
- Implement `--export-csv` flag for `tag-stats` output.
- Begin roadmap for **v0.4.0** introducing ML-based tag confidence scoring and category weighting.
---
## [v0.3.6] — Enrichment Verification & Freshness Tracking (2025-10-26)
### ✨ Added
- **verify-enrichment command**
- Scans performer database for missing metadata (e.g., `url`, `last_updated`).
- Reports enriched vs incomplete entries, with preview via `--show-missing`.
- **Freshness tracking**
- Displays oldest and most recent enrichment timestamps.
- Warns if data is older than the freshness threshold.
- **Automatic TPDB key validation**
- Checks for valid API key and provides setup help if missing.
### 🛠 Changed
- Enrichment logic now guarantees `url` and `last_updated` fields for all performers.
- Improved emoji-based CLI logs for clarity.
- CLI outputs enrichment stats after each batch during `sync-all`.
### 🧹 Maintenance
- Cleanup and refactor of `tpdb_bridge.py` for readability and modular design.
- Verified completeness: **5,087 performers enriched** and up to date.
- Improved sleep timing and network error recovery during long sync runs.
### 🧭 Next Steps
- Add `--stale-days` CLI flag for user-defined freshness thresholds.
- Implement automatic enrichment scheduling via cron or systemd.
- Add shortcut alias `porndex-importer verify` for database status checks.
---
## [v0.3.7] — Scene-Based Enrichment & Channel Auto-Upgrade (2025-10-26)
### ✨ Added
Scene-based enrichment system
New flag --use-scenes enables intelligent inference of performer studios/channels using recent scene data from ThePornDB.
Automatically scans /performers/{id}/scenes for studio, site, or network fields when direct metadata is missing.
Dynamically upgrades performer entries from “Unknown” to valid channel names (e.g., “Desire Room”, “I Want Clips: Princess Chanel”).
Enhanced enrichment diagnostics
--debug-channels now outputs detailed channel inference logs with origin type (e.g., “via scene” or “via performer metadata”).
Emoji-coded output for improved clarity:
🎞 Scene-based upgrades
🎬 Direct metadata
⚫ Missing channel info
Progress verification
verify-enrichment now reports precise completion percentages and lists the most recent 20 upgraded performers.
### 🛠 Changed
Enrichment process now performs automatic in-place upgrades of performer_sources without overwriting other fields.
Optimized query logic to prioritize unverified performers and handle large datasets efficiently.
Added fine-grained sleep control between API requests to stay compliant with TPDB rate limits.
### 🧹 Maintenance
Refactored enrichment functions for modularity:
_fetch_studio_from_scenes() introduced for scene scanning.
Simplified argument handling and enriched exception tracing.
Verified enrichment stability across 100 performers with 44% successful channel discovery in live test.
Improved timestamp consistency in verification logs and upgraded database schema resilience.
## [v0.4.2] — Unified Importer, ML Pipeline, and Semantic Search (2025-10-27)
### ✨ Added
Unified Importer CLI (porndex-importer)
Replaces legacy multi-script workflow with a single command entrypoint.
Introduced import, refresh-all, refresh-one, validate-tags, tag-stats, and source subcommands.
Includes colorized CLI summaries and consistent emoji headers.
Machine Learning Dataset Builder
New module: ml/ml_dataset_builder.py
Generates structured dataset in ML/porndex_dataset.jsonl from all indexed galleries.
Each record includes title, models, tags, and image paths for hybrid ML ingestion.
Embedding Generation Module
Added ml/ml_embeddings.py to create hybrid text + image embeddings.
Builds per-gallery NPZ files under ML/embeddings/ and a consolidated embeddings_index.jsonl.
Supports configurable --img-samples and automatic device detection (--device auto).
Semantic & Strict Search
search command supports three modes:
semantic: CLIP + text hybrid cosine similarity (default)
text: text-only vector space search
strict: literal match filtering before vector ranking
Results show top-ranked galleries, confidence scores, and gallery IDs.
ML Verification Command
verify confirms index consistency, embedding count, and file integrity.
Directory Auto-Creation
Automatically generates ML/embeddings/ and ML/ if missing.
### 🛠 Changed
Importer Pipeline Refactor
Moved all CLI handling into src/importer/cli.py.
Centralized environment setup and config loading.
Replaced direct Python script calls with porndex-importer entrypoint.
Tagging System
Unified YAML dictionary loading for clothing, acts, body, and context.
Improved tag inference logging and duplicate suppression.
Output Formatting
Standardized headers, dividers, and indentation across all CLI commands.
Added readable time and path indicators for long-running operations.
### 🧹 Maintenance
Verified full ML dataset build across 150 test galleries (100% JSONL completion).
Added fallback for empty or missing image lists in dataset builder.
Improved error handling for partial downloads and interrupted imports.
Streamlined path resolution for consistent operation across dev and installed modes.
Updated documentation:
/docs/CLI_USAGE.md rewritten for v0.4.2.
/README.md modernized with full project tree and ML pipeline overview.
### 🧭 Next Steps
Begin v0.4.3–v0.5.x roadmap:
Integrate GroundingDINO + GroundedSAM for visual region detection.
Implement attribute extraction (gender → ethnicity → clothing).
Build visual verification tool (ml_dataset_inspector.py).
Add tag-confidence weighting system.
Extend TPDB bridge to cross-link enriched performer metadata into ML training records.
🧩 Summary of Current State (as of v0.4.2)
✅ Fully unified CLI under porndex-importer
✅ Stable YAML tagging + validation
✅ Complete ML dataset and embedding generation workflow
✅ Working hybrid semantic search
✅ Verified 150-gallery dataset index
© 2025 Leak Technologies — Porndex Importer Project

View File

@ -0,0 +1 @@
0.3.2

View File

@ -2,3 +2,4 @@ requests
beautifulsoup4
lxml
tqdm
pyyaml

View File

@ -2,30 +2,30 @@
# ============================================================
# File: src/importer/cli.py
# Description:
# Porndex CLI
# Goondex CLI
# Unified command-line interface for gallery importing,
# tagging, metadata refresh, and analytics with color-coded output.
# ============================================================
"""
Porndex CLI
Goondex CLI
===========
Unified command-line interface for gallery importing, tagging,
metadata refresh, and tag analytics with color-coded output.
Usage examples:
porndex import "https://www.pornpics.com/galleries/..."
porndex refresh-all
porndex refresh-one "Gallery_Name"
porndex validate-tags
porndex tag-stats
goondex import "https://www.pornpics.com/galleries/..."
goondex refresh-all
goondex refresh-one "Gallery_Name"
goondex validate-tags
goondex tag-stats
"""
import sys
import os
from importer import tag_gallery
from importer.tag_utils import validate_tag_dictionaries
from importer.gallery_importer import import_gallery # ⬅ NEW import
from src.importer import tag_gallery
from src.importer.tag_utils import validate_tag_dictionaries
from src.importer.gallery_importer import import_gallery
# ───────────────────────────────────────────────
@ -42,19 +42,40 @@ class Colors:
def colorize(text, color):
"""Helper to wrap text with ANSI color codes."""
"""Wrap text with ANSI color codes."""
return f"{color}{text}{Colors.RESET}"
# ───────────────────────────────────────────────
# VERSION HANDLER
# ───────────────────────────────────────────────
def load_version():
    """Read the version string from the VERSION file two directories above this module.

    Returns:
        str: The stripped contents of the VERSION file, or "unknown" when the
        file is missing, cannot be opened or read, is not valid UTF-8, or
        contains only whitespace.
    """
    # "../.." relative to this module's directory is treated as the project root.
    root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
    version_file = os.path.join(root_dir, "VERSION")
    try:
        with open(version_file, "r", encoding="utf-8") as f:
            version = f.read().strip()
    except (OSError, UnicodeDecodeError):
        # OSError covers FileNotFoundError as well as permission and
        # is-a-directory failures; a version lookup should never crash the CLI.
        return "unknown"
    # A blank VERSION file would otherwise be reported as an empty version.
    return version or "unknown"
def print_version():
    """Show the colorized Goondex version banner, then exit with status 0."""
    banner = "Goondex v{}".format(load_version())
    print(colorize(banner, Colors.CYAN))
    # Version display is a terminal action: stop the process successfully.
    sys.exit(0)
# ───────────────────────────────────────────────
# USAGE HELP
# ───────────────────────────────────────────────
def print_usage():
print(colorize("\nPorndex CLI", Colors.BOLD))
print(colorize("\nGoondex CLI", Colors.BOLD))
print(colorize("─────────────────────────────────────", Colors.CYAN))
print("""
Usage:
porndex <command> [args...]
goondex <command> [args...]
Commands:
import <url> Import a new gallery from PornPics
@ -70,11 +91,15 @@ Commands:
show-metadata <folder> Show metadata.json content
source <folder|bulk> set <src> Set gallery source (single or all)
Flags:
-h, --help Show this help message
-v, --version Show current version
Examples:
porndex import "https://www.pornpics.com/galleries/example/"
porndex refresh-one "20251020_2117_Mariella_Sun"
porndex tag-stats
porndex validate-tags
goondex import "https://www.pornpics.com/galleries/example/"
goondex refresh-one "20251020_2117_Mariella_Sun"
goondex tag-stats
goondex validate-tags
""".strip())
@ -90,27 +115,30 @@ def main():
args = sys.argv[2:]
# ─────────────────────────────
# HELP FLAG SUPPORT
# HELP / VERSION FLAGS
# ─────────────────────────────
if cmd in ("--help", "-h", "help"):
print_usage()
sys.exit(0)
if cmd in ("--version", "-v", "version"):
print_version()
# ─────────────────────────────
# IMPORT NEW GALLERY
# ─────────────────────────────
if cmd == "import":
if not args:
print(colorize("[!] Usage: porndex import <gallery_url>", Colors.RED))
print(colorize("[!] Usage: goondex import <gallery_url>", Colors.RED))
sys.exit(1)
url = args[0]
print(colorize(f"[🌐] Importing gallery from URL: {url}", Colors.CYAN))
print(colorize(f"[INFO] Importing gallery from URL: {url}", Colors.CYAN))
try:
import_gallery(url)
print(colorize("[] Gallery import completed successfully.", Colors.GREEN))
print(colorize("[OK] Gallery import completed successfully.", Colors.GREEN))
except Exception as e:
print(colorize(f"[] Import failed: {e}", Colors.RED))
print(colorize(f"[ERROR] Import failed: {e}", Colors.RED))
sys.exit(1)
return
@ -118,25 +146,25 @@ def main():
# REFRESH COMMANDS
# ─────────────────────────────
if cmd == "refresh-all":
print(colorize("[] Refreshing all galleries...", Colors.CYAN))
print(colorize("[INFO] Refreshing all galleries...", Colors.CYAN))
tag_gallery.refresh_all_tags()
print(colorize("[] Refresh complete.", Colors.GREEN))
print(colorize("[OK] Refresh complete.", Colors.GREEN))
return
elif cmd == "refresh-one":
if not args:
print(colorize("[!] Usage: porndex refresh-one <folder_name>", Colors.RED))
print(colorize("[!] Usage: goondex refresh-one <folder_name>", Colors.RED))
sys.exit(1)
print(colorize(f"[] Refreshing gallery: {args[0]}", Colors.CYAN))
print(colorize(f"[INFO] Refreshing gallery: {args[0]}", Colors.CYAN))
tag_gallery.refresh_one(args[0])
print(colorize("[] Gallery refresh complete.", Colors.GREEN))
print(colorize("[OK] Gallery refresh complete.", Colors.GREEN))
return
# ─────────────────────────────
# VALIDATION COMMAND
# ─────────────────────────────
elif cmd == "validate-tags":
print(colorize("[🔍] Validating tag dictionaries...\n", Colors.CYAN))
print(colorize("[INFO] Validating tag dictionaries...\n", Colors.CYAN))
report = validate_tag_dictionaries()
total_files = len(report.get("files", {}))
@ -151,16 +179,16 @@ def main():
print(colorize(f"Conflicts: {total_conflicts}", Colors.RED if total_conflicts else Colors.GREEN))
print(colorize(f"Duplicates: {total_dupes}", Colors.YELLOW if total_dupes else Colors.GREEN))
print(f"Empty entries: {total_empty}")
print(colorize("\n[] Validation finished.", Colors.GREEN))
print(colorize("\n[OK] Validation finished.", Colors.GREEN))
return
# ─────────────────────────────
# TAG STATISTICS
# ─────────────────────────────
elif cmd == "tag-stats":
print(colorize("[📊] Generating tag statistics...", Colors.CYAN))
print(colorize("[INFO] Generating tag statistics...", Colors.CYAN))
tag_gallery.tag_stats()
print(colorize("[] Tag statistics report complete.", Colors.GREEN))
print(colorize("[OK] Tag statistics report complete.", Colors.GREEN))
return
# ─────────────────────────────
@ -170,9 +198,9 @@ def main():
func = getattr(tag_gallery, cmd.replace("-", "_"))
try:
func(*args)
print(colorize("[] Command executed successfully.", Colors.GREEN))
print(colorize("[OK] Command executed successfully.", Colors.GREEN))
except Exception as e:
print(colorize(f"[✖] Error: {e}", Colors.RED))
print(colorize(f"[ERROR] {e}", Colors.RED))
return
# ─────────────────────────────

View File

@ -25,8 +25,9 @@ from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
from importer.fetch_gallery_metadata import fetch_gallery_metadata, save_gallery_metadata
from importer.index_builder import build_index
# ✅ FIXED IMPORTS — all explicitly under src.importer
from src.importer.fetch_gallery_metadata import fetch_gallery_metadata, save_gallery_metadata
from src.importer.index_builder import build_index
# ───────────────────────────────────────────────
# Constants
@ -149,7 +150,7 @@ def auto_tag_gallery(folder_name: str):
try:
print(color(f"[↻] Auto-tagging {folder_name} ...", "96"))
subprocess.run(
["python", "-m", "importer.tag_gallery", "refresh-one", folder_name],
["python", "-m", "src.importer.tag_gallery", "refresh-one", folder_name],
check=True
)
except Exception as e:
@ -162,13 +163,10 @@ def auto_tag_gallery(folder_name: str):
def import_gallery(url: str):
"""Fetch metadata, create/update folder, download images, update index."""
# Print only once when we start importing the gallery
print(color(f"[🌐] Importing gallery from URL: {url}", "96"))
# Fetch metadata for the gallery
metadata_new = fetch_gallery_metadata(url)
# Find existing gallery by title substring
# Detect existing gallery
existing_folder = None
for folder in os.listdir(BASE_SAVE_PATH):
folder_path = os.path.join(BASE_SAVE_PATH, folder)
@ -201,11 +199,9 @@ def import_gallery(url: str):
download_images_threaded(gallery_folder, image_urls)
print(color(f"[✓] Completed gallery import: {gallery_folder}", "92"))
# Auto-tag newly imported gallery
folder_name = os.path.basename(gallery_folder)
auto_tag_gallery(folder_name)
# Display stats summary
meta_path = os.path.join(gallery_folder, "metadata.json")
if os.path.exists(meta_path):
with open(meta_path, "r", encoding="utf-8") as f:
@ -228,10 +224,9 @@ def import_gallery(url: str):
else:
print(color("[⚠] No metadata.json found to display stats.", "93"))
# Final index update
build_index()
print(color("[✓] Gallery index updated — import complete.\n", GREEN))
# ───────────────────────────────────────────────
# Refresh all galleries

View File

@ -27,7 +27,7 @@ import sys
import yaml
import json
from importer.tag_utils import validate_tag_dictionaries
from src.importer.tag_utils import validate_tag_dictionaries
from collections import Counter
BASE_DIR = os.path.expanduser(

36
src/utils/install_alias.fish Executable file
View File

@ -0,0 +1,36 @@
#!/usr/bin/env fish
# ============================================================
# Goondex CLI Alias Installer (Fish-native)
# ============================================================
# Automatically sets up a persistent alias so you can run:
#   goondex import <url>
# or any CLI command without typing the Python path manually.
#
# Usage (from the project root, because the interpreter path is
# resolved relative to the current working directory):
#   source src/utils/install_alias.fish   # alias usable right away
#   ./src/utils/install_alias.fish        # alias usable in new sessions
#
# Exit status: 0 on success (or alias already present), 1 when the
# virtual environment is missing or the Fish config cannot be written.
# ============================================================
set project_root (pwd)
set venv_path "$project_root/.venv/bin/python"
# NOTE(review): `python -m importer.cli` only works where the `importer`
# package is importable (cwd or PYTHONPATH) — confirm the alias resolves
# when invoked from outside the project tree.
set alias_line "alias goondex \"$venv_path -m importer.cli\""
set config_file "$HOME/.config/fish/config.fish"
set config_dir (dirname $config_file)

echo "Installing Goondex alias for Fish..."

# Verify virtual environment exists
if not test -f $venv_path
    echo "Error: Virtual environment not found at $venv_path" >&2
    echo "Please create it with: python -m venv .venv" >&2
    exit 1
end

# The config directory may not exist yet on a fresh setup; without it the
# append below would fail.
if not mkdir -p $config_dir
    echo "Error: Could not create $config_dir" >&2
    exit 1
end

# Add alias if not already present. grep's stderr is silenced on purpose:
# a missing config file simply means the alias has not been installed yet.
if grep -Fxq -- "$alias_line" $config_file 2>/dev/null
    echo "Alias already exists in $config_file"
else if echo $alias_line >> $config_file
    echo "Alias added to $config_file"
else
    # Only report success when the write actually happened.
    echo "Error: Could not write to $config_file" >&2
    exit 1
end

# Apply immediately when possible. Sourcing the config only affects the
# shell that runs this script, so it is useful only when the script itself
# was sourced from an interactive session; an executed script is a child
# process and cannot change its parent shell.
if status is-interactive
    echo "Reloading Fish config..."
    source $config_file
    echo "Setup complete. Try: goondex --version"
else
    echo "Setup complete. Open a new Fish session or run: source $config_file"
    echo "Then try: goondex --version"
end

30
src/utils/install_alias.sh Normal file → Executable file
View File

@ -2,41 +2,45 @@
# ============================================================
# File: src/utils/install_alias.sh
# Description:
# Adds the Porndex CLI alias for all supported shells (Fish, Bash, Zsh).
# Makes 'porndex' available globally so users can run commands like:
# porndex import "https://www.pornpics.com/galleries/example/"
# Adds the Goondex CLI alias for all supported shells (Fish, Bash, Zsh).
# Makes 'goondex' available globally so you can run commands like:
# goondex import "https://www.pornpics.com/galleries/example/"
# ============================================================
set -e
ALIAS_CMD='alias porndex="python -m src.importer.cli"'
ALIAS_CMD='alias goondex="python -m importer.cli"'
echo "🔧 Installing Porndex CLI alias..."
echo "Installing Goondex CLI alias..."
# ───────────────────────────────────────────────
# Fish shell
# ───────────────────────────────────────────────
if [ -n "$FISH_VERSION" ]; then
CONFIG_FILE="$HOME/.config/fish/config.fish"
if ! grep -Fxq "$ALIAS_CMD" "$CONFIG_FILE" 2>/dev/null; then
echo "$ALIAS_CMD" >> "$CONFIG_FILE"
echo "Added alias to $CONFIG_FILE"
echo "Added alias to $CONFIG_FILE"
else
echo "⚠️ Alias already present in $CONFIG_FILE"
echo "Alias already present in $CONFIG_FILE"
fi
echo "💡 Run 'source $CONFIG_FILE' or restart your terminal to apply."
echo "Reload your Fish config with: source $CONFIG_FILE"
exit 0
fi
# ───────────────────────────────────────────────
# Bash / Zsh fallback
# ───────────────────────────────────────────────
for FILE in "$HOME/.bashrc" "$HOME/.zshrc"; do
if [ -f "$FILE" ]; then
if ! grep -Fxq "$ALIAS_CMD" "$FILE"; then
if ! grep -Fxq "$ALIAS_CMD" "$FILE" 2>/dev/null; then
echo "$ALIAS_CMD" >> "$FILE"
echo "Added alias to $FILE"
echo "Added alias to $FILE"
else
echo "⚠️ Alias already present in $FILE"
echo "Alias already present in $FILE"
fi
fi
done
echo "💡 Restart your terminal or run 'source ~/.bashrc' to activate Porndex."
echo "Done"
echo "Reload your shell or run 'source ~/.bashrc' (or '~/.zshrc') to activate Goondex."
echo "Done."