diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0c2d6f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,74 @@ +# ============================================================ +# Goondex: Large / Sensitive Data Exclusions +# ============================================================ + +# ============================================================ +# πŸ–ΌοΈ Gallery Assets +# ------------------------------------------------------------ +# Exclude all image and video content from galleries. +# Keep only metadata.json and tag YAMLs for database integrity. +# ============================================================ +Galleries/**/*.jpg +Galleries/**/*.jpeg +Galleries/**/*.png +Galleries/**/*.gif +Galleries/**/*.webp +Galleries/**/*.mp4 +Galleries/**/*.mov +Galleries/**/*.avi +Galleries/**/*.mkv +Galleries/**/*.zip +Galleries/**/*.tar + +# Keep metadata and tag files +!Galleries/**/metadata.json +!Galleries/**/tags.yaml +!Galleries/README.md + +# ============================================================ +# 🧠 Machine Learning Artifacts +# ------------------------------------------------------------ +# Model weights, generated outputs, and cached intermediate data. +# ============================================================ +src/ml/models/*.pth +src/ml/outputs/** +src/ml/cache/** +src/ml/__pycache__/ +src/ml/__pycache__/* + +# ============================================================ +# 🧰 Build / Environment Files +# ------------------------------------------------------------ +# Prevent committing environment-specific or temporary files. +# ============================================================ +__pycache__/ +*.pyc +*.pyo +*.pyd +*.log +*.tmp +*.bak +*.swp + +# Virtual environments +.venv/ +env/ +venv/ + +# ============================================================ +# 🧱 Project-Level Exclusions +# ------------------------------------------------------------ +# Prevent committing local configurations or system files. +# ============================================================ +.DS_Store +Thumbs.db +.idea/ +.vscode/ +*.iml + +# ============================================================ +# 🧾 Dependency and Build Outputs +# ============================================================ +*.egg-info/ +build/ +dist/ diff --git "a/: update changelog for v0.3.0 β€” auto-tagging system and metadata expansion\"" "b/: update changelog for v0.3.0 β€” auto-tagging system and metadata expansion\"" deleted file mode 100644 index 74570f6..0000000 --- "a/: update changelog for v0.3.0 β€” auto-tagging system and metadata expansion\"" +++ /dev/null @@ -1,324 +0,0 @@ - - SSUUMMMMAARRYY OOFF LLEESSSS CCOOMMMMAANNDDSS - - Commands marked with * may be preceded by a number, _N. - Notes in parentheses indicate the behavior if _N is given. - A key preceded by a caret indicates the Ctrl key; thus ^K is ctrl-K. - - h H Display this help. - q :q Q :Q ZZ Exit. - --------------------------------------------------------------------------- - - MMOOVVIINNGG - - e ^E j ^N CR * Forward one line (or _N lines). - y ^Y k ^K ^P * Backward one line (or _N lines). - ESC-j * Forward one file line (or _N file lines). - ESC-k * Backward one file line (or _N file lines). - f ^F ^V SPACE * Forward one window (or _N lines). - b ^B ESC-v * Backward one window (or _N lines). - z * Forward one window (and set window to _N). - w * Backward one window (and set window to _N). - ESC-SPACE * Forward one window, but don't stop at end-of-file. - ESC-b * Backward one window, but don't stop at beginning-of-file. - d ^D * Forward one half-window (and set half-window to _N). - u ^U * Backward one half-window (and set half-window to _N). - ESC-) RightArrow * Right one half screen width (or _N positions). - ESC-( LeftArrow * Left one half screen width (or _N positions). - ESC-} ^RightArrow Right to last column displayed. - ESC-{ ^LeftArrow Left to first column. - F Forward forever; like "tail -f". - ESC-F Like F but stop when search pattern is found. - r ^R ^L Repaint screen. - R Repaint screen, discarding buffered input. - --------------------------------------------------- - Default "window" is the screen height. - Default "half-window" is half of the screen height. - --------------------------------------------------------------------------- - - SSEEAARRCCHHIINNGG - - /_p_a_t_t_e_r_n * Search forward for (_N-th) matching line. - ?_p_a_t_t_e_r_n * Search backward for (_N-th) matching line. - n * Repeat previous search (for _N-th occurrence). - N * Repeat previous search in reverse direction. - ESC-n * Repeat previous search, spanning files. - ESC-N * Repeat previous search, reverse dir. & spanning files. - ^O^N ^On * Search forward for (_N-th) OSC8 hyperlink. - ^O^P ^Op * Search backward for (_N-th) OSC8 hyperlink. - ^O^L ^Ol Jump to the currently selected OSC8 hyperlink. - ESC-u Undo (toggle) search highlighting. - ESC-U Clear search highlighting. - &_p_a_t_t_e_r_n * Display only matching lines. - --------------------------------------------------- - Search is case-sensitive unless changed with -i or -I. - A search pattern may begin with one or more of: - ^N or ! Search for NON-matching lines. - ^E or * Search multiple files (pass thru END OF FILE). - ^F or @ Start search at FIRST file (for /) or last file (for ?). - ^K Highlight matches, but don't move (KEEP position). - ^R Don't use REGULAR EXPRESSIONS. - ^S _n Search for match in _n-th parenthesized subpattern. - ^W WRAP search if no match found. - ^L Enter next character literally into pattern. - --------------------------------------------------------------------------- - - JJUUMMPPIINNGG - - g < ESC-< * Go to first line in file (or line _N). - G > ESC-> * Go to last line in file (or line _N). - p % * Go to beginning of file (or _N percent into file). - t * Go to the (_N-th) next tag. - T * Go to the (_N-th) previous tag. - { ( [ * Find close bracket } ) ]. - } ) ] * Find open bracket { ( [. - ESC-^F _<_c_1_> _<_c_2_> * Find close bracket _<_c_2_>. - ESC-^B _<_c_1_> _<_c_2_> * Find open bracket _<_c_1_>. - --------------------------------------------------- - Each "find close bracket" command goes forward to the close bracket - matching the (_N-th) open bracket in the top line. - Each "find open bracket" command goes backward to the open bracket - matching the (_N-th) close bracket in the bottom line. - - m_<_l_e_t_t_e_r_> Mark the current top line with . - M_<_l_e_t_t_e_r_> Mark the current bottom line with . - '_<_l_e_t_t_e_r_> Go to a previously marked position. - '' Go to the previous position. - ^X^X Same as '. - ESC-m_<_l_e_t_t_e_r_> Clear a mark. - --------------------------------------------------- - A mark is any upper-case or lower-case letter. - Certain marks are predefined: - ^ means beginning of the file - $ means end of the file - --------------------------------------------------------------------------- - - CCHHAANNGGIINNGG FFIILLEESS - - :e [_f_i_l_e] Examine a new file. - ^X^V Same as :e. - :n * Examine the (_N-th) next file from the command line. - :p * Examine the (_N-th) previous file from the command line. - :x * Examine the first (or _N-th) file from the command line. - ^O^O Open the currently selected OSC8 hyperlink. - :d Delete the current file from the command line list. - = ^G :f Print current file name. - --------------------------------------------------------------------------- - - MMIISSCCEELLLLAANNEEOOUUSS CCOOMMMMAANNDDSS - - -_<_f_l_a_g_> Toggle a command line option [see OPTIONS below]. - --_<_n_a_m_e_> Toggle a command line option, by name. - __<_f_l_a_g_> Display the setting of a command line option. - ___<_n_a_m_e_> Display the setting of an option, by name. - +_c_m_d Execute the less cmd each time a new file is examined. - - !_c_o_m_m_a_n_d Execute the shell command with $SHELL. - #_c_o_m_m_a_n_d Execute the shell command, expanded like a prompt. - |XX_c_o_m_m_a_n_d Pipe file between current pos & mark XX to shell command. - s _f_i_l_e Save input to a file. - v Edit the current file with $VISUAL or $EDITOR. - V Print version number of "less". - --------------------------------------------------------------------------- - - OOPPTTIIOONNSS - - Most options may be changed either on the command line, - or from within less by using the - or -- command. - Options may be given in one of two forms: either a single - character preceded by a -, or a name preceded by --. - - -? ........ --help - Display help (from command line). - -a ........ --search-skip-screen - Search skips current screen. - -A ........ --SEARCH-SKIP-SCREEN - Search starts just after target line. - -b [_N] .... --buffers=[_N] - Number of buffers. - -B ........ --auto-buffers - Don't automatically allocate buffers for pipes. - -c ........ --clear-screen - Repaint by clearing rather than scrolling. - -d ........ --dumb - Dumb terminal. - -D xx_c_o_l_o_r . --color=xx_c_o_l_o_r - Set screen colors. - -e -E .... --quit-at-eof --QUIT-AT-EOF - Quit at end of file. - -f ........ --force - Force open non-regular files. - -F ........ --quit-if-one-screen - Quit if entire file fits on first screen. - -g ........ --hilite-search - Highlight only last match for searches. - -G ........ --HILITE-SEARCH - Don't highlight any matches for searches. - -h [_N] .... --max-back-scroll=[_N] - Backward scroll limit. - -i ........ --ignore-case - Ignore case in searches that do not contain uppercase. - -I ........ --IGNORE-CASE - Ignore case in all searches. - -j [_N] .... --jump-target=[_N] - Screen position of target lines. - -J ........ --status-column - Display a status column at left edge of screen. - -k _f_i_l_e ... --lesskey-file=_f_i_l_e - Use a compiled lesskey file. - -K ........ --quit-on-intr - Exit less in response to ctrl-C. - -L ........ --no-lessopen - Ignore the LESSOPEN environment variable. - -m -M .... --long-prompt --LONG-PROMPT - Set prompt style. - -n ......... --line-numbers - Suppress line numbers in prompts and messages. - -N ......... --LINE-NUMBERS - Display line number at start of each line. - -o [_f_i_l_e] .. --log-file=[_f_i_l_e] - Copy to log file (standard input only). - -O [_f_i_l_e] .. --LOG-FILE=[_f_i_l_e] - Copy to log file (unconditionally overwrite). - -p _p_a_t_t_e_r_n . --pattern=[_p_a_t_t_e_r_n] - Start at pattern (from command line). - -P [_p_r_o_m_p_t] --prompt=[_p_r_o_m_p_t] - Define new prompt. - -q -Q .... --quiet --QUIET --silent --SILENT - Quiet the terminal bell. - -r -R .... --raw-control-chars --RAW-CONTROL-CHARS - Output "raw" control characters. - -s ........ --squeeze-blank-lines - Squeeze multiple blank lines. - -S ........ --chop-long-lines - Chop (truncate) long lines rather than wrapping. - -t _t_a_g .... --tag=[_t_a_g] - Find a tag. - -T [_t_a_g_s_f_i_l_e] --tag-file=[_t_a_g_s_f_i_l_e] - Use an alternate tags file. - -u -U .... --underline-special --UNDERLINE-SPECIAL - Change handling of backspaces, tabs and carriage returns. - -V ........ --version - Display the version number of "less". - -w ........ --hilite-unread - Highlight first new line after forward-screen. - -W ........ --HILITE-UNREAD - Highlight first new line after any forward movement. - -x [_N[,...]] --tabs=[_N[,...]] - Set tab stops. - -X ........ --no-init - Don't use termcap init/deinit strings. - -y [_N] .... --max-forw-scroll=[_N] - Forward scroll limit. - -z [_N] .... --window=[_N] - Set size of window. - -" [_c[_c]] . --quotes=[_c[_c]] - Set shell quote characters. - -~ ........ --tilde - Don't display tildes after end of file. - -# [_N] .... --shift=[_N] - Set horizontal scroll amount (0 = one half screen width). - - --exit-follow-on-close - Exit F command on a pipe when writer closes pipe. - --file-size - Automatically determine the size of the input file. - --follow-name - The F command changes files if the input file is renamed. - --form-feed - Stop scrolling when a form feed character is reached. - --header=[_L[,_C[,_N]]] - Use _L lines (starting at line _N) and _C columns as headers. - --incsearch - Search file as each pattern character is typed in. - --intr=[_C] - Use _C instead of ^X to interrupt a read. - --lesskey-context=_t_e_x_t - Use lesskey source file contents. - --lesskey-src=_f_i_l_e - Use a lesskey source file. - --line-num-width=[_N] - Set the width of the -N line number field to _N characters. - --match-shift=[_N] - Show at least _N characters to the left of a search match. - --modelines=[_N] - Read _N lines from the input file and look for vim modelines. - --mouse - Enable mouse input. - --no-edit-warn - Don't warn when using v command on a file opened via LESSOPEN. - --no-keypad - Don't send termcap keypad init/deinit strings. - --no-histdups - Remove duplicates from command history. - --no-number-headers - Don't give line numbers to header lines. - --no-paste - Ignore pasted input. - --no-search-header-lines - Searches do not include header lines. - --no-search-header-columns - Searches do not include header columns. - --no-search-headers - Searches do not include header lines or columns. - --no-vbell - Disable the terminal's visual bell. - --redraw-on-quit - Redraw final screen when quitting. - --rscroll=[_C] - Set the character used to mark truncated lines. - --save-marks - Retain marks across invocations of less. - --search-options=[EFKNRW-] - Set default options for every search. - --show-preproc-errors - Display a message if preprocessor exits with an error status. - --proc-backspace - Process backspaces for bold/underline. - --PROC-BACKSPACE - Treat backspaces as control characters. - --proc-return - Delete carriage returns before newline. - --PROC-RETURN - Treat carriage returns as control characters. - --proc-tab - Expand tabs to spaces. - --PROC-TAB - Treat tabs as control characters. - --status-col-width=[_N] - Set the width of the -J status column to _N characters. - --status-line - Highlight or color the entire line containing a mark. - --use-backslash - Subsequent options use backslash as escape char. - --use-color - Enables colored text. - --wheel-lines=[_N] - Each click of the mouse wheel moves _N lines. - --wordwrap - Wrap lines at spaces. - - - --------------------------------------------------------------------------- - - LLIINNEE EEDDIITTIINNGG - - These keys can be used to edit text being entered - on the "command line" at the bottom of the screen. - - RightArrow ..................... ESC-l ... Move cursor right one character. - LeftArrow ...................... ESC-h ... Move cursor left one character. - ctrl-RightArrow ESC-RightArrow ESC-w ... Move cursor right one word. - ctrl-LeftArrow ESC-LeftArrow ESC-b ... Move cursor left one word. - HOME ........................... ESC-0 ... Move cursor to start of line. - END ............................ ESC-$ ... Move cursor to end of line. - BACKSPACE ................................ Delete char to left of cursor. - DELETE ......................... ESC-x ... Delete char under cursor. - ctrl-BACKSPACE ESC-BACKSPACE ........... Delete word to left of cursor. - ctrl-DELETE .... ESC-DELETE .... ESC-X ... Delete word under cursor. - ctrl-U ......... ESC (MS-DOS only) ....... Delete entire line. - UpArrow ........................ ESC-k ... Retrieve previous command line. - DownArrow ...................... ESC-j ... Retrieve next command line. - TAB ...................................... Complete filename & cycle. - SHIFT-TAB ...................... ESC-TAB Complete filename & reverse cycle. - ctrl-L ................................... Complete filename, list all. diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 63be8cc..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,309 +0,0 @@ -# πŸ“œ Porndex Importer β€” Full Changelog -> **Project:** Porndex_PornpicsImporter -> **Repository:** Leak Technologies -> **Branch:** main -> **Version Line:** v0.3.x Development Cycle - ---- - -## [v0.3.0] β€” Modular Tagging Framework Foundation (2025-10-18) - -### ✨ Added -- Introduced **YAML-based Tag Dictionaries** stored under `/src/importer/tagging/` for modular, human-readable tag definitions. -- Implemented initial **`refresh-all`** and **`refresh-one`** commands for reapplying tag inference to galleries. -- Added **persistent `inferred_tags` field** in `metadata.json` to differentiate between automated and manual tags. -- Implemented **automatic source inference** for known networks (e.g., Brazzers, FTV Girls, PornPics). -- Enhanced CLI output with colorized progress indicators and summary totals. - -### πŸ›  Changed -- Refactored `tag_gallery.py` for modular tagging architecture. -- Centralized configuration paths to `/src/importer/config/` for easier project-wide access. - -### 🧹 Maintenance -- Improved exception handling for missing or malformed tag dictionaries. -- Added consistent emoji/logging system across CLI commands. - ---- - -## [v0.3.1] β€” CLI Polishing & Dictionary Improvements (2025-10-19) - -### ✨ Added -- Introduced **CLI argument parsing** with `argparse` for a unified user interface. -- Added `--verbose` flag for detailed debugging output. -- Added **metadata validation** to ensure all tag dictionaries contain unique keywords. - -### πŸ›  Changed -- Adjusted internal path resolution to work from both installed and development environments. -- Improved `load_all_tag_maps()` with caching and better error resilience. - -### 🧹 Maintenance -- Cleaned duplicate mappings within YAML files. -- Improved documentation and inline docstrings throughout importer modules. - ---- - -## [v0.3.2] β€” TPDB Bridge Integration (2025-10-21) - -### ✨ Added -- Introduced **`tpdb_bridge.py`** for importing performer data from *ThePornDB* API. -- Added local **SQLite performer database** under `/src/importer/db/performers.db`. -- Added commands: - - `fetch` β€” Import performers in a single batch. - - `fill-index` β€” Continuously pull until a limit is reached. - - `enrich` β€” Fetch and merge extended performer metadata. - - `sync-all` β€” Hybrid incremental fetch + enrich loop. -- Introduced **local API key management** using `tpdb_api_key.txt` under `/secrets/`. - -### 🧹 Maintenance -- Verified importer against TPDB rate limits and ensured safe error recovery. -- Added initial test data exports to `/src/importer/reports/`. - ---- - -## [v0.3.3] β€” YAML Tag Inference Update (2025-10-20) - -### ✨ Added -- Dynamic **YAML tag dictionary loader** for modular tag categories. -- Introduced **automatic source inference** for common networks. -- Added **`refresh-all`** bulk operation to reapply tag inference globally. - -### πŸ›  Changed -- Refactored `infer_tags()` to merge results from multiple YAML files dynamically. -- Enhanced progress and summary reporting for tag inference. - -### 🧹 Maintenance -- Fixed `AttributeError: 'int' object has no attribute 'lower'` when parsing numeric YAML values. -- Standardized internal naming conventions. - ---- - -## [v0.3.4] β€” Tag Dictionary Validation & Cleanup (2025-10-20) - -### ✨ Added -- **`validate-tags`** CLI command for verifying YAML tag dictionaries. - - Detects duplicates, empty entries, and conflicting keywords. - - Outputs detailed summaries with per-keyword conflict listings. - -### πŸ›  Changed -- Standardized YAML structure enforcement (consistent key capitalization and layout). -- Added human-readable validation summaries. - -### 🧹 Maintenance -- General code cleanup and consistent logging system updates. - ---- - -## [v0.3.5] β€” Tag Statistics & Unified CLI Update (2025-10-20) - -### ✨ Added -- **Tag Statistics System** - - Introduced `tag-stats` command to generate frequency analytics across all gallery metadata. - - Produces both console summaries and saved reports: - - `reports/tag_stats.json` β€” JSON-formatted tag counts. - - `reports/tag_stats_sorted.txt` β€” human-readable ranked list. -- **Unified CLI Interface (`cli.py`)** - - Consolidated all tagging and maintenance operations into a single entrypoint: - - `refresh-all`, `refresh-one`, `validate-tags`, `tag-stats`, `list`, `list-tags`, `add`, `remove`, `add-multi`, `show-metadata`, `source` - - Standardized command syntax and output formatting across all operations. - -### πŸ›  Changed -- Centralized tag frequency logic into `tag_gallery.py`. -- Refactored CLI dispatch system for scalability and better error handling. -- Standardized output style (headers, dividers, alignment). - -### 🧹 Maintenance -- Automatic creation of `/src/importer/reports/` when missing. -- Verified all tag operations across 60+ galleries. -- Unified terminology and capitalization across CLI help text and docstrings. - -### 🧭 Next Steps -- Add color-coded CLI output for readability. -- Implement `--export-csv` flag for `tag-stats` output. -- Begin roadmap for **v0.4.0** introducing ML-based tag confidence scoring and category weighting. - ---- - -## [v0.3.6] β€” Enrichment Verification & Freshness Tracking (2025-10-26) - -### ✨ Added -- **verify-enrichment command** - - Scans performer database for missing metadata (e.g., `url`, `last_updated`). - - Reports enriched vs incomplete entries, with preview via `--show-missing`. -- **Freshness tracking** - - Displays oldest and most recent enrichment timestamps. - - Warns if data is older than the freshness threshold. -- **Automatic TPDB key validation** - - Checks for valid API key and provides setup help if missing. - -### πŸ›  Changed -- Enrichment logic now guarantees `url` and `last_updated` fields for all performers. -- Improved emoji-based CLI logs for clarity. -- CLI outputs enrichment stats after each batch during `sync-all`. - -### 🧹 Maintenance -- Cleanup and refactor of `tpdb_bridge.py` for readability and modular design. -- Verified completeness: **5,087 performers enriched** and up to date. -- Improved sleep timing and network error recovery during long sync runs. - -### 🧭 Next Steps -- Add `--stale-days` CLI flag for user-defined freshness thresholds. -- Implement automatic enrichment scheduling via cron or systemd. -- Add shortcut alias `porndex-importer verify` for database status checks. - ---- - -[v0.3.7] β€” Scene-Based Enrichment & Channel Auto-Upgrade (2025-10-26) -✨ Added - -Scene-based enrichment system - -New flag --use-scenes enables intelligent inference of performer studios/channels using recent scene data from ThePornDB. - -Automatically scans /performers/{id}/scenes for studio, site, or network fields when direct metadata is missing. - -Dynamically upgrades performer entries from β€œUnknown” to valid channel names (e.g., β€œDesire Room”, β€œI Want Clips: Princess Chanel”). - -Enhanced enrichment diagnostics - ---debug-channels now outputs detailed channel inference logs with origin type (e.g., β€œvia scene” or β€œvia performer metadata”). - -Emoji-coded output for improved clarity: - -🎞 Scene-based upgrades - -🎬 Direct metadata - -⚫ Missing channel info - -Progress verification - -verify-enrichment now reports precise completion percentages and lists the most recent 20 upgraded performers. - -πŸ›  Changed - -Enrichment process now performs automatic in-place upgrades of performer_sources without overwriting other fields. - -Optimized query logic to prioritize unverified performers and handle large datasets efficiently. - -Added fine-grained sleep control between API requests to stay compliant with TPDB rate limits. - -🧹 Maintenance - -Refactored enrichment functions for modularity: - -_fetch_studio_from_scenes() introduced for scene scanning. - -Simplified argument handling and enriched exception tracing. - -Verified enrichment stability across 100 performers with 44% successful channel discovery in live test. - -Improved timestamp consistency in verification logs and upgraded database schema resilience. - -[v0.4.2] β€” Unified Importer, ML Pipeline, and Semantic Search (2025-10-27) -✨ Added - -Unified Importer CLI (porndex-importer) - -Replaces legacy multi-script workflow with a single command entrypoint. - -Introduced import, refresh-all, refresh-one, validate-tags, tag-stats, and source subcommands. - -Includes colorized CLI summaries and consistent emoji headers. - -Machine Learning Dataset Builder - -New module: ml/ml_dataset_builder.py - -Generates structured dataset in ML/porndex_dataset.jsonl from all indexed galleries. - -Each record includes title, models, tags, and image paths for hybrid ML ingestion. - -Embedding Generation Module - -Added ml/ml_embeddings.py to create hybrid text + image embeddings. - -Builds per-gallery NPZ files under ML/embeddings/ and a consolidated embeddings_index.jsonl. - -Supports configurable --img-samples and automatic device detection (--device auto). - -Semantic & Strict Search - -search command supports three modes: - -semantic: CLIP + text hybrid cosine similarity (default) - -text: text-only vector space search - -strict: literal match filtering before vector ranking - -Results show top-ranked galleries, confidence scores, and gallery IDs. - -ML Verification Command - -verify confirms index consistency, embedding count, and file integrity. - -Directory Auto-Creation - -Automatically generates ML/embeddings/ and ML/ if missing. - -πŸ›  Changed - -Importer Pipeline Refactor - -Moved all CLI handling into src/importer/cli.py. - -Centralized environment setup and config loading. - -Replaced direct Python script calls with porndex-importer entrypoint. - -Tagging System - -Unified YAML dictionary loading for clothing, acts, body, and context. - -Improved tag inference logging and duplicate suppression. - -Output Formatting - -Standardized headers, dividers, and indentation across all CLI commands. - -Added readable time and path indicators for long-running operations. - -🧹 Maintenance - -Verified full ML dataset build across 150 test galleries (100% JSONL completion). - -Added fallback for empty or missing image lists in dataset builder. - -Improved error handling for partial downloads and interrupted imports. - -Streamlined path resolution for consistent operation across dev and installed modes. - -Updated documentation: - -/docs/CLI_USAGE.md rewritten for v0.4.2. - -/README.md modernized with full project tree and ML pipeline overview. - -🧭 Next Steps - -Begin v0.4.3–v0.5.x roadmap: - -Integrate GroundingDINO + GroundedSAM for visual region detection. - -Implement attribute extraction (gender β†’ ethnicity β†’ clothing). - -Build visual verification tool (ml_dataset_inspector.py). - -Add tag-confidence weighting system. - -Extend TPDB bridge to cross-link enriched performer metadata into ML training records. - -🧩 Summary of Current State (as of v0.4.2) - -βœ… Fully unified CLI under porndex-importer -βœ… Stable YAML tagging + validation -βœ… Complete ML dataset and embedding generation workflow -βœ… Working hybrid semantic search -βœ… Verified 150-gallery dataset index - -Β© 2025 Leak Technologies β€” Porndex Importer Project \ No newline at end of file diff --git a/VERSION b/VERSION index e69de29..d15723f 100644 --- a/VERSION +++ b/VERSION @@ -0,0 +1 @@ +0.3.2 diff --git a/requirements.txt b/requirements.txt index 9f7f3f8..16b91b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ requests beautifulsoup4 lxml tqdm +pyyaml \ No newline at end of file diff --git a/src/importer/__pycache__/cli.cpython-313.pyc b/src/importer/__pycache__/cli.cpython-313.pyc index b047d93..13fcb60 100644 Binary files a/src/importer/__pycache__/cli.cpython-313.pyc and b/src/importer/__pycache__/cli.cpython-313.pyc differ diff --git a/src/importer/__pycache__/gallery_importer.cpython-313.pyc b/src/importer/__pycache__/gallery_importer.cpython-313.pyc index f6369c5..cdbc996 100644 Binary files a/src/importer/__pycache__/gallery_importer.cpython-313.pyc and b/src/importer/__pycache__/gallery_importer.cpython-313.pyc differ diff --git a/src/importer/__pycache__/tag_gallery.cpython-313.pyc b/src/importer/__pycache__/tag_gallery.cpython-313.pyc index 14aad0c..79e7a63 100644 Binary files a/src/importer/__pycache__/tag_gallery.cpython-313.pyc and b/src/importer/__pycache__/tag_gallery.cpython-313.pyc differ diff --git a/src/importer/cli.py b/src/importer/cli.py index 0b3ee96..6b567a3 100644 --- a/src/importer/cli.py +++ b/src/importer/cli.py @@ -2,30 +2,30 @@ # ============================================================ # File: src/importer/cli.py # Description: -# Porndex CLI +# Goondex CLI # Unified command-line interface for gallery importing, # tagging, metadata refresh, and analytics with color-coded output. # ============================================================ """ -Porndex CLI +Goondex CLI =========== Unified command-line interface for gallery importing, tagging, metadata refresh, and tag analytics with color-coded output. Usage examples: - porndex import "https://www.pornpics.com/galleries/..." - porndex refresh-all - porndex refresh-one "Gallery_Name" - porndex validate-tags - porndex tag-stats + goondex import "https://www.pornpics.com/galleries/..." + goondex refresh-all + goondex refresh-one "Gallery_Name" + goondex validate-tags + goondex tag-stats """ import sys import os -from importer import tag_gallery -from importer.tag_utils import validate_tag_dictionaries -from importer.gallery_importer import import_gallery # β¬… NEW import +from src.importer import tag_gallery +from src.importer.tag_utils import validate_tag_dictionaries +from src.importer.gallery_importer import import_gallery # ─────────────────────────────────────────────── @@ -42,19 +42,40 @@ class Colors: def colorize(text, color): - """Helper to wrap text with ANSI color codes.""" + """Wrap text with ANSI color codes.""" return f"{color}{text}{Colors.RESET}" +# ─────────────────────────────────────────────── +# VERSION HANDLER +# ─────────────────────────────────────────────── +def load_version(): + """Safely read version number from root VERSION file.""" + try: + root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) + version_file = os.path.join(root_dir, "VERSION") + with open(version_file, "r", encoding="utf-8") as f: + return f.read().strip() + except FileNotFoundError: + return "unknown" + + +def print_version(): + """Print Goondex version string.""" + version = load_version() + print(colorize(f"Goondex v{version}", Colors.CYAN)) + sys.exit(0) + + # ─────────────────────────────────────────────── # USAGE HELP # ─────────────────────────────────────────────── def print_usage(): - print(colorize("\nPorndex CLI", Colors.BOLD)) + print(colorize("\nGoondex CLI", Colors.BOLD)) print(colorize("─────────────────────────────────────", Colors.CYAN)) print(""" Usage: - porndex [args...] + goondex [args...] Commands: import Import a new gallery from PornPics @@ -70,11 +91,15 @@ Commands: show-metadata Show metadata.json content source set Set gallery source (single or all) +Flags: + -h, --help Show this help message + -v, --version Show current version + Examples: - porndex import "https://www.pornpics.com/galleries/example/" - porndex refresh-one "20251020_2117_Mariella_Sun" - porndex tag-stats - porndex validate-tags + goondex import "https://www.pornpics.com/galleries/example/" + goondex refresh-one "20251020_2117_Mariella_Sun" + goondex tag-stats + goondex validate-tags """.strip()) @@ -90,27 +115,30 @@ def main(): args = sys.argv[2:] # ───────────────────────────── - # HELP FLAG SUPPORT + # HELP / VERSION FLAGS # ───────────────────────────── if cmd in ("--help", "-h", "help"): print_usage() sys.exit(0) + if cmd in ("--version", "-v", "version"): + print_version() + # ───────────────────────────── # IMPORT NEW GALLERY # ───────────────────────────── if cmd == "import": if not args: - print(colorize("[!] Usage: porndex import ", Colors.RED)) + print(colorize("[!] Usage: goondex import ", Colors.RED)) sys.exit(1) url = args[0] - print(colorize(f"[🌐] Importing gallery from URL: {url}", Colors.CYAN)) + print(colorize(f"[INFO] Importing gallery from URL: {url}", Colors.CYAN)) try: import_gallery(url) - print(colorize("[βœ“] Gallery import completed successfully.", Colors.GREEN)) + print(colorize("[OK] Gallery import completed successfully.", Colors.GREEN)) except Exception as e: - print(colorize(f"[βœ–] Import failed: {e}", Colors.RED)) + print(colorize(f"[ERROR] Import failed: {e}", Colors.RED)) sys.exit(1) return @@ -118,25 +146,25 @@ def main(): # REFRESH COMMANDS # ───────────────────────────── if cmd == "refresh-all": - print(colorize("[↻] Refreshing all galleries...", Colors.CYAN)) + print(colorize("[INFO] Refreshing all galleries...", Colors.CYAN)) tag_gallery.refresh_all_tags() - print(colorize("[βœ“] Refresh complete.", Colors.GREEN)) + print(colorize("[OK] Refresh complete.", Colors.GREEN)) return elif cmd == "refresh-one": if not args: - print(colorize("[!] Usage: porndex refresh-one ", Colors.RED)) + print(colorize("[!] Usage: goondex refresh-one ", Colors.RED)) sys.exit(1) - print(colorize(f"[↻] Refreshing gallery: {args[0]}", Colors.CYAN)) + print(colorize(f"[INFO] Refreshing gallery: {args[0]}", Colors.CYAN)) tag_gallery.refresh_one(args[0]) - print(colorize("[βœ“] Gallery refresh complete.", Colors.GREEN)) + print(colorize("[OK] Gallery refresh complete.", Colors.GREEN)) return # ───────────────────────────── # VALIDATION COMMAND # ───────────────────────────── elif cmd == "validate-tags": - print(colorize("[πŸ”] Validating tag dictionaries...\n", Colors.CYAN)) + print(colorize("[INFO] Validating tag dictionaries...\n", Colors.CYAN)) report = validate_tag_dictionaries() total_files = len(report.get("files", {})) @@ -151,16 +179,16 @@ def main(): print(colorize(f"Conflicts: {total_conflicts}", Colors.RED if total_conflicts else Colors.GREEN)) print(colorize(f"Duplicates: {total_dupes}", Colors.YELLOW if total_dupes else Colors.GREEN)) print(f"Empty entries: {total_empty}") - print(colorize("\n[βœ“] Validation finished.", Colors.GREEN)) + print(colorize("\n[OK] Validation finished.", Colors.GREEN)) return # ───────────────────────────── # TAG STATISTICS # ───────────────────────────── elif cmd == "tag-stats": - print(colorize("[πŸ“Š] Generating tag statistics...", Colors.CYAN)) + print(colorize("[INFO] Generating tag statistics...", Colors.CYAN)) tag_gallery.tag_stats() - print(colorize("[βœ“] Tag statistics report complete.", Colors.GREEN)) + print(colorize("[OK] Tag statistics report complete.", Colors.GREEN)) return # ───────────────────────────── @@ -170,9 +198,9 @@ def main(): func = getattr(tag_gallery, cmd.replace("-", "_")) try: func(*args) - print(colorize("[βœ“] Command executed successfully.", Colors.GREEN)) + print(colorize("[OK] Command executed successfully.", Colors.GREEN)) except Exception as e: - print(colorize(f"[βœ–] Error: {e}", Colors.RED)) + print(colorize(f"[ERROR] {e}", Colors.RED)) return # ───────────────────────────── diff --git a/src/importer/gallery_importer.py b/src/importer/gallery_importer.py index e71c536..bf0a490 100644 --- a/src/importer/gallery_importer.py +++ b/src/importer/gallery_importer.py @@ -25,8 +25,9 @@ from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime, timezone -from importer.fetch_gallery_metadata import fetch_gallery_metadata, save_gallery_metadata -from importer.index_builder import build_index +# βœ… FIXED IMPORTS β€” all explicitly under src.importer +from src.importer.fetch_gallery_metadata import fetch_gallery_metadata, save_gallery_metadata +from src.importer.index_builder import build_index # ─────────────────────────────────────────────── # Constants @@ -149,7 +150,7 @@ def auto_tag_gallery(folder_name: str): try: print(color(f"[↻] Auto-tagging {folder_name} ...", "96")) subprocess.run( - ["python", "-m", "importer.tag_gallery", "refresh-one", folder_name], + ["python", "-m", "src.importer.tag_gallery", "refresh-one", folder_name], check=True ) except Exception as e: @@ -162,13 +163,10 @@ def auto_tag_gallery(folder_name: str): def import_gallery(url: str): """Fetch metadata, create/update folder, download images, update index.""" - # Print only once when we start importing the gallery print(color(f"[🌐] Importing gallery from URL: {url}", "96")) - - # Fetch metadata for the gallery metadata_new = fetch_gallery_metadata(url) - # Find existing gallery by title substring + # Detect existing gallery existing_folder = None for folder in os.listdir(BASE_SAVE_PATH): folder_path = os.path.join(BASE_SAVE_PATH, folder) @@ -201,11 +199,9 @@ def import_gallery(url: str): download_images_threaded(gallery_folder, image_urls) print(color(f"[βœ“] Completed gallery import: {gallery_folder}", "92")) - # Auto-tag newly imported gallery folder_name = os.path.basename(gallery_folder) auto_tag_gallery(folder_name) - # Display stats summary meta_path = os.path.join(gallery_folder, "metadata.json") if os.path.exists(meta_path): with open(meta_path, "r", encoding="utf-8") as f: @@ -228,10 +224,9 @@ def import_gallery(url: str): else: print(color("[⚠] No metadata.json found to display stats.", "93")) - # Final index update build_index() print(color("[βœ“] Gallery index updated β€” import complete.\n", GREEN)) - + # ─────────────────────────────────────────────── # Refresh all galleries diff --git a/src/importer/tag_gallery.py b/src/importer/tag_gallery.py index fddbe8f..a1d8e95 100644 --- a/src/importer/tag_gallery.py +++ b/src/importer/tag_gallery.py @@ -27,7 +27,7 @@ import sys import yaml import json -from importer.tag_utils import validate_tag_dictionaries +from src.importer.tag_utils import validate_tag_dictionaries from collections import Counter BASE_DIR = os.path.expanduser( diff --git a/src/utils/install_alias.fish b/src/utils/install_alias.fish new file mode 100755 index 0000000..a85042a --- /dev/null +++ b/src/utils/install_alias.fish @@ -0,0 +1,36 @@ +#!/usr/bin/env fish +# ============================================================ +# Goondex CLI Alias Installer (Fish-native) +# ============================================================ +# Automatically sets up a persistent alias so you can run: +# goondex import +# or any CLI command without typing the Python path manually. +# ============================================================ + +set project_root (pwd) +set venv_path "$project_root/.venv/bin/python" +set alias_line "alias goondex \"$venv_path -m importer.cli\"" +set config_file "$HOME/.config/fish/config.fish" + +echo "Installing Goondex alias for Fish..." + +# Verify virtual environment exists +if not test -f $venv_path + echo "Error: Virtual environment not found at $venv_path" + echo "Please create it with: python -m venv .venv" + exit 1 +end + +# Add alias if not already present +if not grep -Fxq "$alias_line" $config_file 2>/dev/null + echo $alias_line >> $config_file + echo "Alias added to $config_file" +else + echo "Alias already exists in $config_file" +end + +# Apply immediately +echo "Reloading Fish config..." +source $config_file + +echo "Setup complete. Try: goondex --version" diff --git a/src/utils/install_alias.sh b/src/utils/install_alias.sh old mode 100644 new mode 100755 index 1f2da1c..cd1a2bd --- a/src/utils/install_alias.sh +++ b/src/utils/install_alias.sh @@ -2,41 +2,45 @@ # ============================================================ # File: src/utils/install_alias.sh # Description: -# Adds the Porndex CLI alias for all supported shells (Fish, Bash, Zsh). -# Makes 'porndex' available globally so users can run commands like: -# porndex import "https://www.pornpics.com/galleries/example/" +# Adds the Goondex CLI alias for all supported shells (Fish, Bash, Zsh). +# Makes 'goondex' available globally so you can run commands like: +# goondex import "https://www.pornpics.com/galleries/example/" # ============================================================ set -e -ALIAS_CMD='alias porndex="python -m src.importer.cli"' +ALIAS_CMD='alias goondex="python -m importer.cli"' -echo "πŸ”§ Installing Porndex CLI alias..." +echo "Installing Goondex CLI alias..." +# ─────────────────────────────────────────────── # Fish shell +# ─────────────────────────────────────────────── if [ -n "$FISH_VERSION" ]; then CONFIG_FILE="$HOME/.config/fish/config.fish" if ! grep -Fxq "$ALIAS_CMD" "$CONFIG_FILE" 2>/dev/null; then echo "$ALIAS_CMD" >> "$CONFIG_FILE" - echo "βœ… Added alias to $CONFIG_FILE" + echo "Added alias to $CONFIG_FILE" else - echo "⚠️ Alias already present in $CONFIG_FILE" + echo "Alias already present in $CONFIG_FILE" fi - echo "πŸ’‘ Run 'source $CONFIG_FILE' or restart your terminal to apply." + echo "Reload your Fish config with: source $CONFIG_FILE" exit 0 fi +# ─────────────────────────────────────────────── # Bash / Zsh fallback +# ─────────────────────────────────────────────── for FILE in "$HOME/.bashrc" "$HOME/.zshrc"; do if [ -f "$FILE" ]; then - if ! grep -Fxq "$ALIAS_CMD" "$FILE"; then + if ! grep -Fxq "$ALIAS_CMD" "$FILE" 2>/dev/null; then echo "$ALIAS_CMD" >> "$FILE" - echo "βœ… Added alias to $FILE" + echo "Added alias to $FILE" else - echo "⚠️ Alias already present in $FILE" + echo "Alias already present in $FILE" fi fi done -echo "πŸ’‘ Restart your terminal or run 'source ~/.bashrc' to activate Porndex." -echo "Done βœ…" +echo "Reload your shell or run 'source ~/.bashrc' (or '~/.zshrc') to activate Goondex." +echo "Done."