k4d3 committed
Commit f1a2ec8
1 Parent(s): efca0c9

Initial commit

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

.tmux.conf ADDED
@@ -0,0 +1,42 @@
+ # List of plugins
+ set -g @plugin 'tmux-plugins/tpm'
+ set -g @plugin 'tmux-plugins/tmux-sensible'
+ set -g @plugin 'tmux-plugins/tmux-yank'
+ set -g @plugin 'tmux-plugins/tmux-resurrect'
+ set -g @plugin 'tmux-plugins/tmux-continuum'
+ set -g @plugin 'sainnhe/tmux-fzf'
+ set -g @plugin 'catppuccin/tmux'
+
+ # Enable clipboard integration
+ set -g set-clipboard on
+
+ # Enable tmux-continuum and set it to boot on start
+ set -g @continuum-boot 'on'
+ # Set the strategy for tmux-resurrect to use nvim sessions
+ set -g @resurrect-strategy-nvim 'session'
+
+ # Disable the bell action
+ set-option -g bell-action none
+
+ # Set the default shell to zsh without global rc files
+ set-option -g default-command "zsh --no-globalrcs"
+ # Enable setting terminal titles
+ set -g set-titles on
+ # Set the format for terminal titles
+ set -g set-titles-string '#T #{pane_current_command}'
+ # Set the window size to the smallest
+ set -g window-size smallest
+ # Enable aggressive resize for windows
+ setw -g aggressive-resize on
+ # Enable mouse support
+ set -g mouse on
+ # Set the default terminal type to tmux-256color
+ set -g default-terminal "tmux-256color"
+ # Append terminal overrides for xterm-256color
+ set-option -ga terminal-overrides ",xterm-256color:Tc"
+
+ # Bind 'r' to reload the tmux config and display a message
+ bind r source-file ~/.tmux.conf \; display "Config reloaded!"
+
+ # Initialize TMUX plugin manager (keep this line at the very bottom of tmux.conf)
+ run '~/.tmux/plugins/tpm/tpm'
.zshrc ADDED
@@ -0,0 +1,498 @@
1
+ export LANG=ja_JP.UTF-8
2
+ export LC_ALL=ja_JP.UTF-8
3
+
4
+ display_custom_help() {
5
+ echo "----------------------------------------------------------------------------------------------------------------------"
6
+ printf "%s\n" "$(conda env list)"
7
+ echo "----------------------------------------------------------------------------------------------------------------------"
8
+ echo "LLMs"
9
+ echo "---"
10
+ echo "conda activate openwebui && open-webui serve --port 6969"
11
+ echo "ollama serve"
12
+ echo "----------------------------------------------------------------------------------------------------------------------"
13
+ echo "Taggers + Captioners"
14
+ echo "----------------------------------------------------------------------------------------------------------------------"
15
+ echo "JTP2"
16
+ echo "---"
17
+ echo "~/toolkit/jtp2 <dir>"
18
+ echo "Joy Captioner"
19
+ echo "---"
20
+ echo "~/source/repos/joy/joy <dir> --custom_prompt \"<prompt>\" --caption_type custom"
21
+ echo "Waifu Diffusion Tagger:"
22
+ echo "---"
23
+ echo "python ~/source/repos/wdv3-timm/wdv3_timm.py <dir> --model eva02"
24
+ echo "----------------------------------------------------------------------------------------------------------------------"
25
+ echo "Database Stuff"
26
+ echo "----------------------------------------------------------------------------------------------------------------------"
27
+ echo "Redis"
28
+ echo "---"
29
+ echo "~/db/redis-stable/src/redis-server : Start server."
30
+ echo "PostgreSQL"
31
+ echo "---"
32
+ echo "psql -d postgres -h /tmp : Connect using socket directory."
33
+ echo "Start server:"
34
+ echo "pg_ctl -D \$HOME/db/postgresql/data -l \$HOME/db/pgsql.log start"
35
+ # echo "Commands, Aliases, and Custom Functions:"
36
+ # echo "----------------------------------------------------------------------------------------------------------------------"
37
+ # echo "pie : \`pip install -e . --use-pep517\`"
38
+ # echo "gcs : \`git clone --recurse-submodules\`"
39
+ # echo "dust : A more intuitive version of du."
40
+ # echo "ranger : A vim inspired file manager."
41
+ # echo "htop : Interactive process viewer."
42
+ # echo "nvtop : Interactive GPU process viewer."
43
+ # echo "nvitop : An even more interactive GPU process viewer."
44
+ # echo "nvim : Alias for vim."
45
+ # echo "rt : Edit tmux config and reload it."
46
+ # echo "zr : Edit zsh config and reload it."
47
+ # echo "ta : Attach to tmux session."
48
+ # echo "ga : Git add, commit, and push."
49
+ # echo "gs : Git status."
50
+ # echo "wd : Word diff in git."
51
+ # echo "grabber : Alias for Grabber-cli."
52
+ # echo "ls : Alias for 'ls --color=always'."
53
+ # echo "----------------------------------------------------------------------------------------------------------------------"
54
+ echo "- 🐺 TOOLS -"
55
+ echo "----------------------------------------------------------------------------------------------------------------------"
56
+ echo "nv : Returns the CUDA version number."
57
+
58
+
59
+
60
+ echo "remove_repetition : Removes repetition in txt files in a target directory."
61
+ echo "copy_sample_prompts : Copies ./sample-prompts.txt file from the current dir to datasets/furry."
62
+ echo "remove_number_prefix : Removes all numbers prefixed by a _ from the end of every file."
63
+ echo "count_captions : Counts *.caption and *.txt files in each subdirectory."
64
+ echo "count_captions_per_folder : Counts *.caption and *.txt files in each subdirectory individually."
65
+ echo "llama : Runs Meta-Llama-3-8B-Instruct on port 6969."
66
+ echo "copy_matching_caption_files : Copies matching .caption files for <dir> to the current directory."
67
+ echo "c : Change to ComfyUI directory and start the server."
68
+ echo "t : Start TensorBoard with logs directory."
69
+ echo "png2mp4 : Convert PNG sequence to MP4 video."
70
+ echo "seed <file> : Display the seed from a safetensors file."
71
+ echo "swch <branch> : Clean repo and switch to specified git branch."
72
+ echo "convert_to_jxl <directory> : Convert JPG, JPEG, and PNG files to JXL in the specified directory."
73
+ echo "convert_pxl_to_png <directory> : Convert PXL files to PNG in the specified directory."
74
+ echo "replace_text_in_files [dir] <src> <replace> : Perform text replacement on *.txt files in a target directory."
75
+ echo "update_dir [directory] : Update git repositories in subdirectories."
76
+ echo "inject_to_captions [dir] \"txt\" : Add prefix to the beginning of each text file in a directory."
77
+ echo "chop_lora <input_file> : Generate multiple versions of a Lora file with different presets."
78
+ echo "----------------------------------------------------------------------------------------------------------------------"
79
+ }
80
+
81
+ export RUST_BACKTRACE=1
82
+
83
+ # This function `nv` retrieves the version of the NVIDIA CUDA Compiler (nvcc) installed on the system.
84
+ # It extracts the version number from the `nvcc --version` command output.
85
+ # The version number is then formatted by removing the dot (e.g., 12.6 becomes 126).
86
+ # Finally, the function returns the formatted version number.
87
+ function nv() {
88
+ # Get the nvcc version output
89
+ local nvcc_output=$(nvcc --version)
90
+
91
+ # Extract the version number (12.6)
92
+ local version=$(echo "$nvcc_output" | grep -oP 'release \K[0-9]+\.[0-9]+')
93
+
94
+ # Remove the dot to get 126
95
+ local result=$(echo "$version" | tr -d '.')
96
+
97
+ # Print the result
98
+ echo $result
99
+ }
100
+
101
+ export BNB_CUDA_VERSION=126
102
+
103
+ # Function to remove consecutive repeated words in text files within a directory
104
+ remove_repetition() {
105
+ local dir=$1 # The directory to search for text files
106
+ # Find all .txt files in the specified directory and process each file
107
+ find "$dir" -type f -name "*.txt" | while read -r file; do
108
+ # Use awk to process each line of the file
109
+ awk '
110
+ {
111
+ n = split($0, words, " ") # Split the line into words
112
+ for (i = n; i > 1; i--) { # Iterate from the last word to the second word
113
+ if (words[i] != words[i-1]) break # Stop if the current word is not equal to the previous word
114
+ }
115
+ for (j = 1; j <= i; j++) { # Print the words up to the point where repetition ends
116
+ printf "%s%s", words[j], (j == i ? ORS : OFS) # Print the word followed by a space or newline
117
+ }
118
+ }
119
+ ' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file" # Save the processed content to a temporary file and replace the original file
120
+ done
121
+ }
122
+
123
+ # This alias 'pie' is a shortcut for installing a Python package in editable mode
124
+ # using the pip command with the --use-pep517 option.
125
+ alias pie='pip install -e . --use-pep517'
126
+
127
+ # Function to remove specific tags from all *.txt files in a target directory recursively
128
+ remove_boys() {
129
+ # Assign the first argument passed to the function to the variable target_dir
130
+ local target_dir="$1"
131
+
132
+ # Find all *.txt files in the target directory and its subdirectories
133
+ find "$target_dir" -type f -name "*.txt" | while read -r file; do
134
+ # Use sed to remove occurrences of [1-9]boy, [1-9]boys, [1-9]girl, and [1-9]girls along with a comma and space character
135
+ # -i.bak creates a backup of the original file with a .bak extension
136
+ # -E enables extended regular expressions
137
+ sed -i.bak -E 's/, ([1-9]boy|[1-9]boys|[1-9]girl|[1-9]girls)//g' "$file"
138
+ done
139
+ }
140
+
141
+ export DOTNET_CLI_TELEMETRY_OPTOUT=1
142
+
143
+ # Organizes a sample prompt file from the current directory to datasets/furry.
144
+ # It moves the file named sample-prompts.txt to either
145
+ # ~/datasets/furry/sample_prompts/pony or ~/datasets/furry/sample_prompts/compass based on the content.
146
+ # If the file contains the regexp 'score_*', it is moved to ~/datasets/furry/sample_prompts/pony.
147
+ # Otherwise, it is moved to ~/datasets/furry/sample_prompts/compass.
148
+ # The -v option is used with cp to provide verbose output.
149
+ copy_sample_prompts() {
150
+ file="./sample-prompts.txt"
151
+ if grep -q 'score_*' "$file"; then
152
+ cp -v "$file" ~/datasets/furry/sample_prompts/pony/
153
+ else
154
+ cp -v "$file" ~/datasets/furry/sample_prompts/compass/
155
+ fi
156
+
157
+ echo "File has been organized."
158
+ }
159
+
160
+ # Removes all numbers prefixed by a _ from the end of every file before the file extension
161
+ remove_number_prefix() {
162
+ # Loop through all files in the current directory and its subdirectories
163
+ for file in **/*_[0-9]*.*; do
164
+ # Get the new file name by removing '_number' before the file extension
165
+ new_file="${file%_[0-9]*.*}.${file##*.}"
166
+ # Rename the file
167
+ mv "$file" "$new_file"
168
+ done
169
+ }
170
+
171
+ # Counts all *.caption and *.txt files in all subdirectories.
172
+ count_captions() {
173
+ caption_count=$(find . -type f -name "*.caption" | wc -l)
174
+ txt_count=$(find . -type f -name "*.txt" | wc -l)
175
+ echo "*.caption files: $caption_count"
176
+ echo "*.txt files: $txt_count"
177
+ }
178
+
179
+ # Counts *.caption and *.txt files in each subdirectory individually.
180
+ count_captions_per_folder() {
181
+ for dir in */; do
182
+ echo "Directory: $dir"
183
+ echo -n "*.caption files: "
184
+ find "$dir" -type f -name "*.caption" | wc -l
185
+ echo -n "*.txt files: "
186
+ find "$dir" -type f -name "*.txt" | wc -l
187
+ done
188
+ }
189
+
190
+ # open-webui
191
+ oui() {
192
+ conda activate openwebui
193
+ open-webui serve --port 6969
194
+ }
195
+
196
+ llama() {
197
+ ~/models/Meta-Llama-3-8B-Instruct.Q5_K_M.llamafile -cb -np 4 -a llama-3-8b --embedding --port 11434
198
+ }
199
+
200
+ alias gcs='git clone --recurse-submodules'
201
+
202
+ # Function to copy matching .caption files
203
+ copy_matching_caption_files() {
204
+ # Define the target directory
205
+ TARGET_DIR="$1"
206
+
207
+ # Loop through each image file in the current directory
208
+ for image_file in *.(jpg|jpeg|png|gif|bmp|tiff|webp|jxl); do
209
+ # Check if the file exists (to handle cases where no files match the pattern)
210
+ if [[ -f "$image_file" ]]; then
211
+ # Extract the base name (without extension)
212
+ base_name="${image_file%.*}"
213
+
214
+ # Define the corresponding .caption file in the target directory
215
+ caption_file="$TARGET_DIR/$base_name.caption"
216
+
217
+ # Check if the .caption file exists
218
+ if [[ -f "$caption_file" ]]; then
219
+ # Copy the .caption file to the current directory
220
+ cp "$caption_file" .
221
+ echo "Copied $caption_file to the current directory."
222
+ else
223
+ echo "No matching .caption file for $image_file."
224
+ fi
225
+ fi
226
+ done
227
+ }
228
+
229
+
230
+ # This script performs a text replacement operation in all .txt files within a specified directory.
231
+ # It takes three arguments:
232
+ # 1. target_dir: The directory containing the .txt files where the text replacement will occur.
233
+ # 2. search_text: The text string that needs to be replaced.
234
+ # 3. replace_text: The text string that will replace the search_text.
235
+ #
236
+ # The script uses a for loop to iterate through all .txt files in the target directory.
237
+ # It utilizes the 'sed' command to perform an in-place replacement of the search_text with the replace_text in each file.
238
+ # After processing all files, it prints a message indicating the completion of the text replacement operation.
239
+ replace_text_in_files() {
240
+ local target_dir=$1
241
+ local search_text=$2
242
+ local replace_text=$3
243
+
244
+ # Loop through all .txt files in the target directory
245
+ for file in "$target_dir"/*.txt; do
246
+ # Use sed to replace the text
247
+ sed -i "s/$search_text/$replace_text/g" "$file"
248
+ done
249
+
250
+ echo "Text replacement complete in $target_dir!"
251
+ }
252
+
253
+ # Example usage:
254
+ # replace_text_in_files "/path/to/directory" "squishy (artist)" "by squishy (artist)"
255
+
256
+
257
+ # This script adds a specified prefix to the beginning of each text file in a given directory.
258
+ # Usage: inject_to_captions <directory> <prefix>
259
+ # Arguments:
260
+ # <directory> - The directory containing the text files to be modified.
261
+ # <prefix> - The prefix to be added to the beginning of each text file.
262
+ # The script checks if the specified directory exists and iterates over each text file in the directory.
263
+ # For each text file, it creates a temporary file with the modified content and then replaces the original file with the temporary file.
264
+ # If the directory does not exist, it prints an error message.
265
+ inject_to_captions() {
266
+ local dir="$1"
267
+ local prefix="$2"
268
+ if [[ -d "$dir" ]]; then
269
+ for file in "$dir"/*.txt; do
270
+ if [[ -f "$file" ]]; then
271
+ if ! grep -q "$prefix" "$file"; then
272
+ # Use a temporary file to store the modified content
273
+ local temp_file=$(mktemp)
274
+ echo "${prefix}, $(cat "$file")" > "$temp_file"
275
+ mv "$temp_file" "$file"
276
+ echo "Added '${prefix}, ' to the front of $file"
277
+ else
278
+ echo "The tag '${prefix}' already exists in $file"
279
+ fi
280
+ fi
281
+ done
282
+ else
283
+ echo "Directory $dir does not exist."
284
+ fi
285
+ }
286
+
287
+ # Function to update git repositories in subdirectories
288
+ update_dir() {
289
+ local target_dir="${1:-.}"
290
+
291
+ # Check if there are any subdirectories
292
+ if [[ -n "$(find "$target_dir" -mindepth 1 -maxdepth 1 -type d)" ]]; then
293
+ for dir in "$target_dir"/*/; do
294
+ if [[ -d "$dir" ]]; then
295
+ (
296
+ cd "$dir" || return
297
+ # If the directory is a git repository, pull the latest changes
298
+ if [[ -d ".git" ]]; then
299
+ echo "Updating $(pwd)"
300
+ git pull
301
+ fi
302
+ )
303
+ fi
304
+ done
305
+ fi
306
+ }
307
+
308
+ export TOKENIZERS_PARALLELISM=false
309
+
310
+ alias grabber="Grabber-cli"
311
+
312
+ #export force_color_prompt=yes
313
+
314
+ chop_lora() {
315
+ local input_file="$1"
316
+ local base_name="${input_file:r}" # Remove extension
317
+
318
+ # Define presets and their corresponding vector strings
319
+ declare -A presets=(
+ ["ringdingding"]="1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0"
+ ["squeaker"]="1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0"
+ ["heavylifter"]="1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0"
+ ["style1"]="1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
+ ["style2"]="1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
+ ["beeg"]="1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0"
+ ["all"]="1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1"
+ ["allin"]="1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0"
+ ["allmid"]="1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0"
+ ["allout"]="1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1"
+ )
331
+
332
+ for preset in ${(k)presets}; do
333
+ local output_file="${base_name}-${preset}.safetensors"
334
+ local vector_string="${presets[$preset]}"
335
+ echo "Generating $output_file"
336
+ python ~/source/repos/resize_lora/chop_blocks.py "$input_file" "$vector_string" -o "$output_file"
337
+ done
338
+ }
339
+
340
+ function swch() {
341
+ if [ -z "$1" ]; then
342
+ echo "Please provide a branch name."
343
+ return 1
344
+ fi
345
+ branchname=$1
346
+ git clean -fxd && git pull && git checkout $branchname
347
+ }
348
+
349
+ export COMFYUI_PATH="$HOME/ComfyUI"
350
+ export ZSH="$HOME/.oh-my-zsh"
351
+
352
+ ZSH_THEME="kade"
353
+ # CASE_SENSITIVE="true"
354
+ # HYPHEN_INSENSITIVE="true"
355
+ # DISABLE_MAGIC_FUNCTIONS="true"
356
+ # DISABLE_LS_COLORS="true"
357
+ # DISABLE_AUTO_TITLE="true"
358
+ # ENABLE_CORRECTION="true"
359
+ # COMPLETION_WAITING_DOTS="true"
360
+ # DISABLE_UNTRACKED_FILES_DIRTY="true"
361
+
362
+ plugins=(git autojump conda-env)
363
+
364
+ extract_iframes() {
365
+ # Assign input arguments
366
+ input_file="$1"
367
+ scene_change_fraction="${2:-0.1}"
368
+
369
+ # Get the base filename without extension
370
+ base_name=$(basename "$input_file" .webm)
371
+
372
+ # Run ffmpeg command
373
+ /usr/bin/ffmpeg -i "$input_file" -f image2 -vf "select=eq(pict_type\,PICT_TYPE_I)*gt(scene\,$scene_change_fraction),showinfo" -fps_mode vfr "${base_name}-%06d.png"
374
+ }
375
+
376
+ convert_to_jxl() {
377
+ local target_directory="$1"
378
+
379
+ # Ensure the target directory exists
380
+ if [[ ! -d "$target_directory" ]]; then
381
+ echo "The specified directory does not exist: $target_directory" >&2
382
+ return 1
383
+ fi
384
+
385
+ # Find all JPG, JPEG, and PNG files in the target directory and all subdirectories
386
+ find "$target_directory" \( -name "*.jpg" -o -name "*.jpeg" -o -name "*.png" \) -type f | while read -r file; do
387
+ input_path="$file"
388
+ output_path="${file%.*}.jxl"
389
+
390
+ # Convert to JXL using ImageMagick
391
+ if magick convert "$input_path" "$output_path"; then
392
+ echo "Converted: $input_path -> $output_path"
393
+ else
394
+ echo "Failed to convert $input_path" >&2
395
+ fi
396
+ done
397
+
398
+ echo "Conversion complete."
399
+ }
400
+
401
+
402
+ convert_pxl_to_png() {
403
+ local target_directory="$1"
404
+
405
+ # Ensure the target directory exists
406
+ if [[ ! -d "$target_directory" ]]; then
407
+ echo "The specified directory does not exist: $target_directory" >&2
408
+ return 1
409
+ fi
410
+
411
+ # Find all PXL files in the target directory and all subdirectories
412
+ find "$target_directory" -type f -name "*.pxl" | while read -r file; do
413
+ input_path="$file"
414
+ output_path="${file%.pxl}.png"
415
+
416
+ # Convert PXL to PNG using ImageMagick
417
+ if magick convert "$input_path" "$output_path"; then
418
+ echo "Converted: $input_path -> $output_path"
419
+ else
420
+ echo "Failed to convert $input_path" >&2
421
+ fi
422
+ done
423
+
424
+ echo "Conversion complete."
425
+ }
426
+
427
+
428
+ seed() {
429
+ local filePath="$1"
430
+ python3 -c "
431
+ import safetensors, json
432
+ filePath = '$filePath'
433
+ print(json.loads(safetensors.safe_open(filePath, 'np').metadata().get('ss_seed', 'Not found')))
434
+ "
435
+ }
436
+
437
+ png2mp4() {
438
+ ffmpeg -framerate 8 -pattern_type glob -i '*.png' -vf scale=512x512 -crf 28 \
439
+ -c:v libx264 -pix_fmt yuv420p out.mp4
440
+ }
441
+
442
+
443
+ source $ZSH/oh-my-zsh.sh
444
+
445
+ export PATH=$PATH:$HOME/.local/bin:$HOME/source/repos/dataset-tools/target/x86_64-unknown-linux-gnu/release:$HOME/.cargo/bin:$HOME/miniconda3/bin:$HOME/toolkit:$HOME/db/redis-stable/src:$HOME/db/postgresql/bin
446
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
447
+ export COMFYUI_MODEL_PATH=/home/kade/ComfyUI/models
448
+
449
+ c_old() {
450
+ cd ~/ComfyUI &&
451
+ python3.12 main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers --fast
452
+ }
453
+
454
+ c() {
455
+ cd ~/ComfyUI &&
456
+ conda activate comfyui
457
+ python main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers --front-end-version Comfy-Org/ComfyUI_frontend@latest --fast
458
+ }
459
+
460
+ alias t="tensorboard --logdir=$HOME/output_dir/logs"
461
+ alias nvim="vim"
462
+ alias rt="vim ~/.tmux.conf && echo \"Reloading tmux config\" && tmux source ~/.tmux.conf"
463
+ alias zr="vim ~/.zshrc && echo \"Reloading zsh config\" && source ~/.zshrc"
464
+ alias ta="tmux att"
465
+ alias ga="git add . && git commit -avs && git push"
466
+ alias gs="git status"
467
+ alias wd="git diff --word-diff-regex='[^,]+' --patience"
468
+
469
+ source /home/kade/.config/broot/launcher/bash/br
470
+
471
+ [ -f ~/.fzf.zsh ] && source ~/.fzf.zsh
472
+
473
+ alias ls='ls --color=always'
474
+
475
+ # >>> conda initialize >>>
476
+ # !! Contents within this block are managed by 'conda init' !!
477
+ __conda_setup="$('/home/kade/miniconda3/bin/conda' 'shell.zsh' 'hook' 2> /dev/null)"
478
+ if [ $? -eq 0 ]; then
479
+ eval "$__conda_setup"
480
+ else
481
+ if [ -f "/home/kade/miniconda3/etc/profile.d/conda.sh" ]; then
482
+ . "/home/kade/miniconda3/etc/profile.d/conda.sh"
483
+ else
484
+ export PATH="/home/kade/miniconda3/bin:$PATH"
485
+ fi
486
+ fi
487
+ unset __conda_setup
488
+ # <<< conda initialize <<<
489
+
490
+ unset CONDA_CHANGEPS1
491
+
492
+ function conda_prompt_info() {
493
+ if [[ -n "$CONDA_DEFAULT_ENV" ]]; then
494
+ echo "(${CONDA_DEFAULT_ENV})"
495
+ fi
496
+ }
497
+
498
+ display_custom_help
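
The `seed` helper above shells out to an inline `python3 -c` one-liner that pulls the `ss_seed` entry from a safetensors header. For reference, the same lookup written as a standalone script might look like the sketch below (an illustration only; it assumes the file carries kohya-style `ss_seed` training metadata, and it drops the original's `json.loads` so a missing key simply prints the fallback string):

```python
#!/usr/bin/env python3
"""Standalone sketch of the `seed` zsh helper: print ss_seed from a safetensors file."""
import sys

from safetensors import safe_open


def read_seed(path: str) -> str:
    # safe_open only parses the header, so tensor data is never loaded.
    with safe_open(path, framework="np") as f:
        metadata = f.metadata() or {}
        return metadata.get("ss_seed", "Not found")


if __name__ == "__main__":
    print(read_seed(sys.argv[1]))
```

Saved as, say, `read_seed.py` (a hypothetical name), it would be invoked the same way as the alias: `python3 read_seed.py model.safetensors`.
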
9em124t2-499968/clip_model.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d7b0548d12fa649370896982c2af9d03d43285b782bd47639c96e6e0b29473c
3
+ size 1713067838
9em124t2-499968/config.yaml ADDED
@@ -0,0 +1,39 @@
+ wandb_project: joy-caption-1
+ device_batch_size: 2
+ batch_size: 256
+ learning_rate: 0.0002
+ warmup_samples: 18000
+ max_samples: 500000
+ save_every: 50000
+ test_every: 50000
+ use_amp: true
+ grad_scaler: true
+ lr_scheduler_type: cosine
+ min_lr_ratio: 0.0
+ allow_tf32: true
+ seed: 69
+ num_workers: 8
+ optimizer_type: adamw
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ adam_eps: 1.0e-08
+ adam_weight_decay: 0.0
+ clip_grad_norm: 1.0
+ dataset: fancyfeast/joy-captioning-20240917a
+ clip_model: google/siglip-so400m-patch14-384
+ text_model: meta-llama/Meta-Llama-3.1-8B
+ resume: null
+ gradient_checkpointing: false
+ test_size: 2048
+ grad_scaler_init: 65536.0
+ max_caption_length: 257
+ num_image_tokens: 32
+ adapter_type: mlp
+ text_model_dtype: bfloat16
+ pre_test: false
+ train_image_model: true
+ image_model_lr: null
+ train_lora: true
+ lora_r: 64
+ lora_alpha: 16
+ lora_dropout: 0.1
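
The values above are the flat training configuration recorded for the 9em124t2-499968 checkpoint. To inspect them programmatically, a minimal sketch with PyYAML is shown below (an assumption for illustration; the training code that actually consumes this file is not part of this commit):

```python
import yaml

# Read the checkpoint's training configuration into a plain dict.
with open("9em124t2-499968/config.yaml", "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Values recorded above: the vision tower, the base LLM, and the LoRA settings.
print(cfg["clip_model"])                                      # google/siglip-so400m-patch14-384
print(cfg["text_model"])                                      # meta-llama/Meta-Llama-3.1-8B
print(cfg["lora_r"], cfg["lora_alpha"], cfg["lora_dropout"])  # 64 16 0.1
```
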
9em124t2-499968/image_adapter.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e53c3bf8df745a3c19ae3c70dbf9bf23cfdc8f3fdb937000a4eafd2a36914661
3
+ size 86067714
9em124t2-499968/text_model/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: meta-llama/Meta-Llama-3.1-8B
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
9em124t2-499968/text_model/adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
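
This adapter_config.json describes a LoRA adapter (r=64, alpha=16, dropout 0.1 on q_proj/v_proj) for meta-llama/Meta-Llama-3.1-8B. The `joy` script further down loads the `text_model` directory directly through AutoModelForCausalLM; an equivalent, more explicit way to attach the adapter with the peft library would be roughly the sketch below (assuming peft and transformers are installed and the checkpoint directory is on disk):

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Load the base model named in adapter_config.json, then attach the LoRA weights
# stored alongside it in 9em124t2-499968/text_model.
base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3.1-8B",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "9em124t2-499968/text_model")
model.eval()
```
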
9em124t2-499968/text_model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b48221de174ab0db7b46b4833118c5c0a4c2bf0b51b77b4cc4ab04651bd06cca
3
+ size 109069176
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Balazs Horvath
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
ascii_art/gaeros ADDED
@@ -0,0 +1,11 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ ascii_art = """
+ ▄▄ • ▄▄▄· ▄▄▄ .▄▄▄ .▄▄ ·
+ ▐█ ▀ ▪▐█ ▀█ ▀▄.▀·▀▄ █·▪ ▐█ ▀.
+ ▄█ ▀█▄▄█▀▀█ ▐▀▀▪▄▐▀▀▄ ▄█▀▄ ▄▀▀▀█▄
+ ▐█▄▪▐█▐█ ▪▐▌▐█▄▄▌▐█•█▌▐█▌.▐▌▐█▄▪▐█
+ ·▀▀▀▀ ▀ ▀ ▀▀▀ .▀ ▀ ▀█▄▀▪ ▀▀▀▀
+ """
+ print(ascii_art)
ascii_art/kade ADDED
@@ -0,0 +1,13 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+
+ ascii_art = """
+ ▄ •▄ ▄▄▄· ·▄▄▄▄ ▄▄▄ .
+ █▌▄▌▪▐█ ▀█ ██▪ ██ ▀▄.▀·
+ ▐▀▀▄·▄█▀▀█ ▐█· ▐█▌▐▀▀▪▄
+ ▐█.█▌▐█ ▪▐▌██. ██ ▐█▄▄▌
+ ·▀ ▀ ▀ ▀ ▀▀▀▀▀• ▀▀▀
+ """
+
+ print(ascii_art)
+
crawl/crawl ADDED
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Web Crawler and Content Saver
6
+
7
+ This module provides functionality to crawl web pages, extract content,
8
+ and save the results including markdown text and images. It uses the
9
+ WebCrawler class from crawl4ai and implements parallel image downloading.
10
+ """
11
+
12
+ import sys
13
+ import os
14
+ import re
15
+ import platform
16
+ from concurrent.futures import ThreadPoolExecutor, as_completed
17
+
18
+ import requests
19
+ from crawl4ai import WebCrawler
20
+
21
+
22
+ def create_crawler():
23
+ """
24
+ Create and initialize a WebCrawler instance.
25
+
26
+ Returns:
27
+ WebCrawler: An initialized WebCrawler object.
28
+ """
29
+ crawler = WebCrawler(verbose=True)
30
+ crawler.warmup()
31
+ return crawler
32
+
33
+
34
+ def sanitize_filename(filename):
35
+ """
36
+ Remove invalid characters from a filename to make it Windows-compatible.
37
+
38
+ Args:
39
+ filename (str): The original filename.
40
+
41
+ Returns:
42
+ str: The sanitized filename.
43
+ """
44
+ # Remove invalid characters for Windows file names
45
+ return re.sub(r'[<>:"/\\|?*]', '', filename)
46
+
47
+
48
+ def download_image(session, image_url, save_dir):
49
+ """
50
+ Download an image from a given URL and save it to the specified directory.
51
+
52
+ Args:
53
+ session (requests.Session):
54
+ The requests session to use for downloading.
55
+ image_url (str):
56
+ The URL of the image to download.
57
+ save_dir (str):
58
+ The directory to save the downloaded image.
59
+ """
60
+ try:
61
+ # Ensure the URL has a scheme
62
+ if not re.match(r'^https?://', image_url):
63
+ image_url = 'https://' + image_url.lstrip('/')
64
+
65
+ image_filename = os.path.basename(image_url).split('?')[0]
66
+ sanitized_image_filename = sanitize_filename(image_filename)
67
+ image_path = os.path.join(save_dir, sanitized_image_filename)
68
+
69
+ response = session.get(image_url, stream=True)
70
+ response.raise_for_status()
71
+ with open(image_path, 'wb') as image_file:
72
+ for chunk in response.iter_content(chunk_size=8192):
73
+ image_file.write(chunk)
74
+ print(f"Saved image: {image_path}")
75
+ except requests.RequestException as e:
76
+ print(f"Error downloading image {image_url}: {str(e)}")
77
+ except IOError as e:
78
+ print(f"Error saving image {image_url}: {str(e)}")
79
+
80
+
81
+ def save_result(target_url):
82
+ """
83
+ Crawl a given URL, extract content, and save the results.
84
+
85
+ This function crawls the specified URL, saves the markdown content,
86
+ and downloads all associated images in parallel.
87
+
88
+ Args:
89
+ target_url (str): The URL to crawl and save content from.
90
+ """
91
+ crawler = create_crawler()
92
+ result = crawler.run(url=target_url)
93
+ title = result.metadata.get('title', 'untitled')
94
+ sanitized_title = sanitize_filename(title).replace(" ", "_")
95
+
96
+ # Choose the appropriate base path based on the operating system
97
+ if platform.system() == "Windows":
98
+ base_path = "E:\\knowledgebase\\Saved Websites\\"
99
+ else:
100
+ base_path = "/home/kade/saved_websites/"
101
+
102
+ save_dir = os.path.join(base_path, sanitized_title)
103
+ os.makedirs(save_dir, exist_ok=True)
104
+
105
+ # Save markdown
106
+ save_path = os.path.join(save_dir, f"{sanitized_title}.md")
107
+ with open(save_path, "w", encoding="utf-8") as file:
108
+ file.write(result.markdown)
109
+ print(f"Saved markdown to {save_path}")
110
+
111
+ # Save images in parallel
112
+ if 'images' in result.media and isinstance(result.media['images'], list):
113
+ session = requests.Session()
114
+ headers = {
115
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
116
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
117
+ 'Chrome/91.0.4472.124 Safari/537.36',
118
+ 'Referer': target_url,
119
+ 'Accept': ('image/avif,image/webp,image/apng,image/svg+xml,'
120
+ 'image/*,*/*;q=0.8'),
121
+ 'Accept-Language': 'en-US,en;q=0.9',
122
+ 'Sec-Fetch-Dest': 'image',
123
+ 'Sec-Fetch-Mode': 'no-cors',
124
+ 'Sec-Fetch-Site': 'cross-site',
125
+ }
126
+ session.headers.update(headers)
127
+
128
+ with ThreadPoolExecutor(max_workers=5) as executor:
129
+ futures = []
130
+ for image_data in result.media['images']:
131
+ if 'src' in image_data:
132
+ futures.append(executor.submit(download_image,
133
+ session,
134
+ image_data['src'],
135
+ save_dir))
136
+
137
+ for future in as_completed(futures):
138
+ future.result()
139
+
140
+
141
+ if __name__ == "__main__":
142
+ if len(sys.argv) != 2:
143
+ print("Usage: python crawl.py <URL>")
144
+ else:
145
+ url = sys.argv[1]
146
+ save_result(url)
crawl/crawl4ai.pyi ADDED
@@ -0,0 +1,58 @@
1
+ """
2
+ This module provides a WebCrawler class for AI-related web crawling tasks.
3
+
4
+ The WebCrawler class is designed to crawl web pages, potentially for
5
+ AI-related data extraction or analysis. It offers methods for initializing
6
+ the crawler, warming it up, and running crawl operations on specified URLs.
7
+
8
+ Classes:
9
+ WebCrawler: A web crawler for AI-related tasks.
10
+
11
+ Example:
12
+ crawler = WebCrawler(verbose=True)
13
+ crawler.warmup()
14
+ result = crawler.run("https://example.com")
15
+ """
16
+
17
+ from typing import Any
18
+
19
+
20
+ class WebCrawler:
21
+ """
22
+ A web crawler for AI-related tasks.
23
+
24
+ This class provides functionality to crawl web pages,
25
+ potentially for AI-related data extraction or analysis.
26
+
27
+ Attributes:
28
+ verbose (bool): If True, enables verbose output during crawling.
29
+
30
+ Methods:
31
+ warmup(): Prepares the crawler for operation.
32
+ run(url: str): Crawls the specified URL and returns the result.
33
+ """
34
+
35
+ def __init__(self, verbose: bool = False) -> None:
36
+ self.verbose: bool = verbose
37
+
38
+ def warmup(self) -> None:
39
+ """
40
+ Prepares the crawler for operation.
41
+
42
+ This method should be called before running the crawler to ensure
43
+ all necessary resources and configurations are set up.
44
+ """
45
+
46
+ def run(self, url: str) -> Any:
47
+ """
48
+ Crawls the specified URL and returns the result.
49
+
50
+ Args:
51
+ url (str): The URL to crawl.
52
+
53
+ Returns:
54
+ Any: The result of the crawling operation. The specific type
55
+ depends on the implementation and could be raw HTML,
56
+ parsed data, or any other relevant information.
57
+ """
58
+
crawl/crawl_wikipedia ADDED
@@ -0,0 +1,182 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Web Crawler and Content Saver
6
+
7
+ This module provides functionality to crawl web pages, extract content,
8
+ and save the results including markdown text and images. It uses the
9
+ WebCrawler class from crawl4ai and implements parallel image downloading.
10
+ """
11
+
12
+ import sys
13
+ import os
14
+ import re
15
+ import platform
16
+ from concurrent.futures import ThreadPoolExecutor, as_completed
17
+ from urllib.parse import urljoin
18
+ from bs4 import BeautifulSoup
19
+
20
+ import requests
21
+ from crawl4ai import WebCrawler
22
+
23
+
24
+ def create_crawler():
25
+ """
26
+ Create and initialize a WebCrawler instance.
27
+
28
+ Returns:
29
+ WebCrawler: An initialized WebCrawler object.
30
+ """
31
+ crawler = WebCrawler(verbose=True)
32
+ crawler.warmup()
33
+ return crawler
34
+
35
+
36
+ def sanitize_filename(filename):
37
+ """
38
+ Remove invalid characters from a filename to make it Windows-compatible.
39
+
40
+ Args:
41
+ filename (str): The original filename.
42
+
43
+ Returns:
44
+ str: The sanitized filename.
45
+ """
46
+ # Remove invalid characters for Windows file names
47
+ return re.sub(r'[<>:"/\\|?*]', '', filename)
48
+
49
+
50
+ def get_full_size_image_url(session, image_url, base_url):
51
+ """
52
+ Attempt to find the full-size image URL from a thumbnail URL.
53
+
54
+ Args:
55
+ session (requests.Session): The requests session to use.
56
+ image_url (str): The thumbnail image URL.
57
+ base_url (str): The base URL of the page being crawled.
58
+
59
+ Returns:
60
+ str: The full-size image URL if found, otherwise the original URL.
61
+ """
62
+ try:
63
+ response = session.get(image_url)
64
+ response.raise_for_status()
65
+ soup = BeautifulSoup(response.text, 'html.parser')
66
+
67
+ # Look for common full-size image patterns
68
+ full_size_link = soup.find('a', class_=re.compile(r'fullimage|full-size'))
69
+ if full_size_link and full_size_link.get('href'):
70
+ return urljoin(base_url, full_size_link['href'])
71
+
72
+ # If no full-size link is found, return the original URL
73
+ return image_url
74
+ except Exception as e:
75
+ print(f"Error finding full-size image for {image_url}: {str(e)}")
76
+ return image_url
77
+
78
+
79
+ def download_image(session, image_url, save_dir, base_url):
80
+ """
81
+ Download an image from a given URL and save it to the specified directory.
82
+ Attempt to get the full-size image if the URL is a thumbnail.
83
+
84
+ Args:
85
+ session (requests.Session): The requests session to use for downloading.
86
+ image_url (str): The URL of the image to download.
87
+ save_dir (str): The directory to save the downloaded image.
88
+ base_url (str): The base URL of the page being crawled.
89
+ """
90
+ try:
91
+ full_size_url = get_full_size_image_url(session, image_url, base_url)
92
+ image_filename = os.path.basename(full_size_url).split('?')[0]
93
+ sanitized_image_filename = sanitize_filename(image_filename)
94
+ image_path = os.path.join(save_dir, sanitized_image_filename)
95
+
96
+ if os.path.exists(image_path):
97
+ print(f"Image already exists: {image_path}")
98
+ return
99
+
100
+ response = session.get(full_size_url, stream=True)
101
+ response.raise_for_status()
102
+ with open(image_path, 'wb') as image_file:
103
+ for chunk in response.iter_content(chunk_size=8192):
104
+ image_file.write(chunk)
105
+ print(f"Saved full-size image: {image_path}")
106
+ except requests.RequestException as e:
107
+ print(f"Error downloading image {full_size_url}: {str(e)}")
108
+ except IOError as e:
109
+ print(f"Error saving image {full_size_url}: {str(e)}")
110
+
111
+
112
+ def save_result(target_url):
113
+ """
114
+ Crawl a given URL, extract content, and save the results.
115
+
116
+ This function crawls the specified URL, saves the markdown content,
117
+ and downloads all associated images in parallel.
118
+
119
+ Args:
120
+ target_url (str): The URL to crawl and save content from.
121
+ """
122
+ crawler = create_crawler()
123
+ result = crawler.run(url=target_url)
124
+ title = result.metadata.get('title', 'untitled')
125
+ sanitized_title = sanitize_filename(title).replace(" ", "_")
126
+
127
+ # Choose the appropriate base path based on the operating system
128
+ if platform.system() == "Windows":
129
+ base_path = "E:\\knowledgebase\\Saved Websites\\"
130
+ else:
131
+ base_path = "/home/kade/saved_websites/"
132
+
133
+ save_dir = os.path.join(base_path, sanitized_title)
134
+ os.makedirs(save_dir, exist_ok=True)
135
+
136
+ # Save markdown
137
+ save_path = os.path.join(save_dir, f"{sanitized_title}.md")
138
+ #sanitized_markdown = sanitize_citations(result.markdown)
139
+ with open(save_path, "w", encoding="utf-8") as file:
140
+ file.write(result.markdown)
141
+ #file.write(sanitized_markdown)
142
+ print(f"Saved markdown to {save_path}")
143
+
144
+ # Save images in parallel
145
+ if 'images' in result.media and isinstance(result.media['images'], list):
146
+ session = requests.Session()
147
+ headers = {
148
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
149
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
150
+ 'Chrome/91.0.4472.124 Safari/537.36',
151
+ 'Referer': target_url,
152
+ 'Accept': ('image/avif,image/webp,image/apng,image/svg+xml,'
153
+ 'image/*,*/*;q=0.8'),
154
+ 'Accept-Language': 'en-US,en;q=0.9',
155
+ 'Sec-Fetch-Dest': 'image',
156
+ 'Sec-Fetch-Mode': 'no-cors',
157
+ 'Sec-Fetch-Site': 'cross-site',
158
+ }
159
+ session.headers.update(headers)
160
+
161
+ with ThreadPoolExecutor(max_workers=5) as executor:
162
+ futures = []
163
+ for image_data in result.media['images']:
164
+ if 'src' in image_data:
165
+ # Use urljoin to create absolute URLs for image sources
166
+ absolute_image_url = urljoin(target_url, image_data['src'])
167
+ futures.append(executor.submit(download_image,
168
+ session,
169
+ absolute_image_url,
170
+ save_dir,
171
+ target_url)) # Pass target_url as base_url
172
+
173
+ for future in as_completed(futures):
174
+ future.result()
175
+
176
+
177
+ if __name__ == "__main__":
178
+ if len(sys.argv) != 2:
179
+ print("Usage: python crawl.py <URL>")
180
+ else:
181
+ url = sys.argv[1]
182
+ save_result(url)
joy ADDED
@@ -0,0 +1,555 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ JoyCaption Alpha One
6
+
7
+ This module provides functionality for generating captions for images using a
8
+ combination of CLIP, LLM, and custom image adapters. It supports various
9
+ caption types, tones, and lengths.
10
+
11
+ The main components include:
12
+ - Loading and initializing models (CLIP, LLM, image adapter)
13
+ - Processing images and generating captions
14
+ - Command-line interface for batch processing images in a directory
15
+ """
16
+
17
+ import os
18
+ import argparse
19
+ import re
20
+ from pathlib import Path
21
+ from PIL import Image
22
+ import pillow_jxl
23
+ import torch
24
+ import torchvision.transforms.functional as TVF
25
+ from transformers import (
26
+ AutoModel,
27
+ AutoProcessor,
28
+ AutoTokenizer,
29
+ AutoModelForCausalLM,
30
+ PreTrainedTokenizer,
31
+ PreTrainedTokenizerFast,
32
+ )
33
+ from torch import nn
34
+
35
+ CLIP_PATH = "google/siglip-so400m-patch14-384"
36
+ MODEL_PATH = "meta-llama/Meta-Llama-3.1-8B"
37
+ CHECKPOINT_PATH = Path(__file__).resolve().parent / "9em124t2-499968"
38
+ CAPTION_TYPE_MAP = {
39
+ ("descriptive", "formal", False, False): [
40
+ "Write a descriptive caption for this image in a formal tone."
41
+ ],
42
+ ("descriptive", "formal", False, True): [
43
+ "Write a descriptive caption for this image in a formal tone within "
44
+ "{word_count} words."
45
+ ],
46
+ ("descriptive", "formal", True, False): [
47
+ "Write a {length} descriptive caption for this image in a formal tone."
48
+ ],
49
+ ("descriptive", "informal", False, False): [
50
+ "Write a descriptive caption for this image in a casual tone."
51
+ ],
52
+ ("descriptive", "informal", False, True): [
53
+ "Write a descriptive caption for this image in a casual tone within "
54
+ "{word_count} words."
55
+ ],
56
+ ("descriptive", "informal", True, False): [
57
+ "Write a {length} descriptive caption for this image in a casual tone."
58
+ ],
59
+ ("training_prompt", "formal", False, False): [
60
+ "Write a stable diffusion prompt for this image."
61
+ ],
62
+ ("training_prompt", "formal", False, True): [
63
+ "Write a stable diffusion prompt for this image within {word_count} "
64
+ "words."
65
+ ],
66
+ ("training_prompt", "formal", True, False): [
67
+ "Write a {length} stable diffusion prompt for this image."
68
+ ],
69
+ ("rng-tags", "formal", False, False): [
70
+ "Write a list of Booru tags for this image."
71
+ ],
72
+ ("rng-tags", "formal", False, True): [
73
+ "Write a list of Booru tags for this image within {word_count} words."
74
+ ],
75
+ ("rng-tags", "formal", True, False): [
76
+ "Write a {length} list of Booru tags for this image."
77
+ ],
78
+ }
79
+
80
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
81
+
82
+ class ImageAdapter(nn.Module):
83
+ """
84
+ Custom image adapter module for processing CLIP vision outputs.
85
+
86
+ This module adapts the output of a CLIP vision model to be compatible with
87
+ a text model. It supports optional layer normalization, positional
88
+ embeddings, and deep feature extraction.
89
+
90
+ Args:
91
+ input_features (int): Number of input features from the vision model.
92
+ output_features (int): Number of output features to match the text model.
93
+ ln1 (bool): Whether to use layer normalization.
94
+ pos_emb (bool): Whether to use positional embeddings.
95
+ num_image_tokens (int): Number of image tokens.
96
+ deep_extract (bool): Whether to use deep feature extraction.
97
+ """
98
+
99
+ def __init__(
100
+ self,
101
+ input_features: int,
102
+ output_features: int,
103
+ ln1: bool,
104
+ pos_emb: bool,
105
+ num_image_tokens: int,
106
+ deep_extract: bool,
107
+ ):
108
+ super().__init__()
109
+ self.deep_extract = deep_extract
110
+
111
+ if self.deep_extract:
112
+ input_features = input_features * 5
113
+
114
+ self.linear1 = nn.Linear(input_features, output_features)
115
+ self.activation = nn.GELU()
116
+ self.linear2 = nn.Linear(output_features, output_features)
117
+ self.ln1 = nn.Identity() if not ln1 else nn.LayerNorm(input_features)
118
+ self.pos_emb = None if not pos_emb else nn.Parameter(
119
+ torch.zeros(num_image_tokens, input_features)
120
+ )
121
+
122
+ self.other_tokens = nn.Embedding(3, output_features)
123
+ self.other_tokens.weight.data.normal_(mean=0.0, std=0.02)
124
+
125
+ def forward(self, vision_outputs: torch.Tensor):
126
+ """
127
+ Forward pass of the image adapter.
128
+
129
+ Args:
130
+ vision_outputs (torch.Tensor): Output tensor from the CLIP vision model.
131
+
132
+ Returns:
133
+ torch.Tensor: Adapted image features.
134
+ """
135
+ if self.deep_extract:
136
+ x = torch.concat((
137
+ vision_outputs[-2],
138
+ vision_outputs[3],
139
+ vision_outputs[7],
140
+ vision_outputs[13],
141
+ vision_outputs[20],
142
+ ), dim=-1)
143
+ assert len(x.shape) == 3, f"Expected 3, got {len(x.shape)}"
144
+ assert x.shape[-1] == vision_outputs[-2].shape[-1] * 5, (
145
+ f"Expected {vision_outputs[-2].shape[-1] * 5}, got {x.shape[-1]}"
146
+ )
147
+ else:
148
+ x = vision_outputs[-2]
149
+
150
+ x = self.ln1(x)
151
+
152
+ if self.pos_emb is not None:
153
+ assert x.shape[-2:] == self.pos_emb.shape, (
154
+ f"Expected {self.pos_emb.shape}, got {x.shape[-2:]}"
155
+ )
156
+ x = x + self.pos_emb
157
+
158
+ x = self.linear1(x)
159
+ x = self.activation(x)
160
+ x = self.linear2(x)
161
+
162
+ other_tokens = self.other_tokens(
163
+ torch.tensor([0, 1], device=self.other_tokens.weight.device).expand(
164
+ x.shape[0], -1
165
+ )
166
+ )
167
+ assert other_tokens.shape == (x.shape[0], 2, x.shape[2]), (
168
+ f"Expected {(x.shape[0], 2, x.shape[2])}, got {other_tokens.shape}"
169
+ )
170
+ x = torch.cat((other_tokens[:, 0:1], x, other_tokens[:, 1:2]), dim=1)
171
+
172
+ return x
173
+
174
+ def get_eot_embedding(self):
175
+ """
176
+ Get the end-of-text embedding.
177
+
178
+ Returns:
179
+ torch.Tensor: The end-of-text embedding.
180
+ """
181
+ return self.other_tokens(
182
+ torch.tensor([2], device=self.other_tokens.weight.device)
183
+ ).squeeze(0)
184
+
185
+ class JoyCaptionModel:
186
+ """
187
+ A class for generating captions for images using CLIP, LLM, and custom image adapters.
188
+
189
+ This class encapsulates the functionality to load and initialize various models
190
+ (CLIP, LLM, image adapter) and use them to process images and generate captions.
191
+ It supports different caption types, tones, and lengths.
192
+
193
+ Attributes:
194
+ clip_model: The CLIP vision model for processing images.
195
+ text_model: The language model for generating captions.
196
+ image_adapter: Custom adapter for processing CLIP vision outputs.
197
+ tokenizer: Tokenizer for the language model.
198
+
199
+ Methods:
200
+ load_models(): Load and initialize all required models.
201
+ process_image(input_image, caption_type, caption_tone, caption_length):
202
+ Process an input image and generate a caption based on specified parameters.
203
+ """
204
+
205
+ def __init__(self):
206
+ self.clip_model = None
207
+ self.text_model = None
208
+ self.image_adapter = None
209
+ self.tokenizer = None
210
+
211
+ def load_models(self):
212
+ """
213
+ Load and initialize all required models (CLIP, LLM, image adapter).
214
+ """
215
+ print("Loading CLIP")
216
+ self.clip_model = AutoModel.from_pretrained(CLIP_PATH)
217
+ self.clip_model = self.clip_model.vision_model
218
+
219
+ if (CHECKPOINT_PATH / "clip_model.pt").exists():
220
+ print("Loading VLM's custom vision model")
221
+ checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu')
222
+ checkpoint = {k.replace("_orig_mod.module.", ""): v for k, v in checkpoint.items()}
223
+ self.clip_model.load_state_dict(checkpoint)
224
+ del checkpoint
225
+
226
+ self.clip_model.eval()
227
+ self.clip_model.requires_grad_(False)
228
+ self.clip_model.to("cuda")
229
+
230
+ print("Loading tokenizer")
231
+ self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
232
+ assert isinstance(self.tokenizer, PreTrainedTokenizer) or isinstance(
233
+ self.tokenizer, PreTrainedTokenizerFast
234
+ ), f"Tokenizer is of type {type(self.tokenizer)}"
235
+
236
+ print("Loading LLM")
237
+ if (CHECKPOINT_PATH / "text_model").exists():
238
+ print("Loading VLM's custom text model")
239
+ self.text_model = AutoModelForCausalLM.from_pretrained(
240
+ CHECKPOINT_PATH / "text_model",
241
+ device_map=0,
242
+ torch_dtype=torch.bfloat16
243
+ )
244
+ else:
245
+ self.text_model = AutoModelForCausalLM.from_pretrained(
246
+ MODEL_PATH,
247
+ device_map="auto",
248
+ torch_dtype=torch.bfloat16
249
+ )
250
+
251
+ self.text_model.eval()
252
+
253
+ print("Loading image adapter")
254
+ self.image_adapter = ImageAdapter(
255
+ self.clip_model.config.hidden_size,
256
+ self.text_model.config.hidden_size,
257
+ False,
258
+ False,
259
+ 38,
260
+ False
261
+ )
262
+ self.image_adapter.load_state_dict(
263
+ torch.load(CHECKPOINT_PATH / "image_adapter.pt", map_location="cpu")
264
+ )
265
+ self.image_adapter.eval()
266
+ self.image_adapter.to("cuda")
267
+
268
+ @torch.no_grad()
269
+ def process_image(self,
270
+ input_image: Image.Image,
271
+ caption_type: str,
272
+ caption_tone: str,
273
+ caption_length: str | int,
274
+ custom_prompt: str | None = None) -> str:
275
+ """
276
+ Process an input image and generate a caption based on specified parameters.
277
+ """
278
+ torch.cuda.empty_cache()
279
+
280
+ if caption_type == "custom" and custom_prompt:
281
+ prompt_str = custom_prompt
282
+ else:
283
+ prompt_str = self._get_prompt_string(caption_type, caption_tone, caption_length)
284
+ print(f"Prompt: {prompt_str}")
285
+
286
+ pixel_values = self._preprocess_image(input_image)
287
+ prompt = self._tokenize_prompt(prompt_str)
288
+
289
+ embedded_images = self._embed_image(pixel_values)
290
+ inputs_embeds, input_ids, attention_mask = self._construct_inputs(embedded_images, prompt)
291
+
292
+ generate_ids = self._generate_caption(inputs_embeds, input_ids, attention_mask)
293
+ caption = self._decode_caption(generate_ids, input_ids)
294
+
295
+ return caption.strip()
296
+
297
+ def _get_prompt_string(self, caption_type, caption_tone, caption_length):
298
+ length = None if caption_length == "any" else caption_length
299
+
300
+ if isinstance(length, str):
301
+ try:
302
+ length = int(length)
303
+ except ValueError:
304
+ pass
305
+
306
+ if caption_type in {"rng-tags", "training_prompt"}:
307
+ caption_tone = "formal"
308
+
309
+ prompt_key = (
310
+ caption_type,
311
+ caption_tone,
312
+ isinstance(length, str),
313
+ isinstance(length, int)
314
+ )
315
+ if prompt_key not in CAPTION_TYPE_MAP:
316
+ raise ValueError(f"Invalid caption type: {prompt_key}")
317
+
318
+ prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(
319
+ length=length, word_count=length
320
+ )
321
+ return prompt_str
322
+
323
+ def _preprocess_image(self, input_image):
324
+ image = input_image.resize((384, 384), Image.LANCZOS)
325
+ pixel_values = TVF.pil_to_tensor(image).unsqueeze(0) / 255.0
326
+ pixel_values = TVF.normalize(pixel_values, [0.5], [0.5])
327
+ pixel_values = pixel_values.to('cuda')
328
+ return pixel_values
329
+
330
+ def _tokenize_prompt(self, prompt_str):
331
+ prompt = self.tokenizer.encode(
332
+ prompt_str,
333
+ return_tensors='pt',
334
+ padding=False,
335
+ truncation=False,
336
+ add_special_tokens=False
337
+ )
338
+ return prompt
339
+
340
+ def _embed_image(self, pixel_values):
341
+ with torch.amp.autocast_mode.autocast('cuda', enabled=True):
342
+ vision_outputs = self.clip_model(pixel_values=pixel_values, output_hidden_states=True)
343
+ image_features = vision_outputs.hidden_states
344
+ embedded_images = self.image_adapter(image_features)
345
+ embedded_images = embedded_images.to('cuda')
346
+ return embedded_images
347
+
348
+ def _construct_inputs(self, embedded_images, prompt):
349
+ prompt_embeds = self.text_model.model.embed_tokens(prompt.to('cuda'))
350
+ assert prompt_embeds.shape == (1, prompt.shape[1], self.text_model.config.hidden_size), (
351
+ f"Prompt shape is {prompt_embeds.shape}, expected "
352
+ f"{(1, prompt.shape[1], self.text_model.config.hidden_size)}"
353
+ )
354
+
355
+ embedded_bos = self.text_model.model.embed_tokens(
356
+ torch.tensor([[self.tokenizer.bos_token_id]],
357
+ device=self.text_model.device,
358
+ dtype=torch.int64)
359
+ )
360
+
361
+ eot_embed = self.image_adapter.get_eot_embedding().unsqueeze(0).to(
362
+ dtype=self.text_model.dtype
363
+ )
364
+
365
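+ # Assemble the input sequence as [BOS] [image tokens] [prompt tokens] [EOT] along the sequence dimension.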
+ inputs_embeds = torch.cat([
366
+ embedded_bos.expand(embedded_images.shape[0], -1, -1),
367
+ embedded_images.to(dtype=embedded_bos.dtype),
368
+ prompt_embeds.expand(embedded_images.shape[0], -1, -1),
369
+ eot_embed.expand(embedded_images.shape[0], -1, -1),
370
+ ], dim=1)
371
+
372
+ input_ids = torch.cat([
373
+ torch.tensor([[self.tokenizer.bos_token_id]], dtype=torch.long),
374
+ torch.zeros((1, embedded_images.shape[1]), dtype=torch.long),
375
+ prompt,
376
+ torch.tensor([[self.tokenizer.eos_token_id]], dtype=torch.long),
377
+ ], dim=1).to('cuda')
378
+ attention_mask = torch.ones_like(input_ids)
379
+
380
+ return inputs_embeds, input_ids, attention_mask
381
+
382
+ def _generate_caption(self, inputs_embeds, input_ids, attention_mask):
383
+ generate_ids = self.text_model.generate(
384
+ input_ids,
385
+ inputs_embeds=inputs_embeds,
386
+ attention_mask=attention_mask,
387
+ max_new_tokens=300,
388
+ do_sample=True,
389
+ suppress_tokens=None
390
+ )
391
+ return generate_ids
392
+
393
+ def _decode_caption(self, generate_ids, input_ids):
394
+ generate_ids = generate_ids[:, input_ids.shape[1]:]
395
+
396
+ if (generate_ids[0][-1] == self.tokenizer.eos_token_id or
397
+ generate_ids[0][-1] == self.tokenizer.convert_tokens_to_ids("<|eot_id|>")):
398
+ generate_ids = generate_ids[:, :-1]
399
+
400
+ caption = self.tokenizer.batch_decode(
401
+ generate_ids,
402
+ skip_special_tokens=False,
403
+ clean_up_tokenization_spaces=False
404
+ )[0]
405
+ return caption
406
+
407
+
408
+ def main():
409
+ """Generate captions for images in a directory and save them as .caption files."""
410
+ parser = argparse.ArgumentParser(
411
+ description="Generate captions for images in a directory and save them as .caption files."
412
+ )
413
+ parser.add_argument("directory", type=str, help="Target directory containing images.")
414
+ parser.add_argument(
415
+ "--caption_type",
416
+ type=str,
417
+ default="descriptive",
418
+ choices=["descriptive", "training_prompt", "rng-tags", "custom"],
419
+ help="Type of caption to generate."
420
+ )
421
+ parser.add_argument(
422
+ "--caption_tone",
423
+ type=str,
424
+ default="formal",
425
+ choices=["formal", "informal"],
426
+ help="Tone of the caption."
427
+ )
428
+ parser.add_argument(
429
+ "--caption_length",
430
+ type=str,
431
+ default="any",
432
+ help="Length of the caption."
433
+ )
434
+ parser.add_argument(
435
+ "--dont-strip-commas",
436
+ action="store_true",
437
+ help="If set, commas will not be stripped from the generated captions."
438
+ )
439
+ parser.add_argument(
440
+ "--custom_prompt",
441
+ type=str,
442
+ help="Custom prompt for the captioner. Use with --caption_type custom."
443
+ )
444
+ parser.add_argument(
445
+ '--add-commas-to-sentence-ends',
446
+ action='store_true',
447
+ help='Add commas after periods in sentences'
448
+ )
449
+ parser.add_argument(
450
+ '--feed-from-tags',
451
+ action='store_true',
452
+ help='Use .txt files with the same base filename as the images as input to the captioner'
453
+ )
454
+
455
+ args = parser.parse_args()
456
+
457
+ # Initialize and load models
458
+ joy_caption_model = JoyCaptionModel()
459
+ joy_caption_model.load_models()
460
+
461
+ # Validate custom prompt usage
462
+ if args.caption_type == "custom" and not args.custom_prompt:
463
+ parser.error("--custom_prompt is required when using --caption_type custom")
464
+ elif args.caption_type != "custom" and args.custom_prompt:
465
+ parser.error("--custom_prompt can only be used with --caption_type custom")
466
+
467
+ image_extensions = {".webp", ".png", ".jpeg", ".jpg", ".jxl"}
468
+ for image_path in Path(args.directory).rglob("*"):
469
+ if image_path.suffix.lower() in image_extensions:
470
+ caption_file = image_path.with_suffix('.caption')
471
+
472
+ # Skip if the caption file already exists
473
+ if caption_file.exists():
474
+ print(f"Skipping {image_path}: Caption file already exists.")
475
+ continue
476
+
477
+ input_image = Image.open(image_path).convert("RGB")
478
+
479
+ # Use custom prompt if specified
480
+ if args.caption_type == "custom":
481
+ caption = joy_caption_model.process_image(
482
+ input_image,
483
+ "custom",
484
+ args.caption_tone,
485
+ args.caption_length,
486
+ custom_prompt=args.custom_prompt
487
+ )
488
+ else:
489
+ # Check for --feed-from-tags
490
+ if args.feed_from_tags:
491
+ tag_file = find_tag_file(image_path)
492
+ if tag_file:
493
+ with open(tag_file, 'r', encoding='utf-8') as f:
494
+ custom_prompt = f.read().strip()
495
+ caption = joy_caption_model.process_image(
496
+ input_image,
497
+ "custom",
498
+ args.caption_tone,
499
+ args.caption_length,
500
+ custom_prompt=custom_prompt
501
+ )
502
+ else:
503
+ caption = joy_caption_model.process_image(
504
+ input_image,
505
+ args.caption_type,
506
+ args.caption_tone,
507
+ args.caption_length
508
+ )
509
+ else:
510
+ caption = joy_caption_model.process_image(
511
+ input_image,
512
+ args.caption_type,
513
+ args.caption_tone,
514
+ args.caption_length
515
+ )
516
+
517
+ # Strip commas if the --dont-strip-commas flag is not set
518
+ if not args.dont_strip_commas:
519
+ # Existing comma stripping logic
520
+ caption = re.sub(r',\s*([^\d])', r' \1', caption)
521
+
522
+ # New feature: Add commas after periods if specified
523
+ if args.add_commas_to_sentence_ends:
524
+ caption = re.sub(r'(\.)(\s+)([A-Z])', r'\1,\2\3', caption)
525
+
526
+ print(f"Caption for {image_path}:\n{caption}\n")
527
+
528
+ # Save the caption to a .caption file
529
+ with open(caption_file, 'w', encoding='utf-8') as f:
530
+ f.write(caption)
531
+ print(f"Caption saved to {caption_file}")
532
+
533
+ def find_tag_file(image_path):
534
+ """
535
+ Find the corresponding .txt file for the given image path.
536
+ Handles cases where the image has a -(number) suffix.
537
+ """
538
+ base_name = image_path.stem
539
+ tag_file = image_path.with_suffix('.txt')
540
+
541
+ if tag_file.exists():
542
+ return tag_file
543
+
544
+ # Handle -(number) suffix
545
+ match = re.match(r'(.+)-\d+$', base_name)
546
+ if match:
547
+ base_name = match.group(1)
548
+ tag_file = image_path.with_name(base_name).with_suffix('.txt')
549
+ if tag_file.exists():
550
+ return tag_file
551
+
552
+ return None
553
+
554
+ if __name__ == "__main__":
555
+ main()
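A minimal invocation sketch for the captioning script above. The saved file name (joy_caption.py) and the directory path are assumptions; the flags are the ones defined in main()'s argparse block:

python joy_caption.py /path/to/images --caption_type descriptive --caption_tone informal --caption_length 100
python joy_caption.py /path/to/images --caption_type custom --custom_prompt "Describe the lighting and the mood."
python joy_caption.py /path/to/images --feed-from-tags --dont-strip-commas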
jtp2 ADDED
@@ -0,0 +1,290 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ JTP2 (Joint Tagger Project 2) Image Classification Script
6
+ This script implements a multi-label classifier for furry images using the
7
+ PILOT2 model. It processes images, generates tags, and saves the results. The
8
+ model is based on a Vision Transformer architecture and uses a custom GatedHead
9
+ for classification.
10
+ Key features:
11
+ - Image preprocessing and transformation
12
+ - Model inference using PILOT2
13
+ - Tag generation with customizable threshold
14
+ - Batch processing of image directories
15
+ - Saving results as text files alongside images
16
+ Usage:
17
+ python jtp2.py <directory> [--threshold <float>]
18
+ """
19
+ import os
20
+ import json
21
+ import argparse
22
+ from PIL import Image
23
+ import safetensors.torch
24
+ import timm
25
+ from timm.models import VisionTransformer
26
+ import torch
27
+ from torchvision.transforms import transforms
28
+ from torchvision.transforms import InterpolationMode
29
+ import torchvision.transforms.functional as TF
30
+ import pillow_jxl
31
+
32
+ torch.set_grad_enabled(False)
33
+
34
+
35
+ class Fit(torch.nn.Module):
36
+ """
37
+ A custom transform module for resizing and padding images.
38
+ Args:
39
+ bounds (tuple[int, int] | int): The target dimensions for the image.
40
+ interpolation (InterpolationMode): The interpolation method for resizing.
41
+ grow (bool): Whether to allow upscaling of images.
42
+ pad (float | None): The padding value to use if padding is applied.
43
+ """
44
+ def __init__(
45
+ self,
46
+ bounds: tuple[int, int] | int,
47
+ interpolation=InterpolationMode.LANCZOS,
48
+ grow: bool = True,
49
+ pad: float | None = None
50
+ ):
51
+ super().__init__()
52
+ self.bounds = (bounds, bounds) if isinstance(bounds, int) else bounds
53
+ self.interpolation = interpolation
54
+ self.grow = grow
55
+ self.pad = pad
56
+
57
+ def forward(self, img: Image) -> Image:
58
+ """
59
+ Applies the Fit transform to the input image.
60
+ Args:
61
+ img (Image): The input PIL Image.
62
+ Returns:
63
+ Image: The transformed PIL Image.
64
+ """
65
+ wimg, himg = img.size
66
+ hbound, wbound = self.bounds
67
+ hscale = hbound / himg
68
+ wscale = wbound / wimg
69
+ if not self.grow:
70
+ hscale = min(hscale, 1.0)
71
+ wscale = min(wscale, 1.0)
72
+ scale = min(hscale, wscale)
73
+ if scale == 1.0:
74
+ return img
75
+ hnew = min(round(himg * scale), hbound)
76
+ wnew = min(round(wimg * scale), wbound)
77
+ img = TF.resize(img, (hnew, wnew), self.interpolation)
78
+ if self.pad is None:
79
+ return img
80
+ hpad = hbound - hnew
81
+ wpad = wbound - wnew
82
+ tpad = hpad // 2
83
+ bpad = hpad - tpad
84
+ lpad = wpad // 2
85
+ rpad = wpad - lpad
86
+ return TF.pad(img, (lpad, tpad, rpad, bpad), self.pad)
87
+ def __repr__(self) -> str:
88
+ """
89
+ Returns a string representation of the Fit module.
90
+ Returns:
91
+ str: A string describing the module's parameters.
92
+ """
93
+ return (
94
+ f"{self.__class__.__name__}(bounds={self.bounds}, "
95
+ f"interpolation={self.interpolation.value}, grow={self.grow}, "
96
+ f"pad={self.pad})"
97
+ )
98
+
99
+
100
+ class CompositeAlpha(torch.nn.Module):
101
+ """
102
+ A module for compositing images with alpha channels over a background color.
103
+ Args:
104
+ background (tuple[float, float, float] | float): The background color to
105
+ use for compositing.
106
+ """
107
+ def __init__(self, background: tuple[float, float, float] | float):
108
+ super().__init__()
109
+ self.background = (
110
+ (background, background, background)
111
+ if isinstance(background, float)
112
+ else background
113
+ )
114
+ self.background = torch.tensor(self.background).unsqueeze(1).unsqueeze(2)
115
+
116
+ def forward(self, img: torch.Tensor) -> torch.Tensor:
117
+ """
118
+ Applies alpha compositing to the input image tensor.
119
+ Args:
120
+ img (torch.Tensor): The input image tensor.
121
+ Returns:
122
+ torch.Tensor: The composited image tensor.
123
+ """
124
+ if img.shape[-3] == 3:
125
+ return img
126
+ alpha = img[..., 3, None, :, :]
127
+ img[..., :3, :, :] *= alpha
128
+ background = self.background.expand(-1, img.shape[-2], img.shape[-1])
129
+ if background.ndim == 1:
130
+ background = background[:, None, None]
131
+ elif background.ndim == 2:
132
+ background = background[None, :, :]
133
+ img[..., :3, :, :] += (1.0 - alpha) * background
134
+ return img[..., :3, :, :]
135
+
136
+ def __repr__(self) -> str:
137
+ """
138
+ Returns a string representation of the CompositeAlpha module.
139
+ Returns:
140
+ str: A string describing the module's parameters.
141
+ """
142
+ return f"{self.__class__.__name__}(background={self.background})"
143
+
144
+
145
+ transform = transforms.Compose([
146
+ Fit((384, 384)),
147
+ transforms.ToTensor(),
148
+ CompositeAlpha(0.5),
149
+ transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
150
+ transforms.CenterCrop((384, 384)),
151
+ ])
152
+ model = timm.create_model(
153
+ "vit_so400m_patch14_siglip_384.webli",
154
+ pretrained=False,
155
+ num_classes=9083
156
+ ) # type: VisionTransformer
157
+
158
+
159
+ class GatedHead(torch.nn.Module):
160
+ """
161
+ A custom head module with gating mechanism for the classifier.
162
+ Args:
163
+ num_features (int): The number of input features.
164
+ num_classes (int): The number of output classes.
165
+ """
166
+ def __init__(self, num_features: int, num_classes: int):
167
+ super().__init__()
168
+ self.num_classes = num_classes
169
+ self.linear = torch.nn.Linear(num_features, num_classes * 2)
170
+ self.act = torch.nn.Sigmoid()
171
+ self.gate = torch.nn.Sigmoid()
172
+
173
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
174
+ """
175
+ Applies the gated head to the input tensor.
176
+ Args:
177
+ x (torch.Tensor): The input tensor.
178
+ Returns:
179
+ torch.Tensor: The output tensor after applying the gated head.
180
+ """
181
+ x = self.linear(x)
182
+ x = self.act(x[:, :self.num_classes]) * self.gate(x[:, self.num_classes:])
183
+ return x
184
+
185
+
186
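+ # min(model.head.weight.shape) is the backbone feature width (the smaller dimension of the original head's weight), reused as num_features for the gated head.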
+ model.head = GatedHead(min(model.head.weight.shape), 9083)
187
+ safetensors.torch.load_model(
188
+ model, "/home/kade/source/repos/JTP2/JTP_PILOT2-e3-vit_so400m_patch14_siglip_384.safetensors"
189
+ )
190
+ if torch.cuda.is_available():
191
+ model.cuda()
192
+ if torch.cuda.get_device_capability()[0] >= 7: # tensor cores
193
+ model.to(dtype=torch.float16, memory_format=torch.channels_last)
194
+ model.eval()
195
+ with open("/home/kade/source/repos/JTP2/tags.json", "r", encoding="utf-8") as file:
196
+ tags = json.load(file) # type: dict
197
+ allowed_tags = list(tags.keys())
198
+ for idx, tag in enumerate(allowed_tags):
199
+ allowed_tags[idx] = tag.replace("_", " ")
200
+ sorted_tag_score = {}
201
+
202
+
203
+ def run_classifier(image, threshold):
204
+ """
205
+ Runs the classifier on a single image and returns tags based on the threshold.
206
+ Args:
207
+ image (PIL.Image): The input image.
208
+ threshold (float): The probability threshold for including tags.
209
+ Returns:
210
+ tuple: A tuple containing the comma-separated tags and a dictionary of
211
+ tag probabilities.
212
+ """
213
+ global sorted_tag_score
214
+ img = image.convert('RGBA')
215
+ tensor = transform(img).unsqueeze(0)
216
+ if torch.cuda.is_available():
217
+ tensor = tensor.cuda()
218
+ if torch.cuda.get_device_capability()[0] >= 7: # tensor cores
219
+ tensor = tensor.to(dtype=torch.float16, memory_format=torch.channels_last)
220
+ with torch.no_grad():
221
+ probits = model(tensor)[0].cpu()
222
+ values, indices = probits.topk(250)
223
+ tag_score = dict()
224
+ for i in range(indices.size(0)):
225
+ tag_score[allowed_tags[indices[i]]] = values[i].item()
226
+ sorted_tag_score = dict(
227
+ sorted(tag_score.items(), key=lambda item: item[1], reverse=True)
228
+ )
229
+ return create_tags(threshold)
230
+
231
+ def create_tags(threshold):
232
+ """
233
+ Creates a list of tags based on the current sorted_tag_score and the given
234
+ threshold.
235
+ Args:
236
+ threshold (float): The probability threshold for including tags.
237
+ Returns:
238
+ tuple: A tuple containing the comma-separated tags and a dictionary of
239
+ filtered tag probabilities.
240
+ """
241
+ global sorted_tag_score
242
+ filtered_tag_score = {
243
+ key: value for key, value in sorted_tag_score.items() if value > threshold
244
+ }
245
+ text_no_impl = ", ".join(filtered_tag_score.keys())
246
+ return text_no_impl, filtered_tag_score
247
+
248
+ def process_directory(directory, threshold):
249
+ """
250
+ Processes all images in a directory and its subdirectories, generating tags
251
+ for each image.
252
+ Args:
253
+ directory (str): The path to the directory containing images.
254
+ threshold (float): The probability threshold for including tags.
255
+ Returns:
256
+ dict: A dictionary mapping image paths to their generated tags.
257
+ """
258
+ results = {}
259
+ for root, _, files in os.walk(directory):
260
+ for file in files:
261
+ if file.lower().endswith(('.jpg', '.jpeg', '.png', '.jxl')):
262
+ image_path = os.path.join(root, file)
263
+ text_file_path = os.path.splitext(image_path)[0] + ".txt"
264
+
265
+ # Skip if a corresponding .txt file already exists
266
+ if os.path.exists(text_file_path):
267
+ continue
268
+
269
+ image = Image.open(image_path)
270
+ tags, _ = run_classifier(image, threshold)
271
+ results[image_path] = tags
272
+
273
+ # Save tags to a text file with the same name as the image, using UTF-8 encoding
274
+ with open(text_file_path, "w", encoding="utf-8") as text_file:
275
+ text_file.write(tags)
276
+ return results
277
+
278
+
279
+ if __name__ == "__main__":
280
+ parser = argparse.ArgumentParser(
281
+ description="Run inference on a directory of images."
282
+ )
283
+ parser.add_argument("directory", type=str, help="Target directory containing images.")
284
+ parser.add_argument(
285
+ "--threshold", type=float, default=0.2, help="Threshold for tag filtering."
286
+ )
287
+ args = parser.parse_args()
288
+ results = process_directory(args.directory, args.threshold)
289
+ for image_path, tags in results.items():
290
+ print(f"{image_path}: {tags}")
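Beyond the CLI entry point, run_classifier() can be used directly for a single image once the module-level model, transform, and tag list have loaded. A minimal sketch, assuming the script is saved as jtp2.py and that the hard-coded checkpoint and tags.json paths above exist:

from PIL import Image
from jtp2 import run_classifier

image = Image.open("/path/to/sample.png")
tags, scores = run_classifier(image, threshold=0.2)
print(tags)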
jtp2_overwrite ADDED
@@ -0,0 +1,316 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ JTP2 (Joint Tagger Project 2) Image Classification Script
6
+
7
+ This script implements a multi-label classifier for furry images using the
8
+ PILOT2 model. It processes images, generates tags, and saves the results. The
9
+ model is based on a Vision Transformer architecture and uses a custom GatedHead
10
+ for classification.
11
+
12
+ Key features:
13
+ - Image preprocessing and transformation
14
+ - Model inference using PILOT2
15
+ - Tag generation with customizable threshold
16
+ - Batch processing of image directories
17
+ - Saving results as text files alongside images
18
+
19
+ Usage:
20
+ python jtp2.py <directory> [--threshold <float>]
21
+ """
22
+
23
+ import os
24
+ import json
25
+ import argparse
26
+ from PIL import Image
27
+ import safetensors.torch
28
+ import timm
29
+ from timm.models import VisionTransformer
30
+ import torch
31
+ from torchvision.transforms import transforms
32
+ from torchvision.transforms import InterpolationMode
33
+ import torchvision.transforms.functional as TF
34
+ import pillow_jxl
35
+
36
+
37
+ class Fit(torch.nn.Module):
38
+ """
39
+ A custom transform module for resizing and padding images.
40
+
41
+ Args:
42
+ bounds (tuple[int, int] | int): The target dimensions for the image.
43
+ interpolation (InterpolationMode): The interpolation method for resizing.
44
+ grow (bool): Whether to allow upscaling of images.
45
+ pad (float | None): The padding value to use if padding is applied.
46
+ """
47
+
48
+ def __init__(
49
+ self,
50
+ bounds: tuple[int, int] | int,
51
+ interpolation=InterpolationMode.LANCZOS,
52
+ grow: bool = True,
53
+ pad: float | None = None
54
+ ):
55
+ super().__init__()
56
+ self.bounds = (bounds, bounds) if isinstance(bounds, int) else bounds
57
+ self.interpolation = interpolation
58
+ self.grow = grow
59
+ self.pad = pad
60
+
61
+ def forward(self, img: Image) -> Image:
62
+ """
63
+ Applies the Fit transform to the input image.
64
+
65
+ Args:
66
+ img (Image): The input PIL Image.
67
+
68
+ Returns:
69
+ Image: The transformed PIL Image.
70
+ """
71
+ wimg, himg = img.size
72
+ hbound, wbound = self.bounds
73
+ hscale = hbound / himg
74
+ wscale = wbound / wimg
75
+ if not self.grow:
76
+ hscale = min(hscale, 1.0)
77
+ wscale = min(wscale, 1.0)
78
+ scale = min(hscale, wscale)
79
+ if scale == 1.0:
80
+ return img
81
+ hnew = min(round(himg * scale), hbound)
82
+ wnew = min(round(wimg * scale), wbound)
83
+ img = TF.resize(img, (hnew, wnew), self.interpolation)
84
+ if self.pad is None:
85
+ return img
86
+ hpad = hbound - hnew
87
+ wpad = wbound - wnew
88
+ tpad = hpad // 2
89
+ bpad = hpad - tpad
90
+ lpad = wpad // 2
91
+ rpad = wpad - lpad
92
+ return TF.pad(img, (lpad, tpad, rpad, bpad), self.pad)
93
+
94
+ def __repr__(self) -> str:
95
+ """
96
+ Returns a string representation of the Fit module.
97
+
98
+ Returns:
99
+ str: A string describing the module's parameters.
100
+ """
101
+ return (
102
+ f"{self.__class__.__name__}(bounds={self.bounds}, "
103
+ f"interpolation={self.interpolation.value}, grow={self.grow}, "
104
+ f"pad={self.pad})"
105
+ )
106
+
107
+
108
+ class CompositeAlpha(torch.nn.Module):
109
+ """
110
+ A module for compositing images with alpha channels over a background color.
111
+
112
+ Args:
113
+ background (tuple[float, float, float] | float): The background color to
114
+ use for compositing.
115
+ """
116
+
117
+ def __init__(self, background: tuple[float, float, float] | float):
118
+ super().__init__()
119
+ self.background = (
120
+ (background, background, background)
121
+ if isinstance(background, float)
122
+ else background
123
+ )
124
+ self.background = torch.tensor(self.background).unsqueeze(1).unsqueeze(2)
125
+
126
+ def forward(self, img: torch.Tensor) -> torch.Tensor:
127
+ """
128
+ Applies alpha compositing to the input image tensor.
129
+
130
+ Args:
131
+ img (torch.Tensor): The input image tensor.
132
+
133
+ Returns:
134
+ torch.Tensor: The composited image tensor.
135
+ """
136
+ if img.shape[-3] == 3:
137
+ return img
138
+ alpha = img[..., 3, None, :, :]
139
+ img[..., :3, :, :] *= alpha
140
+ background = self.background.expand(-1, img.shape[-2], img.shape[-1])
141
+ if background.ndim == 1:
142
+ background = background[:, None, None]
143
+ elif background.ndim == 2:
144
+ background = background[None, :, :]
145
+ img[..., :3, :, :] += (1.0 - alpha) * background
146
+ return img[..., :3, :, :]
147
+
148
+ def __repr__(self) -> str:
149
+ """
150
+ Returns a string representation of the CompositeAlpha module.
151
+
152
+ Returns:
153
+ str: A string describing the module's parameters.
154
+ """
155
+ return f"{self.__class__.__name__}(background={self.background})"
156
+
157
+
158
+ transform = transforms.Compose([
159
+ Fit((384, 384)),
160
+ transforms.ToTensor(),
161
+ CompositeAlpha(0.5),
162
+ transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
163
+ transforms.CenterCrop((384, 384)),
164
+ ])
165
+
166
+ model = timm.create_model(
167
+ "vit_so400m_patch14_siglip_384.webli",
168
+ pretrained=False,
169
+ num_classes=9083
170
+ ) # type: VisionTransformer
171
+
172
+
173
+ class GatedHead(torch.nn.Module):
174
+ """
175
+ A custom head module with gating mechanism for the classifier.
176
+
177
+ Args:
178
+ num_features (int): The number of input features.
179
+ num_classes (int): The number of output classes.
180
+ """
181
+
182
+ def __init__(self, num_features: int, num_classes: int):
183
+ super().__init__()
184
+ self.num_classes = num_classes
185
+ self.linear = torch.nn.Linear(num_features, num_classes * 2)
186
+ self.act = torch.nn.Sigmoid()
187
+ self.gate = torch.nn.Sigmoid()
188
+
189
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
190
+ """
191
+ Applies the gated head to the input tensor.
192
+
193
+ Args:
194
+ x (torch.Tensor): The input tensor.
195
+
196
+ Returns:
197
+ torch.Tensor: The output tensor after applying the gated head.
198
+ """
199
+ x = self.linear(x)
200
+ x = self.act(x[:, :self.num_classes]) * self.gate(x[:, self.num_classes:])
201
+ return x
202
+
203
+
204
+ model.head = GatedHead(min(model.head.weight.shape), 9083)
205
+ safetensors.torch.load_model(
206
+ model, "/home/kade/source/repos/JTP2/JTP_PILOT2-e3-vit_so400m_patch14_siglip_384.safetensors"
207
+ )
208
+
209
+ if torch.cuda.is_available():
210
+ model.cuda()
211
+ if torch.cuda.get_device_capability()[0] >= 7: # tensor cores
212
+ model.to(dtype=torch.float16, memory_format=torch.channels_last)
213
+
214
+ model.eval()
215
+
216
+ with open("/home/kade/source/repos/JTP2/tags.json", "r", encoding="utf-8") as file:
217
+ tags = json.load(file) # type: dict
218
+ allowed_tags = list(tags.keys())
219
+
220
+ for idx, tag in enumerate(allowed_tags):
221
+ allowed_tags[idx] = tag.replace("_", " ")
222
+
223
+ sorted_tag_score = {}
224
+
225
+
226
+ def run_classifier(image, threshold):
227
+ """
228
+ Runs the classifier on a single image and returns tags based on the threshold.
229
+
230
+ Args:
231
+ image (PIL.Image): The input image.
232
+ threshold (float): The probability threshold for including tags.
233
+
234
+ Returns:
235
+ tuple: A tuple containing the comma-separated tags and a dictionary of
236
+ tag probabilities.
237
+ """
238
+ global sorted_tag_score
239
+ img = image.convert('RGBA')
240
+ tensor = transform(img).unsqueeze(0)
241
+ if torch.cuda.is_available():
242
+ tensor = tensor.cuda()
243
+ if torch.cuda.get_device_capability()[0] >= 7: # tensor cores
244
+ tensor = tensor.to(dtype=torch.float16, memory_format=torch.channels_last)
245
+ with torch.no_grad():
246
+ probits = model(tensor)[0].cpu()
247
+ values, indices = probits.topk(250)
248
+ tag_score = dict()
249
+ for i in range(indices.size(0)):
250
+ tag_score[allowed_tags[indices[i]]] = values[i].item()
251
+ sorted_tag_score = dict(
252
+ sorted(tag_score.items(), key=lambda item: item[1], reverse=True)
253
+ )
254
+ return create_tags(threshold)
255
+
256
+
257
+ def create_tags(threshold):
258
+ """
259
+ Creates a list of tags based on the current sorted_tag_score and the given
260
+ threshold.
261
+
262
+ Args:
263
+ threshold (float): The probability threshold for including tags.
264
+
265
+ Returns:
266
+ tuple: A tuple containing the comma-separated tags and a dictionary of
267
+ filtered tag probabilities.
268
+ """
269
+ global sorted_tag_score
270
+ filtered_tag_score = {
271
+ key: value for key, value in sorted_tag_score.items() if value > threshold
272
+ }
273
+ text_no_impl = ", ".join(filtered_tag_score.keys())
274
+ return text_no_impl, filtered_tag_score
275
+
276
+
277
+ def process_directory(directory, threshold):
278
+ """
279
+ Processes all images in a directory and its subdirectories, generating tags
280
+ for each image.
281
+
282
+ Args:
283
+ directory (str): The path to the directory containing images.
284
+ threshold (float): The probability threshold for including tags.
285
+
286
+ Returns:
287
+ dict: A dictionary mapping image paths to their generated tags.
288
+ """
289
+ results = {}
290
+ for root, _, files in os.walk(directory):
291
+ for file in files:
292
+ if file.lower().endswith(('.jpg', '.jpeg', '.png', '.jxl')):
293
+ image_path = os.path.join(root, file)
294
+ image = Image.open(image_path)
295
+ tags, _ = run_classifier(image, threshold)
296
+ results[image_path] = tags
297
+ # Save tags to a text file with the same name as the image, overwriting any existing .txt file
298
+ text_file_path = os.path.splitext(image_path)[0] + ".txt"
299
+ with open(text_file_path, "w", encoding="utf-8") as text_file:
300
+ text_file.write(tags)
301
+ return results
302
+
303
+
304
+ if __name__ == "__main__":
305
+ parser = argparse.ArgumentParser(
306
+ description="Run inference on a directory of images."
307
+ )
308
+ parser.add_argument("directory", type=str, help="Target directory containing images.")
309
+ parser.add_argument(
310
+ "--threshold", type=float, default=0.2, help="Threshold for tag filtering."
311
+ )
312
+ args = parser.parse_args()
313
+
314
+ results = process_directory(args.directory, args.threshold)
315
+ for image_path, tags in results.items():
316
+ print(f"{image_path}: {tags}")
paper-qa.code-workspace ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "folders": [
3
+ {
4
+ "path": "."
5
+ },
6
+ {
7
+ "path": "../miniconda3/lib/python3.12/site-packages/paperqa"
8
+ }
9
+ ],
10
+ "settings": {}
11
+ }
papers_please ADDED
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+ import pickle
8
+ from paperqa import Settings, Docs
9
+
10
+ local_llm_config = {
11
+ "model_list": [
12
+ {
13
+ "model_name": "ollama/llama3.1",
14
+ "litellm_params": {
15
+ "model": "ollama/llama3.1",
16
+ },
17
+ },
18
+ ]
19
+ }
20
+ local_emb_config = {
21
+ "model_list": [
22
+ {
23
+ "model_name": "ollama/mxbai-embed-large",
24
+ "litellm_params": {
25
+ "model": "ollama/mxbai-embed-large",
26
+ },
27
+ }
28
+ ]
29
+ }
30
+
31
+ settings = Settings(
32
+ llm="ollama/llama3.1",
33
+ llm_config=local_llm_config,
34
+ summary_llm="ollama/llama3.1",
35
+ summary_llm_config=local_llm_config,
36
+ embedding="ollama/mxbai-embed-large",
37
+ embedding_config=local_emb_config,
38
+ )
39
+
40
+ def find_main_tex_file(folder_path: Path):
41
+ """
42
+ Find the main LaTeX file in the given folder.
43
+
44
+ This function searches for a .tex file that is likely to be the main file
45
+ of a LaTeX project. It first checks for common names like 'main.tex',
46
+ then looks for files containing '\\documentclass', and finally returns
47
+ the first .tex file if no other criteria are met.
48
+
49
+ Args:
50
+ folder_path (Path): The path to the folder to search in.
51
+
52
+ Returns:
53
+ Path: The path to the main .tex file, or None if no .tex files are found.
54
+ """
55
+ tex_files = list(folder_path.glob('**/*.tex'))
56
+ if not tex_files:
57
+ return None
58
+
59
+ # Check for common main file names
60
+ common_names = ['main.tex', 'paper.tex', 'article.tex']
61
+ for name in common_names:
62
+ # tex_files holds Path objects, so compare file names rather than raw strings
+ matches = [f for f in tex_files if f.name == name]
+ if matches:
+ return matches[0]
64
+
65
+ # If no common name found, look for \documentclass
66
+ for file in tex_files:
67
+ with open(file, 'r', encoding='utf-8') as f:
68
+ content = f.read()
69
+ if '\\documentclass' in content:
70
+ return file
71
+ # If still not found, return the first .tex file
72
+ return tex_files[0]
73
+
74
+ def run_latexpand(input_file, output_file):
75
+ """
76
+ Run the latexpand command on the input file and write the result to the output file.
77
+
78
+ This function uses the latexpand tool to expand a LaTeX file, including all its
79
+ inputs and packages, into a single file. The expanded content is then written
80
+ to the specified output file.
81
+
82
+ Args:
83
+ input_file (str or Path): The path to the input LaTeX file.
84
+ output_file (str or Path): The path where the expanded LaTeX content will be written.
85
+
86
+ Raises:
87
+ subprocess.CalledProcessError: If latexpand encounters an error during execution.
88
+ FileNotFoundError: If the latexpand command is not found in the system PATH.
89
+ """
90
+ try:
91
+ result = subprocess.run(['latexpand', input_file],
92
+ capture_output=True, text=True, check=True)
93
+ with open(output_file, 'w', encoding='utf-8') as output_file_handle:
94
+ output_file_handle.write(result.stdout)
95
+ print(f"Expanded LaTeX written to {output_file}")
96
+ except subprocess.CalledProcessError as e:
97
+ print(f"Error running latexpand: {e}")
98
+ except FileNotFoundError:
99
+ print("latexpand not found. Please make sure it's installed and in your PATH.")
100
+
101
+
102
+ cache_path = Path("pqa_index.pkl")
103
+
104
+ if cache_path.exists():
105
+ with open(cache_path, "rb") as f:
106
+ docs = pickle.load(f)
107
+ else:
108
+ docs = Docs()
109
+ for root, dirs, files in Path(".").walk():
110
+ for dir_name in dirs:
111
+ if dir_name.startswith("arXiv-"):
112
+ dir_path = root / dir_name
113
+ concat_main = dir_path / ".main.tex"
114
+ try:
115
+ # Step 1: Find the main entry TeX file
116
+ main_file = find_main_tex_file(dir_path)
117
+ if not main_file:
118
+ raise ValueError("No main TeX file found.")
119
+ # Step 2 & 3: Run latexpand and write output
120
+ run_latexpand(main_file, dir_path / ".main.tex")
121
+ except (ValueError, subprocess.CalledProcessError,
122
+ FileNotFoundError) as preprocess_error:
123
+ print(f"Failed to pre-process {dir_name}: {preprocess_error}")
124
+ continue
125
+ print(f"adding {dir_path} (latex source)")
126
+ try:
127
+ docs.add(concat_main, settings=settings, disable_check=True)
128
+ except (IOError, OSError, ValueError) as add_error:
129
+ print(f"Failed to add {dir_path}: {add_error}")
130
+ continue
131
+ dirs.remove(dir_name)
132
+ break
133
+ else:
134
+ for file_name in files:
135
+ if file_name.lower().endswith((".pdf", ".txt", ".md", ".tex")):
136
+ file_path = root / file_name
137
+ print(f"adding {file_path}")
138
+ docs.add(file_path, settings=settings, disable_check=True)
139
+
140
+ with open(cache_path, "wb") as f:
141
+ pickle.dump(docs, f)
142
+
143
+
144
+ if __name__ == "__main__":
145
+ if len(sys.argv) > 1:
146
+ QUERY = " ".join(sys.argv[1:])
147
+ answer = docs.query(QUERY, settings=settings)
148
+ print(answer)
149
+ else:
150
+ print("Please provide a query as a command-line argument.")
151
+ print("Usage: python script_name.py 'Your query here'")
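A typical run of the script above, assuming it is saved as papers_please.py. The first run walks the current directory, expands any arXiv-* LaTeX sources with latexpand, builds the index, and caches it in pqa_index.pkl; later queries reuse the pickle:

python papers_please.py "Which of the indexed papers evaluate multi-label image tagging?"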
password ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import random
5
+ import string
6
+
7
+ def generate_password(length=16):
8
+ characters = string.ascii_letters + string.digits + string.punctuation
9
+ password = ''.join(random.choice(characters) for _ in range(length))
10
+ return password
11
+
12
+ # Generate a 16-character password (note: the random module is not cryptographically secure; see the secrets-based sketch below)
13
+ strong_password = generate_password()
14
+ print(strong_password)
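random draws from a non-cryptographic PRNG, so for passwords that need to resist guessing the standard-library secrets module is the safer choice. A minimal sketch, not part of the original script:

import secrets
import string

def generate_secure_password(length=16):
    characters = string.ascii_letters + string.digits + string.punctuation
    # secrets.choice draws from the OS CSPRNG rather than the Mersenne Twister behind random
    return ''.join(secrets.choice(characters) for _ in range(length))

print(generate_secure_password())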