toolkit / .zshrc
k4d3's picture
concat_captions and zsh fuckery
3e02680
raw
history blame
43.9 kB
# Configuration for Oh My Zsh and custom settings
# ----------------------------------------------
# 1. Source the custom git wrapper script
# 2. Set the path for Oh My Zsh installation
# 3. Define plugins for extended functionality:
# - git: Provides aliases and functions for Git
# - autojump: Enables quick navigation to frequently visited directories
# - conda-env: Adds support for Conda environment management
# 4. Set the custom theme for the shell prompt
# Make Qt use its "offscreen" platform plugin (QT_QPA_PLATFORM=offscreen).
# This is useful when running Qt applications in a headless environment, such as
# a server or a CI/CD pipeline, where there is no display server available: the
# graphical output is rendered offscreen, so no GUI is required. Commonly used for
# automated testing, rendering, or other tasks that do not require user interaction.
# NOTE(review): exported for every shell started from this file — confirm that Qt
# GUI programs launched from these shells are not expected to open windows.
export QT_QPA_PLATFORM=offscreen
# Enable the experimental Just-In-Time (JIT) compiler for Python 3.13.
# This can improve performance by compiling Python code to machine code at runtime.
# Note (from the original author): the JIT is only available for x86_64 builds of Python in conda.
export PYTHON_JIT=1
# Load the custom git wrapper script
# NOTE(review): sourced unconditionally; if the file is missing the shell reports
# an error at every startup.
source $HOME/toolkit/git-wrapper.zsh
# Set the path to the Oh My Zsh installation directory
export ZSH="$HOME/.oh-my-zsh"
# Enable Oh My Zsh plugins for additional features:
# - git: aliases and functions for Git
# - autojump: quick navigation to frequently visited directories
# - conda-env: Conda environment support
plugins=(git autojump conda-env)
# Set the custom theme for the shell prompt
ZSH_THEME="kade"
# Optional Oh My Zsh switches, all currently disabled (left here for reference):
# CASE_SENSITIVE="true"
# HYPHEN_INSENSITIVE="true"
# DISABLE_MAGIC_FUNCTIONS="true"
# DISABLE_LS_COLORS="true"
# DISABLE_AUTO_TITLE="true"
# ENABLE_CORRECTION="true"
# COMPLETION_WAITING_DOTS="true"
# DISABLE_UNTRACKED_FILES_DIRTY="true"
# Set the system language and locale to Japanese UTF-8.
# LC_ALL overrides every individual LC_* category as well as LANG.
export LANG=ja_JP.UTF-8
export LC_ALL=ja_JP.UTF-8
# Set the number of commands kept in the shell's history list.
# NOTE(review): SAVEHIST (the history-file size) is not set in the visible part of
# this file — confirm it is configured elsewhere if a large on-disk history is wanted.
export HISTSIZE=500000
# Set the path to the ComfyUI installation
export COMFYUI_PATH="$HOME/ComfyUI"
# Set the CUDA version for the bitsandbytes library (126 presumably means CUDA 12.6;
# the `nv` function in this file produces the same dot-less format)
export BNB_CUDA_VERSION=126
# Enable backtraces for Rust programs.
# NOTE(review): the value `full` is what requests the verbose form — confirm which is wanted.
export RUST_BACKTRACE=1
# Opt out of .NET CLI telemetry data collection
export DOTNET_CLI_TELEMETRY_OPTOUT=1
# Enable color output in the terminal.
# NOTE(review): 126 is the same value as BNB_CUDA_VERSION above and looks like a
# copy/paste; tools that honour CLICOLOR conventionally expect 1 — confirm and adjust.
export CLICOLOR=126
# ⚠️ TODO: the thread counts below (24) have not been benchmarked yet.
# Set the maximum number of threads for the NumExpr library
# NumExpr is used for fast numerical array operations
export NUMEXPR_MAX_THREADS=24
# Set the maximum number of threads for Apple's Accelerate framework (VecLib)
# This affects performance of vector and matrix operations on macOS
# Note: This setting may not have an effect on non-macOS systems
export VECLIB_MAXIMUM_THREADS=24
# Set the number of threads for Intel Math Kernel Library (MKL)
# MKL is used for optimized mathematical operations, especially in NumPy
export MKL_NUM_THREADS=24
# Set the number of threads for OpenMP
# OpenMP is used for parallel programming in C, C++, and Fortran
# This affects the performance of libraries and applications using OpenMP
export OMP_NUM_THREADS=24
# Disable parallelism for the Hugging Face Tokenizers library
# This can help prevent potential deadlocks or race conditions in multi-threaded environments
# It's particularly useful when using tokenizers in conjunction with DataLoader in PyTorch
# It may slightly reduce performance in scenarios where parallel tokenization is beneficial
export TOKENIZERS_PARALLELISM=false
# Source the broot launcher script for enhanced file navigation, if it is installed.
# Guarded the same way as fzf below so that a missing launcher does not print an
# error every time a shell starts.
if [[ -f /home/kade/.config/broot/launcher/bash/br ]]; then
  source /home/kade/.config/broot/launcher/bash/br
fi
# Source the fzf (Fuzzy Finder) configuration for zsh if it exists
# This enables fzf functionality in the shell, including keybindings and auto-completion
if [[ -f ~/.fzf.zsh ]]; then
  source ~/.fzf.zsh
fi
# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/home/kade/miniconda3/bin/conda' 'shell.zsh' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
eval "$__conda_setup"
else
if [ -f "/home/kade/miniconda3/etc/profile.d/conda.sh" ]; then
. "/home/kade/miniconda3/etc/profile.d/conda.sh"
else
export PATH="/home/kade/miniconda3/bin:$PATH"
fi
fi
unset __conda_setup
# <<< conda initialize <<<
# Drop any inherited CONDA_CHANGEPS1 value.
# NOTE(review): presumably so prompt handling is left to the theme / conda-env
# plugin rather than to conda itself — confirm.
unset CONDA_CHANGEPS1
# Load Oh My Zsh (themes, plugins, helper functions).
# This must come after ZSH, plugins and ZSH_THEME have been set earlier in this
# file, because oh-my-zsh.sh is located through $ZSH.
source $ZSH/oh-my-zsh.sh
# Extend the system PATH to include various directories:
# - Custom dataset tools in the user's repository
# - Rust's Cargo binary directory
# - Miniconda3 binary directory
# - User's toolkit directory
# - Redis and PostgreSQL binary directories
# - User's local bin directory
# - CUDA binary directory
# The directories are appended, so entries already on PATH take precedence.
# Re-sourcing this file (see the `rl`, `zr` and `ezc` aliases) appends them again.
export PATH=$PATH:$HOME/source/repos/dataset-tools/target/x86_64-unknown-linux-gnu/release:$HOME/.cargo/bin:$HOME/miniconda3/bin:$HOME/toolkit:$HOME/db/redis-stable/src:$HOME/db/postgresql/bin:$HOME/.local/bin:/opt/cuda/bin
# rconda: strip the base Miniconda directories out of the search paths.
#   PATH            loses every entry containing $HOME/miniconda3/bin or
#                   $HOME/miniconda3/condabin
#   LD_LIBRARY_PATH loses every entry containing $HOME/miniconda3/lib
# Both variables are re-exported with the remaining entries joined by ':'.
rconda() {
  local conda_root="$HOME/miniconda3"

  # One entry per line, filter, then glue back together and trim the final ':'.
  export PATH=$(
    echo $PATH \
      | tr ':' '\n' \
      | grep -v -e "$conda_root/bin" -e "$conda_root/condabin" \
      | tr '\n' ':' \
      | sed 's/:$//'
  )
  export LD_LIBRARY_PATH=$(
    echo $LD_LIBRARY_PATH \
      | tr ':' '\n' \
      | grep -v -e "$conda_root/lib" \
      | tr '\n' ':' \
      | sed 's/:$//'
  )
}
# Extend the LD_LIBRARY_PATH to include:
# - Conda environment's library directory (only when CONDA_PREFIX is set)
# - CUDA library directory for x86_64 Linux
# This ensures that dynamically linked libraries in these locations can be found at runtime.
# Each optional piece is added with ${var:+...} so that no empty entry is produced:
# an empty entry (leading ':' or '::') makes the dynamic loader search the current
# working directory, and an unset CONDA_PREFIX would otherwise contribute a bare "/lib".
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}${CONDA_PREFIX:+$CONDA_PREFIX/lib:}/opt/cuda/targets/x86_64-linux/lib"
# Set the path for ComfyUI models
# NOTE(review): presumably read by ComfyUI-related tooling to locate models — confirm.
# The path is hard-coded to /home/kade rather than derived from $HOME or $COMFYUI_PATH.
export COMFYUI_MODEL_PATH=/home/kade/ComfyUI/models
# Use the UPX executable compression tool from the local bin directory
alias upx='/home/kade/.local/bin/upx'
# Always display colorized output for the 'ls' command
# (`always` also emits color escape codes when the output is piped or redirected)
alias ls='ls --color=always'
# List all files in long format, including hidden files, with human-readable sizes and colorized output
alias ll="ls -lah --color=always"
# Use the 'reflink' option for 'cp' to enable copy-on-write when possible, improving efficiency
alias cp='cp --reflink=auto'
# Launch TensorBoard with the log directory set to the user's output_dir/logs
# ($HOME is expanded once, when the alias is defined, because of the double quotes)
alias t="tensorboard --logdir=$HOME/output_dir/logs"
# Edit tmux configuration, display a message, and reload the tmux configuration
alias rt="vim ~/.tmux.conf && echo \"Reloading tmux config\" && tmux source ~/.tmux.conf"
# Edit zsh configuration, display a message, and reload the zsh configuration
alias zr="vim ~/.zshrc && echo \"Reloading zsh config\" && source ~/.zshrc"
# The kanji 接 (せつ) [setsu] means "touch," "contact," "adjoin," or "piece together."
# It is used here to represent the action of "attaching" to an existing tmux session,
# as it conveys the idea of connecting or joining the session.
# To type the kanji 接 on a Japanese keyboard:
# 1. Switch your keyboard to Japanese input mode.
# 2. Type "setsu" (せつ) in hiragana.
# 3. Press the spacebar to convert it to the kanji 接.
alias 接="tmux attach"
# Alias for attaching to an existing tmux session
# 'ta' is a shorthand for 'tmux attach' (same command as 接 above)
alias ta="tmux attach"
# Alias for adding all changes, committing with a signed-off verbose message, and pushing to remote
# (-a stage tracked changes, -v show the diff in the editor, -s add a Signed-off-by trailer)
alias ga="git add . && git commit -avs && git push"
# Alias for checking the current status of the git repository
alias gs="git status"
# Alias for displaying word-level differences in git, treating every run of
# non-comma characters as one "word", using the patience diff algorithm
alias wd="git diff --word-diff-regex='[^,]+' --patience"
# Alias for using Neovim instead of Vim
alias vim="nvim"
# Another alias for using Neovim instead of Vim
alias vi="nvim"
# Short alias for quickly opening Neovim
alias v="nvim"
# Alias for resetting the git repository to the last commit, discarding all changes to tracked files
alias grh='git reset --hard'
# Alias for cloning a git repository including all its submodules
alias gcs='git clone --recurse-submodules'
# Alias for running the Grabber-cli command
alias grabber="Grabber-cli"
# 'pie' is a shortcut for installing a Python package in editable mode
# using the pip command with the --use-pep517 option.
alias pie='pip install -e . --use-pep517'
# Alias for creating a signed-off, verbose git commit
# (-v and --verbose together raise the verbosity one level further than `ga` does)
alias gc="git commit -avs --verbose"
# Alias for displaying directory contents with colorized output
alias dir="dir --color=always"
# Alias for quickly reloading the zsh configuration file
alias rl="source ~/.zshrc"
# Alias for quickly editing and reloading the zsh configuration file
# (like `zr` above, but without the "Reloading zsh config" message)
alias ezc="nvim ~/.zshrc && source ~/.zshrc"
# Function: display_git_help
# Description:
#   Prints a cheat sheet of the git aliases defined in this file, one per line,
#   in the form "<alias>: `<command>`". It is called once right after its
#   definition, so the cheat sheet is shown whenever this file is sourced.
# Notes:
#   - The `ga` entry mirrors the actual alias (`git commit -avs`, without the
#     extra `--verbose` that only `gc` carries).
#   - `gcx` is listed here but is not defined in this part of the file.
display_git_help() {
  printf '%s\n' \
    "Git" \
    "---" \
    "ga: \`git add . && git commit -avs && git push\`" \
    "gc: \`git commit -avs --verbose\`" \
    "gcs: \`git clone --recurse-submodules\`" \
    "grh: \`git reset --hard\`" \
    "wd: \`git diff --word-diff-regex='[^,]+' --patience\`" \
    "gs: \`git status\`" \
    "gcx: \`git clean -fxd\`"
}
display_git_help
# Function: copy_sample_prompts (dataset fan-out variant)
# Description:
#   Copies ~/toolkit/kade-sample-prompts.txt into every directory matching
#   ~/datasets/by_* as "sample-prompts.txt". Non-directories matching the
#   pattern are skipped.
# Notes:
#   - A second function with the same name is defined further down in this
#     file; when the whole file is sourced, that later definition replaces
#     this one.
#   - The loop variable is local so it no longer leaks into the shell session.
function copy_sample_prompts() {
  local dataset_dir
  for dataset_dir in ~/datasets/by_*; do
    [[ -d "$dataset_dir" ]] || continue
    cp ~/toolkit/kade-sample-prompts.txt "$dataset_dir/sample-prompts.txt"
  done
}
# Function: re (Resize SDXL LoRA)
# Description:
#   Runs ~/source/repos/resize_lora/resize_lora.py on a LoRA safetensors file,
#   using the Pony Diffusion V6 XL checkpoint as the reference model.
#
# Usage:
#   re <target_file>
#
# Parameters:
#   $1 (target_file): Path to the input LoRA safetensors file to be resized.
#
# Arguments passed to resize_lora.py:
#   -vv                          verbose output
#   -r fro_ckpt=1,thr=-3.55      resizing recipe
#   <checkpoint>                 ~/ComfyUI/models/checkpoints/ponyDiffusionV6XL_v6StartWithThisOne.safetensors
#   <target_file>                the LoRA to resize
#
# Recipe explanation (as described by the original author):
#   - fro_ckpt=1: uses the Frobenius norm of the checkpoint layer as the score metric
#   - thr=-3.55: threshold for singular values at 10^-3.55 ≈ 0.000282 times the reference
#
# Returns:
#   1 (after printing the usage line) when no target file is given; otherwise
#   the exit status of the python command.
#
# Notes:
#   - NOTE(review): where the resized LoRA is written is decided by
#     resize_lora.py, which is not visible here — confirm the output location.
function re() {
  local target_file="$1"
  if [[ -z "$target_file" ]]; then
    echo "Usage: re <target_file>"
    return 1
  fi
  python ~/source/repos/resize_lora/resize_lora.py \
    -vv \
    -r fro_ckpt=1,thr=-3.55 \
    ~/ComfyUI/models/checkpoints/ponyDiffusionV6XL_v6StartWithThisOne.safetensors \
    "$target_file"
}
# Function: rejiggle_tags
# Description:
#   Moves a tag to the front of every *.tags file (searched recursively) that
#   contains it. Each line is treated as a comma-separated tag list:
#     - every element equal to <tag> (ignoring surrounding blanks) is removed
#       from the lines where it occurs, and those lines are re-joined with ", ";
#     - lines that do not contain the tag are left exactly as they were;
#     - "<tag>, " is then prepended to the first line of the file.
#   Files that do not contain the tag as a whole element are not touched, so
#   "fox" no longer mangles "foxglove", and no dangling ", ," is left behind.
# Usage: rejiggle_tags <tag> <directory>
# Parameters:
#   $1 (tag):       the tag to move; compared literally (not a regex). A tag
#                   containing a comma can never match.
#   $2 (directory): root directory to search for *.tags files.
# Returns:
#   1 (after printing the usage line) when either argument is missing.
rejiggle_tags() {
  local tag="$1"
  local dir="$2"
  local file tmp_file
  if [[ -z "$tag" || -z "$dir" ]]; then
    echo "Usage: rejiggle_tags <tag> <directory>"
    return 1
  fi
  find "$dir" -type f -name "*.tags" | while IFS= read -r file; do
    tmp_file=$(mktemp) || return 1
    # The tag is handed to awk through the environment so that it is never
    # interpreted as a regex or subjected to backslash-escape processing.
    # awk exits non-zero when the tag was not found, leaving the file alone.
    if REJIGGLE_TAG="$tag" awk '
      function trim(s) { sub(/^[ \t]+/, "", s); sub(/[ \t]+$/, "", s); return s }
      BEGIN { tag = trim(ENVIRON["REJIGGLE_TAG"]) }
      {
        n = split($0, parts, ",")
        kept = ""
        hit = 0
        for (i = 1; i <= n; i++) {
          item = trim(parts[i])
          # Concatenating "" forces a string (not numeric) comparison.
          if ((item "") == (tag "")) { hit = 1; continue }
          if (item != "") kept = (kept == "" ? item : kept ", " item)
        }
        lines[NR] = (hit ? kept : $0)
        if (hit) found = 1
      }
      END {
        if (!found) exit 1
        lines[1] = (lines[1] == "" ? tag : tag ", " lines[1])
        for (i = 1; i <= NR; i++) print lines[i]
      }
    ' "$file" > "$tmp_file"; then
      # Write back in place so the file keeps its permissions.
      cat "$tmp_file" > "$file"
    fi
    rm -f "$tmp_file"
  done
}
# Function: update_conda
# Description:
#   Upgrades all packages in every conda environment reported by
#   `conda env list`.
#
# How it works:
#   1. `conda env list` is parsed by skipping comment ('#') and blank lines
#      instead of a fixed number of header lines, so the result does not depend
#      on how many header lines a given conda version prints.
#   2. For each environment, `conda update --all -y` is run against it directly
#      with --name (or --prefix for environments that have no name and are
#      listed by path only). No environment is activated, so the environment
#      that was active before the call is still active afterwards.
#   3. The environment list is collected into an array: zsh does not word-split
#      an unquoted scalar such as $envs, so looping over a multi-line string
#      would run the loop body once with all names glued together.
#
# Note: assumes `conda` is available as a command or shell function.
#
# Usage:
#   update_conda
update_conda() {
  local env
  local -a envs
  # Column 1 is the environment name; unnamed environments (or an unnamed
  # active one, marked '*') only have a path, which is the last column.
  envs=($(conda env list | awk '!/^#/ && NF { print ($1 ~ /^[\/*]/ ? $NF : $1) }'))
  for env in "${envs[@]}"; do
    echo "Upgrading all packages in environment: $env"
    if [[ "$env" == */* ]]; then
      conda update --prefix "$env" --all -y
    else
      conda update --name "$env" --all -y
    fi
  done
  echo "All environments have been upgraded."
}
# Function: list_word_freqs
# Description:
#   Lists the most frequent words (tags) found in the .txt files of a directory.
#
# Usage:
#   list_word_freqs <target_directory>
#
# Parameters:
#   - target_directory: The directory containing the text files to analyze
#     (only *.txt files directly inside it are read).
#
# Functionality:
#   1. Streams all .txt files of the target directory into awk (no temporary
#      file is created, so nothing is left behind if the command is interrupted).
#   2. awk lowercases every whitespace-separated word, strips every character
#      that is not a-z, skips empty results and the ignore list below, and
#      counts the rest.
#   3. The counts are sorted in descending order and the top 40 are shown.
#
# Output:
#   Up to 40 lines in the format: <frequency> <word>
#
# Returns:
#   1 (after printing the usage line) when no directory is given.
#
# Note:
#   - The list of ignored words can be modified to suit specific needs.
list_word_freqs() {
  local target_dir=$1
  if [[ -z "$target_dir" ]]; then
    echo "Usage: list_word_freqs <target_directory>"
    return 1
  fi
  cat "$target_dir"/*.txt | awk '
    BEGIN {
      # Words that are never counted.
      n = split("a the and is with of in or on to has he from", stop, " ")
      for (i = 1; i <= n; i++) {
        ignore[stop[i]]
      }
    }
    {
      for (i = 1; i <= NF; i++) {
        word = tolower($i)
        gsub(/[^a-z]/, "", word)
        if (length(word) > 0 && !(word in ignore)) {
          freq[word]++
        }
      }
    }
    END {
      for (word in freq) {
        print freq[word], word
      }
    }
  ' | sort -nr | head -n 40
}
# Function: sample_prompts
# Description:
#   Prints the contents of every .txt file directly inside a training
#   directory, giving a quick overview of the tags used for training.
#
# Usage: sample_prompts <target_directory>
#
# Parameters:
#   - target_directory: The directory containing the .txt files with tags.
#
# Output:
#   The contents of each .txt file, each followed by two newline characters.
#
# Returns:
#   1 (after printing the usage line) when no directory is given; without this
#   guard an empty argument would expand to /*.txt.
sample_prompts() {
  local target_directory="$1"
  local file
  if [[ -z "$target_directory" ]]; then
    echo "Usage: sample_prompts <target_directory>"
    return 1
  fi
  for file in "$target_directory"/*.txt; do
    cat "$file"
    printf '\n\n'
  done
}
# replace_comma_with_keep_tags
# Description: On every line of every *.tags file found (recursively) under the
#   target directory, replaces the N-th comma of that line with " |||". Lines
#   with fewer than N commas are left unchanged. All other characters,
#   including whitespace, are preserved exactly.
# Usage: replace_comma_with_keep_tags <occurrence_number> [target_directory]
# Parameters:
#   - occurrence_number: Which comma on each line to replace (1 = first). Must be a positive integer.
#   - target_directory (optional): Directory to search for *.tags files. Defaults to the current directory.
# Returns:
#   1 when occurrence_number is missing or not a positive integer.
# Notes:
#   - The intermediate result goes to a mktemp file (not a fixed "tmpfile" in
#     the current directory) and is written back in place, so no file in the
#     working directory is clobbered and file permissions are kept.
# Example:
#   replace_comma_with_keep_tags 2 /path/to/directory
#   replace_comma_with_keep_tags 1
replace_comma_with_keep_tags() {
  local occurrence_number=$1
  local target_directory=${2:-.}
  local file tmp_file
  if [[ -z "$occurrence_number" ]]; then
    echo "Error: occurrence_number is required."
    return 1
  fi
  case "$occurrence_number" in
    *[^0-9]* | 0)
      echo "Error: occurrence_number must be a positive integer."
      return 1
      ;;
  esac
  find "$target_directory" -type f -name "*.tags" | while IFS= read -r file; do
    tmp_file=$(mktemp) || return 1
    if awk -v occurrence="$occurrence_number" '{
      rest = $0
      out = ""
      seen = 0
      # Walk the line comma by comma; stop as soon as the wanted one is replaced.
      while ((pos = index(rest, ",")) > 0) {
        seen++
        if (seen == occurrence) {
          out = out substr(rest, 1, pos - 1) " |||"
          rest = substr(rest, pos + 1)
          break
        }
        out = out substr(rest, 1, pos)
        rest = substr(rest, pos + 1)
      }
      print out rest
    }' "$file" > "$tmp_file"; then
      cat "$tmp_file" > "$file"
    fi
    rm -f "$tmp_file"
  done
}
# Function: display_custom_help
# Description:
#   Prints a custom help menu: the output of `conda env list`, followed by
#   reminder commands for LLMs, taggers/captioners and databases, and a list of
#   the commands, aliases and custom functions available in this shell.
# Notes:
#   - $HOME in the open-webui line is expanded when the function runs; the
#     \$HOME in the pg_ctl line is printed literally.
#   - The editor entry lists the aliases that actually exist (vim, vi and v all
#     run nvim), and `t` is listed once.
display_custom_help() {
  local rule="----------------------------------------------------------------------------------------------------------------------"
  echo "$rule"
  printf "%s\n" "$(conda env list)"
  echo "$rule"
  echo "LLMs"
  echo "---"
  echo "conda activate openwebui && PORT=6969 $HOME/source/repos/open-webui/backend/start.sh"
  #echo "conda activate openwebui && open-webui serve --port 6969"
  echo "ollama serve"
  echo "$rule"
  echo "Taggers + Captioners"
  echo "$rule"
  echo "gallery-dl --cookies-from-browser firefox https://x.com/whatever"
  echo "JTP2"
  echo "---"
  echo "~/toolkit/jtp2 <dir>"
  echo "Joy Captioner"
  echo "---"
  echo "~/source/repos/joy/joy <dir> --custom_prompt \"<prompt>\" --caption_type custom"
  echo "Waifu Diffusion Tagger:"
  echo "---"
  echo "python ~/source/repos/wdv3-timm/wdv3_timm.py <dir> --model eva02"
  echo "$rule"
  echo "Database Stuff"
  echo "$rule"
  echo "Redis"
  echo "---"
  echo "~/db/redis-stable/src/redis-server : Start server."
  echo "PostgreSQL"
  echo "---"
  echo "psql -d postgres -h /tmp : Connect using socket directory."
  echo "Start server:"
  echo "pg_ctl -D \$HOME/db/postgresql/data -l \$HOME/db/pgsql.log start"
  echo "$rule"
  echo "Commands, Aliases, and Custom Functions:"
  echo "$rule"
  echo "pie : \`pip install -e . --use-pep517\`"
  echo "gcs : \`git clone --recurse-submodules\`"
  echo "dust : A more intuitive version of du."
  echo "ranger : A vim inspired file manager."
  echo "htop : Interactive process viewer."
  echo "nvtop : Interactive GPU process viewer."
  echo "nvitop : An even more interactive GPU process viewer."
  echo "vim, vi, v : Aliases for nvim."
  echo "rt : Edit tmux config and reload it."
  echo "zr : Edit zsh config and reload it."
  echo "ta : Attach to tmux session."
  echo "ga : Git add, commit, and push."
  echo "gs : Git status."
  echo "wd : Word diff in git."
  echo "grabber : Alias for Grabber-cli."
  echo "ls : Alias for 'ls --color=always'."
  echo "ll : List all files in long format, including hidden files, with human-readable sizes and colorized output."
  echo "cp : Use the 'reflink' option for 'cp' to enable copy-on-write when possible, improving efficiency."
  echo "t : Launch TensorBoard with the log directory set to the user's output_dir/logs."
  echo "接 : Attach to an existing tmux session."
  echo "gc : Create a signed, verbose git commit."
  echo "dir : Display directory contents with colorized output."
  echo "ezc : Quickly edit and reload the zsh configuration file."
  echo "nv : Returns the CUDA version number."
  echo "remove_repetition : Removes repetition in txt files in a target directory."
  echo "copy_sample_prompts : Copies ./sample-prompts.txt file from the current dir to datasets/furry."
  echo "remove_number_prefix : Removes all numbers prefixed by a _ from the end of every file."
  echo "count_captions : Counts *.caption and *.txt files in each subdirectory."
  echo "count_captions_per_folder : Counts *.caption and *.txt files in each subdirectory individually."
  echo "copy_matching_caption_files : Copies matching .caption files for <dir> to the current directory."
  echo "c : Change to ComfyUI directory and start the server."
  echo "png2mp4 : Convert PNG sequence to MP4 video."
  echo "seed <file> : Display the seed from a safetensors file."
  echo "swch <branch> : Clean repo and switch to specified git branch."
  echo "convert_to_jxl <directory> : Convert JPG, JPEG, and PNG files to JXL in the specified directory."
  echo "convert_pxl_to_png <directory> : Convert PXL files to PNG in the specified directory."
  echo "replace_text_in_files [dir] <src> <replace> : Perform text replacement on *.txt files in a target directory."
  echo "update_dir [directory] : Update git repositories in subdirectories."
  echo "inject_to_captions [dir] \"txt\" : Add prefix to the beginning of each text file in a directory."
  echo "chop_lora <input_file> : Generate multiple versions of a Lora file with different presets."
  echo "$rule"
}
# Function: nv
# Description:
#   Prints the version of the NVIDIA CUDA Compiler (nvcc) with the dot removed,
#   e.g. "release 12.6" in the `nvcc --version` output becomes 126.
# Output:
#   The dot-less version number on stdout.
# Returns:
#   1 (with a message on stderr) when nvcc is not available, when it fails, or
#   when no "release X.Y" string is found in its output.
# Notes:
#   - The version is extracted with plain sed, so GNU grep's -P option is not required.
nv() {
  local nvcc_output version
  if ! command -v nvcc > /dev/null 2>&1; then
    echo "nv: nvcc not found" >&2
    return 1
  fi
  # Declared separately from the assignment so a failing nvcc is not masked.
  nvcc_output=$(nvcc --version) || return 1
  # Extract the version number (e.g. 12.6); only the first match is used.
  version=$(printf '%s\n' "$nvcc_output" \
    | sed -n 's/.*release \([0-9][0-9]*\.[0-9][0-9]*\).*/\1/p' \
    | head -n 1)
  if [[ -z "$version" ]]; then
    echo "nv: could not find a release number in the nvcc output" >&2
    return 1
  fi
  # Remove the dot to get e.g. 126
  echo "${version//./}"
}
# Function: remove_repetition
# Description:
#   For every .txt file found (recursively) under a directory, collapses a run
#   of identical words at the END of each line down to a single word, e.g.
#   "a b c c c" becomes "a b c". Repetition elsewhere in a line is not touched.
#   Words are whitespace-separated and are written back joined by single spaces.
# Usage: remove_repetition <directory>
# Parameters:
#   $1 (directory): the directory to search for .txt files.
# Returns:
#   1 (after printing the usage line) when no directory is given.
# Notes:
#   - Blank lines are kept (previously they were silently dropped, changing
#     the number of lines in the file).
#   - Words are compared as strings, so "1" and "1.0" are not treated as equal.
#   - The result is written to "<file>.tmp" and moved over the original only
#     when awk succeeded; the temporary file is removed otherwise.
remove_repetition() {
  local dir=$1
  local file
  if [[ -z "$dir" ]]; then
    echo "Usage: remove_repetition <directory>"
    return 1
  fi
  find "$dir" -type f -name "*.txt" | while IFS= read -r file; do
    if awk '
      {
        n = split($0, words, " ")       # Split the line into words
        if (n == 0) {                   # Blank line: keep it as an empty line
          print ""
          next
        }
        last = n
        # Walk back from the end while the word repeats its predecessor.
        while (last > 1 && (words[last] "") == (words[last - 1] "")) {
          last--
        }
        line = words[1]
        for (j = 2; j <= last; j++) {
          line = line OFS words[j]
        }
        print line
      }
    ' "$file" > "${file}.tmp"; then
      mv "${file}.tmp" "$file"
    else
      rm -f "${file}.tmp"
    fi
  done
}
# Function: copy_sample_prompts (organize variant)
# Description:
#   Copies ./sample-prompts.txt from the current directory into one of two
#   folders under ~/datasets/furry/sample_prompts, depending on its content:
#     - pony/     when the file contains the literal text "score_"
#     - compass/  otherwise
#   The file is copied (cp -v, verbose), not moved; the original stays in place.
# Returns:
#   1 (with a message on stderr) when ./sample-prompts.txt does not exist, or
#   when the copy fails; "File has been organized." is printed only on success.
# Notes:
#   - The match is a fixed string. The previous pattern 'score_*' was a regex
#     meaning "score" followed by zero or more underscores, so any file that
#     merely contained "score" (e.g. "underscore") was sent to pony/.
copy_sample_prompts() {
  local file="./sample-prompts.txt"
  local dest
  if [[ ! -f "$file" ]]; then
    echo "copy_sample_prompts: $file not found" >&2
    return 1
  fi
  if grep -qF 'score_' "$file"; then
    dest=~/datasets/furry/sample_prompts/pony/
  else
    dest=~/datasets/furry/sample_prompts/compass/
  fi
  cp -v "$file" "$dest" || return 1
  echo "File has been organized."
}
# Function: remove_number_prefix
# Description:
#   Renames files whose name ends in "_<digits>" right before the file
#   extension, dropping that "_<digits>" part: "img_12.png" -> "img.png".
#   Files are taken from the glob **/*_[0-9]*.* relative to the current directory.
# Notes:
#   - Only a purely numeric suffix is removed. Names such as "x_3.tar.gz" or
#     "x_1abc.png" match the glob but are skipped instead of being mangled.
#   - An existing target is never overwritten: if "img.png" already exists
#     (for example because "img_1.png" was renamed first), "img_2.png" is left
#     alone and a message is printed on stderr.
remove_number_prefix() {
  local file stem ext number new_file
  # Loop through all matching files in the current directory tree
  for file in **/*_[0-9]*.*; do
    ext="${file##*.}"        # text after the last dot
    stem="${file%.*}"        # everything before the last dot
    number="${stem##*_}"     # text after the last underscore
    case "$number" in
      '' | *[^0-9]*) continue ;;
    esac
    # Get the new file name by removing '_number' before the file extension
    new_file="${stem%_*}.${ext}"
    if [[ -e "$new_file" ]]; then
      echo "Skipping $file: $new_file already exists" >&2
      continue
    fi
    # Rename the file
    mv "$file" "$new_file"
  done
}
# Function: count_captions
# Description:
#   Counts all *.caption and *.txt files in the current directory and all of
#   its subdirectories, and prints one total per file type.
# Output:
#   *.caption files: <count>
#   *.txt files: <count>
# Notes:
#   - The counters are local, so they no longer leak into the shell session.
count_captions() {
  local caption_count txt_count
  caption_count=$(find . -type f -name "*.caption" | wc -l)
  txt_count=$(find . -type f -name "*.txt" | wc -l)
  echo "*.caption files: $caption_count"
  echo "*.txt files: $txt_count"
}
# Function: count_captions_per_folder
# Description:
#   For each subdirectory of the current directory, prints its name followed by
#   the number of *.caption and *.txt files found anywhere below it.
# Output (per subdirectory):
#   Directory: <name>/
#   *.caption files: <count>
#   *.txt files: <count>
# Notes:
#   - The loop variable is local, so it no longer leaks into the shell session.
count_captions_per_folder() {
  local dir
  for dir in */; do
    echo "Directory: $dir"
    printf '%s' "*.caption files: "
    find "$dir" -type f -name "*.caption" | wc -l
    printf '%s' "*.txt files: "
    find "$dir" -type f -name "*.txt" | wc -l
  done
}
# Function: oui
# Description:
#   Activates the "openwebui" conda environment and starts Open WebUI with
#   `open-webui serve --port 6969`.
# Returns:
#   The status of `conda activate` when activation fails (the server is then
#   not started, so it is never launched from the wrong environment);
#   otherwise the exit status of open-webui.
oui() {
  conda activate openwebui || return
  open-webui serve --port 6969
}
# Function: copy_matching_caption_files
# Description:
#   For every image file in the current directory (jpg, jpeg, png, gif, bmp,
#   tiff, webp, jxl), looks for "<dir>/<image name without extension>.caption"
#   and copies it into the current directory when it exists.
# Usage: copy_matching_caption_files <dir>
# Parameters:
#   $1 (dir): directory that holds the .caption files.
# Output:
#   One line per image: either "Copied ... to the current directory." or
#   "No matching .caption file for <image>."
# Returns:
#   1 (after printing the usage line) when no directory is given.
# Notes:
#   - Images are selected by looping over * and filtering on the extension with
#     `case`. Unlike the glob *.(jpg|...), this does not abort with "no matches
#     found" under zsh when the directory contains files but no images.
#   - All variables are local; the function no longer sets a global TARGET_DIR.
copy_matching_caption_files() {
  local target_dir="$1"
  local image_file base_name caption_file
  if [[ -z "$target_dir" ]]; then
    echo "Usage: copy_matching_caption_files <dir>"
    return 1
  fi
  for image_file in *; do
    # Keep only the supported image extensions.
    case "$image_file" in
      *.jpg | *.jpeg | *.png | *.gif | *.bmp | *.tiff | *.webp | *.jxl) ;;
      *) continue ;;
    esac
    [[ -f "$image_file" ]] || continue
    # Extract the base name (without extension)
    base_name="${image_file%.*}"
    # Define the corresponding .caption file in the target directory
    caption_file="$target_dir/$base_name.caption"
    if [[ -f "$caption_file" ]]; then
      # Copy the .caption file to the current directory
      cp "$caption_file" .
      echo "Copied $caption_file to the current directory."
    else
      echo "No matching .caption file for $image_file."
    fi
  done
}
# Function: replace_text_in_files
# Description:
#   Replaces every occurrence of a text string with another in all .txt files
#   directly inside a directory, editing the files in place with sed.
# Usage: replace_text_in_files <target_dir> <search_text> <replace_text>
# Parameters:
#   1. target_dir:   The directory containing the .txt files.
#   2. search_text:  The text to be replaced. Taken literally, not as a regex.
#   3. replace_text: The text that replaces search_text (may be empty to delete).
# Returns:
#   1 (after printing the usage line) when target_dir or search_text is missing.
# Notes:
#   - Both strings are escaped before being placed into the sed expression, so
#     characters such as "/", "&", "." or "*" in either of them no longer break
#     the command or change its meaning.
#   - Uses `sed -i` without a suffix argument, as before.
# Example usage:
#   replace_text_in_files "/path/to/directory" "squishy (artist)" "by squishy (artist)"
replace_text_in_files() {
  local target_dir=$1
  local search_text=$2
  local replace_text=$3
  local file pattern replacement
  if [[ -z "$target_dir" || -z "$search_text" ]]; then
    echo "Usage: replace_text_in_files <target_dir> <search_text> <replace_text>"
    return 1
  fi
  # Escape what is special in a basic regex (and the "/" delimiter) ...
  pattern=$(printf '%s' "$search_text" | sed 's|[][\\/.*^$]|\\&|g')
  # ... and what is special on the replacement side of s///.
  replacement=$(printf '%s' "$replace_text" | sed 's|[\\/&]|\\&|g')
  # Loop through all .txt files in the target directory
  for file in "$target_dir"/*.txt; do
    [[ -f "$file" ]] || continue
    sed -i "s/$pattern/$replacement/g" "$file"
  done
  echo "Text replacement complete in $target_dir!"
}
# Function: inject_to_tags
# Description:
#   Puts a prefix at the beginning of every *.tags file directly inside a directory.
#   - If the file already contains the prefix, its first occurrence on each line
#     is removed and the prefix is placed at the front of the first line.
#   - Otherwise "<prefix>, " is prepended to the file.
#   Afterwards the separators are tidied on every line: ",,", ", ," and " ,"
#   are collapsed, and a trailing ", " is removed.
# Usage: inject_to_tags <directory> <prefix>
# Arguments:
#   <directory> - The directory containing the *.tags files to be modified.
#   <prefix>    - The text to put at the front. Taken literally, not as a regex.
# Output:
#   One "Moved ..." or "Added ..." line per processed file.
# Returns:
#   1 when an argument is missing or the directory does not exist.
# Notes:
#   - The prefix is escaped before it is used in sed, so "/", "&", "." etc. are safe.
#   - The new content is built in a mktemp file and then written back into the
#     original file, so the file keeps its permissions (moving the mktemp file
#     over it would reset them to 0600).
inject_to_tags() {
  local dir="$1"
  local prefix="$2"
  local file temp_file pattern replacement
  if [[ -z "$dir" || -z "$prefix" ]]; then
    echo "Usage: inject_to_tags <directory> <prefix>"
    return 1
  fi
  if [[ ! -d "$dir" ]]; then
    echo "Directory $dir does not exist."
    return 1
  fi
  # Escaped forms of the prefix for the pattern and replacement side of s///.
  pattern=$(printf '%s' "$prefix" | sed 's|[][\\/.*^$]|\\&|g')
  replacement=$(printf '%s' "$prefix" | sed 's|[\\/&]|\\&|g')
  for file in "$dir"/*.tags; do
    [[ -f "$file" ]] || continue
    temp_file=$(mktemp) || return 1
    if grep -qF -- "$prefix" "$file"; then
      # Move the existing prefix to the front without leaving extra commas or spaces
      sed "s/$pattern//" "$file" \
        | sed "1s/^/${replacement}, /" \
        | sed -e 's/^, //' -e 's/,,/,/g' -e 's/, ,/,/g' -e 's/ ,/,/g' -e 's/, *$//' \
          > "$temp_file"
      cat "$temp_file" > "$file"
      echo "Moved '${prefix}' to the front of $file"
    else
      { printf '%s, ' "$prefix"; cat "$file"; } \
        | sed -e 's/,,/,/g' -e 's/, ,/,/g' -e 's/ ,/,/g' -e 's/, *$//' \
          > "$temp_file"
      cat "$temp_file" > "$file"
      echo "Added '${prefix}, ' to the front of $file"
    fi
    rm -f "$temp_file"
  done
}
# Function: update_dir
# Description:
#   Runs `git pull` in every immediate subdirectory of a directory that is a
#   git checkout.
# Usage: update_dir [directory]
# Parameters:
#   $1 (directory, optional): the parent directory; defaults to the current directory.
# Output:
#   "Updating <absolute path>" before each pull, followed by git's own output.
# Notes:
#   - A checkout is recognised by the existence of ".git" as either a directory
#     or a file; submodules and linked worktrees use a ".git" file and were
#     skipped by the previous directory-only test.
#   - Each pull runs in a subshell, so the caller's working directory is unchanged.
#   - The find pre-check avoids expanding the */ glob when there are no subdirectories.
update_dir() {
  local target_dir="${1:-.}"
  local dir
  # Check if there are any subdirectories
  if [[ -n "$(find "$target_dir" -mindepth 1 -maxdepth 1 -type d)" ]]; then
    for dir in "$target_dir"/*/; do
      [[ -d "$dir" ]] || continue
      (
        cd "$dir" || exit
        # If the directory is a git checkout, pull the latest changes
        if [[ -e ".git" ]]; then
          echo "Updating $(pwd)"
          git pull
        fi
      )
    done
  fi
}
# Function: chop_lora
# Description:
#   Generates several variants of a LoRA model file by running
#   ~/source/repos/resize_lora/chop_blocks.py once per preset. Each preset is a
#   comma-separated vector of 21 values (1 = keep, 0 = remove, per the original
#   author's description) that is passed to the script unchanged.
#
# Usage:
#   chop_lora <input_file>
#
# Parameters:
#   $1 - The input LoRA model file (typically a .safetensors file)
#
# Presets (descriptions from the original author):
#   - ringdingding: This vector string was used for the Stoat LoRA.
#   - squeaker: purpose unknown.
#   - heavylifter: Keeps only one specific layer that seems to learn the most.
#   - style1 and style2: Different configurations for style transfer
#   - beeg: A configuration that keeps only the largest layers.
#   - all: Keeps all layers
#   - allin: Keeps only the input layers
#   - allmid: Keeps only the middle layers
#   - allout: Keeps only the output layers
#
# Output:
#   For each preset, prints "Generating <file>" and asks chop_blocks.py to
#   write "<input without extension>-<preset_name>.safetensors".
#
# Returns:
#   1 (after printing the usage line) when no input file is given.
#
# Notes:
#   - Presets are kept as an ordered list of "name=vector" entries instead of
#     an associative array written as `["name"] = "vector"`: with spaces around
#     the '=' that form is not a valid key/value assignment. The list also
#     makes the processing order fixed (the order written below).
chop_lora() {
  local input_file="$1"
  local base_name entry preset vector_string output_file
  local -a presets
  if [[ -z "$input_file" ]]; then
    echo "Usage: chop_lora <input_file>"
    return 1
  fi
  # Remove the extension, but only if the file name itself (not a directory
  # in the path) contains a dot.
  base_name="$input_file"
  case "${input_file##*/}" in
    *.*) base_name="${input_file%.*}" ;;
  esac
  # Define presets and their corresponding vector strings
  presets=(
    "ringdingding=1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0"
    "squeaker=1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0"
    "heavylifter=1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0"
    "style1=1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
    "style2=1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
    "beeg=1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0"
    "all=1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1"
    "allin=1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0"
    "allmid=1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0"
    "allout=1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1"
  )
  for entry in "${presets[@]}"; do
    preset="${entry%%=*}"
    vector_string="${entry#*=}"
    output_file="${base_name}-${preset}.safetensors"
    echo "Generating $output_file"
    python ~/source/repos/resize_lora/chop_blocks.py "$input_file" "$vector_string" -o "$output_file"
  done
}
# Function: cs1
# Description:
# Chops blocks from an SDXL LoRA's safetensors file to preserve the style information only,
# using a fixed block vector (the same vector as the "style2" preset of chop_lora).
#
# Usage:
# cs1 <file.safetensors>
#
# Parameters:
# $1 - Path to the LoRA safetensors file (required)
#
# Output:
# A file named "<name without version tag><version tag>c1.safetensors", e.g.
# "fooboo-v1s400.safetensors" -> "fooboo-v1s400c1.safetensors". The directory part of the
# input path is dropped, so the file is written relative to the current directory.
#
# Notes:
# - Requires an executable at ~/toolkit/chop_blocks
# - Only the first "v<digits>s<digits>" tag is moved to the end of the name; when the name
#   carries no such tag, "c1" is simply appended to the base name.
cs1() {
  local target_safetensors="$1"
  if [[ -z "$target_safetensors" ]]; then
    echo "Usage: cs1 <file.safetensors>" >&2
    return 1
  fi

  # File name without directory and without the .safetensors suffix
  local base_name="${target_safetensors##*/}"
  base_name="${base_name%.safetensors}"

  # First version/step tag (e.g. v1s400). Several tags would otherwise come back as
  # several lines and corrupt the output file name.
  local version_step
  version_step=$(printf '%s\n' "$base_name" | grep -o 'v[0-9]*s[0-9]*' | head -n 1)

  # Remove the tag once so it is not duplicated when re-appended below. With no tag there
  # is nothing to remove (an empty sed pattern used to wipe the whole name here).
  local base_name_no_version="$base_name"
  if [[ -n "$version_step" ]]; then
    base_name_no_version="${base_name/$version_step/}"
  fi

  local output_safetensors="${base_name_no_version}${version_step}c1.safetensors"
  ~/toolkit/chop_blocks "$target_safetensors" 1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0 -o "$output_safetensors"
}
# Function: cs2
# Description:
# Chops blocks from an SDXL LoRA's safetensors file to preserve the style information only,
# using a second fixed block vector (the same vector as the "style1" preset of chop_lora).
#
# Usage:
# cs2 <file.safetensors>
#
# Parameters:
# $1 - Path to the LoRA safetensors file (required)
#
# Output:
# A file named "<name without version tag><version tag>c2.safetensors", e.g.
# "fooboo-v1s400.safetensors" -> "fooboo-v1s400c2.safetensors". The directory part of the
# input path is dropped, so the file is written relative to the current directory.
#
# Notes:
# - Requires an executable at ~/toolkit/chop_blocks
# - Only the first "v<digits>s<digits>" tag is moved to the end of the name; when the name
#   carries no such tag, "c2" is simply appended to the base name.
cs2() {
  local target_safetensors="$1"
  if [[ -z "$target_safetensors" ]]; then
    echo "Usage: cs2 <file.safetensors>" >&2
    return 1
  fi

  # File name without directory and without the .safetensors suffix
  local base_name="${target_safetensors##*/}"
  base_name="${base_name%.safetensors}"

  # First version/step tag (e.g. v1s400). Several tags would otherwise come back as
  # several lines and corrupt the output file name.
  local version_step
  version_step=$(printf '%s\n' "$base_name" | grep -o 'v[0-9]*s[0-9]*' | head -n 1)

  # Remove the tag once so it is not duplicated when re-appended below. With no tag there
  # is nothing to remove (an empty sed pattern used to wipe the whole name here).
  local base_name_no_version="$base_name"
  if [[ -n "$version_step" ]]; then
    base_name_no_version="${base_name/$version_step/}"
  fi

  local output_safetensors="${base_name_no_version}${version_step}c2.safetensors"
  ~/toolkit/chop_blocks "$target_safetensors" 1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0 -o "$output_safetensors"
}
# Function: swch (Switch Git Branch)
# Description:
# Switches to another Git branch after wiping untracked files from the working directory.
#
# Usage:
# swch <branch_name>
#
# Parameters:
# $1 - The name of the branch to switch to (required).
#
# Actions:
# 1. Checks that a branch name is provided; otherwise prints a message and returns 1.
# 2. Runs 'git clean -fxd' to remove untracked files and directories.
# 3. Runs 'git pull' on the branch that is currently checked out.
# 4. Checks out the requested branch.
# Each step only runs when the previous one succeeded.
#
# Returns:
# The exit status of the first failing git command, or 0 when all steps succeed.
#
# Notes:
# - Use with caution as 'git clean -fxd' will remove all untracked files and directories,
#   including ignored ones.
# - Ensure all important changes are committed or stashed before using this function.
swch() {
  if [[ -z "$1" ]]; then
    echo "Please provide a branch name." >&2
    return 1
  fi
  # Local, so the branch name does not linger as a global in the interactive shell.
  local branch_name="$1"
  git clean -fxd && git pull && git checkout "$branch_name"
}
# Function: extract_iframes
# Description:
# This function extracts I-frames from a video file using ffmpeg.
#
# Usage:
# extract_iframes <input_file> [<scene_change_fraction>]
#
# Parameters:
# $1 - The input video file (required)
# $2 - The scene change fraction threshold (optional, default: 0.1)
#
# Environment:
# FFMPEG_BIN - ffmpeg executable to run (optional, default: /usr/bin/ffmpeg)
#
# Actions:
# 1. Validates the arguments and assigns them to local variables
# 2. Derives the base filename: directory and final extension (any, not just .webm) removed
# 3. Runs ffmpeg with a select filter that keeps frames which are I-frames AND whose scene
#    score exceeds the threshold
# 4. Saves the selected frames as PNG files with sequential numbering
#
# Returns:
# 1 when no input file is given, otherwise the exit status of ffmpeg.
#
# Notes:
# - Output files are named "<base_name>-XXXXXX.png" in the current directory
extract_iframes() {
  local input_file="$1"
  local scene_change_fraction="${2:-0.1}"
  if [[ -z "$input_file" ]]; then
    echo "Usage: extract_iframes <input_file> [<scene_change_fraction>]" >&2
    return 1
  fi

  local ffmpeg_bin="${FFMPEG_BIN:-/usr/bin/ffmpeg}"

  # Base name = last path component minus its final extension. The pattern requires at
  # least one character before the dot so a dotfile name is not reduced to nothing.
  local base_name="${input_file##*/}"
  case "$base_name" in
    ?*.*) base_name="${base_name%.*}" ;;
  esac

  # The commas inside the filter expression are escaped so ffmpeg does not treat them as
  # filter separators; -fps_mode vfr emits only the selected frames.
  "$ffmpeg_bin" -i "$input_file" -f image2 \
    -vf "select=eq(pict_type\,PICT_TYPE_I)*gt(scene\,${scene_change_fraction}),showinfo" \
    -fps_mode vfr "${base_name}-%06d.png"
}
# Function: seed
# Description:
# This function extracts the seed value from a LoRA (Low-Rank Adaptation) model's metadata.
#
# Usage:
# seed <file_path>
#
# Parameters:
# $1 - The path to the LoRA model file (usually a .safetensors file)
#
# Actions:
# 1. Validates that a file path was given
# 2. Hands the path to Python as a command-line argument (never spliced into the Python
#    source, so quotes or backslashes in the path cannot break or alter the script)
# 3. Opens the safetensors file and reads its metadata
# 4. Prints the 'ss_seed' value, JSON-decoded when possible and verbatim otherwise
# 5. Prints 'Not found' when the file has no metadata or no 'ss_seed' entry
#
# Returns:
# 1 when no path is given, otherwise the exit status of python3.
#
# Notes:
# - Requires Python 3 with the 'safetensors' module installed
# - The seed is typically used to reproduce the exact training conditions of the LoRA
# - If the seed is not found, it may indicate the LoRA was created without recording this information
seed() {
  local file_path="$1"
  if [[ -z "$file_path" ]]; then
    echo "Usage: seed <file_path>" >&2
    return 1
  fi

  # The script is single-quoted for the shell, so it must not contain single quotes.
  python3 -c '
import json
import sys

import safetensors

with safetensors.safe_open(sys.argv[1], "np") as handle:
    # metadata() yields None for files written without a metadata header
    metadata = handle.metadata() or {}

raw_seed = metadata.get("ss_seed")
if raw_seed is None:
    print("Not found")
else:
    try:
        print(json.loads(raw_seed))
    except ValueError:
        # Not valid JSON: show the stored text as it is
        print(raw_seed)
' "$file_path"
}
# Pull in further toolkit scripts, in this order. Their contents are not visible from
# this file, so check each script for what it actually defines.
source ~/toolkit/zsh/install_members.zsh
source ~/toolkit/zsh/gallery-dl.zsh
source ~/toolkit/zsh/png2mp4.zsh
# Function: c
# Description:
# This function launches ComfyUI with specific settings tailored to the user's preferences.
#
# Usage:
# c
#
# Actions:
# 1. Changes directory to ~/ComfyUI (the calling shell stays in that directory)
# 2. Activates the 'comfyui' conda environment
# 3. Launches ComfyUI with the following settings:
# - Listens on all network interfaces (0.0.0.0)
# - Uses 'taesd' as the preview method
# - Enables PyTorch cross-attention
# - Disables xformers
# - Uses the latest version of Comfy-Org/ComfyUI_frontend
# - Enables fast mode
# Each step runs only when the previous one succeeded, so main.py is never started from
# the wrong directory or outside the intended environment.
#
# Parameters:
# None
#
# Returns:
# The exit status of the first failing step, otherwise the exit status of ComfyUI.
#
# Notes:
# - Requires ComfyUI to be installed in ~/ComfyUI
# - Requires a conda environment named 'comfyui' with necessary dependencies
# - The --listen 0.0.0.0 option allows access from other devices on the network
# - --preview-method taesd provides better previews
# - --use-pytorch-cross-attention and --disable-xformers affect performance and compatibility
# - --front-end-version ensures the latest UI is used
# - --fast option may improve overall performance
c() {
  # A bare `return` propagates the status of the command that just failed.
  cd ~/ComfyUI || return
  conda activate comfyui || return
  python main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers --front-end-version Comfy-Org/ComfyUI_frontend@latest --fast
}
# Function: conda_prompt_info
# Description:
# Reports the currently active Conda environment in a form suitable for a shell prompt.
#
# Usage:
# conda_prompt_info
#
# Returns:
# Prints the value of CONDA_DEFAULT_ENV wrapped in parentheses.
# Prints nothing when CONDA_DEFAULT_ENV is unset or empty.
# The exit status is 0 in both cases.
#
# Example output:
# With CONDA_DEFAULT_ENV set to "myenv" the function prints: (myenv)
#
# Notes:
# - Intended for use inside PS1 or a theme's prompt definition, so the active
#   environment is visible at a glance.
conda_prompt_info() {
  local active_env="${CONDA_DEFAULT_ENV}"
  # No active environment: stay silent and still report success.
  [[ -z "${active_env}" ]] && return 0
  echo "(${active_env})"
}
# Function: display_custom_help
# Description:
# This function displays custom help information for user-defined functions and aliases.
# It provides a quick reference for commonly used commands and their descriptions.
#
# Usage:
# display_custom_help
#
# Output:
# Prints a formatted list of custom commands and their brief descriptions.
#
# Note:
# Add or modify entries in this function to keep your personal command reference up-to-date.
display_custom_help