Initial commit
Signed-off-by: Balazs Horvath <acsipont@gmail.com>
- .tmux.conf +42 -0
- .zshrc +498 -0
- 9em124t2-499968/clip_model.pt +3 -0
- 9em124t2-499968/config.yaml +39 -0
- 9em124t2-499968/image_adapter.pt +3 -0
- 9em124t2-499968/text_model/README.md +202 -0
- 9em124t2-499968/text_model/adapter_config.json +29 -0
- 9em124t2-499968/text_model/adapter_model.safetensors +3 -0
- LICENSE +21 -0
- ascii_art/gaeros +11 -0
- ascii_art/kade +13 -0
- crawl/crawl +146 -0
- crawl/crawl4ai.pyi +58 -0
- crawl/crawl_wikipedia +182 -0
- joy +555 -0
- jtp2 +290 -0
- jtp2_overwrite +316 -0
- paper-qa.code-workspace +11 -0
- papers_please +151 -0
- password +14 -0
.tmux.conf
ADDED
@@ -0,0 +1,42 @@
+# List of plugins
+set -g @plugin 'tmux-plugins/tpm'
+set -g @plugin 'tmux-plugins/tmux-sensible'
+set -g @plugin 'tmux-plugins/tmux-yank'
+set -g @plugin 'tmux-plugins/tmux-resurrect'
+set -g @plugin 'tmux-plugins/tmux-continuum'
+set -g @plugin 'sainnhe/tmux-fzf'
+set -g @plugin 'catppuccin/tmux'
+
+# Enable clipboard integration
+set -g set-clipboard on
+
+# Enable tmux-continuum and set it to boot on start
+set -g @continuum-boot 'on'
+# Set the strategy for tmux-resurrect to use nvim sessions
+set -g @resurrect-strategy-nvim 'session'
+
+# Disable the bell action
+set-option -g bell-action none
+
+# Set the default shell to zsh without global rc files
+set-option -g default-command "zsh --no-globalrcs"
+# Enable setting terminal titles
+set -g set-titles on
+# Set the format for terminal titles
+set -g set-titles-string '#T #{pane_current_command}'
+# Set the window size to the smallest
+set -g window-size smallest
+# Enable aggressive resize for windows
+setw -g aggressive-resize on
+# Enable mouse support
+set -g mouse on
+# Set the default terminal type to tmux-256color
+set -g default-terminal "tmux-256color"
+# Append terminal overrides for xterm-256color
+set-option -ga terminal-overrides ",xterm-256color:Tc"
+
+# Bind 'r' to reload the tmux config and display a message
+bind r source-file ~/.tmux.conf \; display "Config reloaded!"
+
+# Initialize TMUX plugin manager (keep this line at the very bottom of tmux.conf)
+run '~/.tmux/plugins/tpm/tpm'
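Note: this config assumes TPM is already cloned to ~/.tmux/plugins/tpm; if it is not, a minimal bootstrap (the standard location from the tpm README) would be:

git clone https://github.com/tmux-plugins/tpm ~/.tmux/plugins/tpm
tmux source ~/.tmux.conf    # then press prefix + I to install the listed plugins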
.zshrc
ADDED
@@ -0,0 +1,498 @@
+export LANG=ja_JP.UTF-8
+export LC_ALL=ja_JP.UTF-8
+
+display_custom_help() {
+    echo "----------------------------------------------------------------------------------------------------------------------"
+    printf "%s\n" "$(conda env list)"
+    echo "----------------------------------------------------------------------------------------------------------------------"
+    echo "LLMs"
+    echo "---"
+    echo "conda activate openwebui && open-webui serve --port 6969"
+    echo "ollama serve"
+    echo "----------------------------------------------------------------------------------------------------------------------"
+    echo "Taggers + Captioners"
+    echo "----------------------------------------------------------------------------------------------------------------------"
+    echo "JTP2"
+    echo "---"
+    echo "~/toolkit/jtp2 <dir>"
+    echo "Joy Captioner"
+    echo "---"
+    echo "~/source/repos/joy/joy <dir> --custom_prompt \"<prompt>\" --caption_type custom"
+    echo "Waifu Diffusion Tagger:"
+    echo "---"
+    echo "python ~/source/repos/wdv3-timm/wdv3_timm.py <dir> --model eva02"
+    echo "----------------------------------------------------------------------------------------------------------------------"
+    echo "Database Stuff"
+    echo "----------------------------------------------------------------------------------------------------------------------"
+    echo "Redis"
+    echo "---"
+    echo "~/db/redis-stable/src/redis-server : Start server."
+    echo "PostgreSQL"
+    echo "---"
+    echo "psql -d postgres -h /tmp : Connect using socket directory."
+    echo "Start server:"
+    echo "pg_ctl -D \$HOME/db/postgresql/data -l \$HOME/db/pgsql.log start"
+    # echo "Commands, Aliases, and Custom Functions:"
+    # echo "----------------------------------------------------------------------------------------------------------------------"
+    # echo "pie : \`pip install -e . --use-pep517\`"
+    # echo "gcs : \`git clone --recurse-submodules\`"
+    # echo "dust : A more intuitive version of du."
+    # echo "ranger : A vim inspired file manager."
+    # echo "htop : Interactive process viewer."
+    # echo "nvtop : Interactive GPU process viewer."
+    # echo "nvitop : An even more interactive GPU process viewer."
+    # echo "nvim : Alias for vim."
+    # echo "rt : Edit tmux config and reload it."
+    # echo "zr : Edit zsh config and reload it."
+    # echo "ta : Attach to tmux session."
+    # echo "ga : Git add, commit, and push."
+    # echo "gs : Git status."
+    # echo "wd : Word diff in git."
+    # echo "grabber : Alias for Grabber-cli."
+    # echo "ls : Alias for 'ls --color=always'."
+    # echo "----------------------------------------------------------------------------------------------------------------------"
+    echo "- 🐺 TOOLS -"
+    echo "----------------------------------------------------------------------------------------------------------------------"
+    echo "nv : Returns the cuda version number."
+
+
+    echo "remove_repetition : Removes repetition in txt files in a target directory."
+    echo "copy_sample_prompts : Copies ./sample-prompts.txt file from the current dir to datasets/furry."
+    echo "remove_number_prefix : Removes all numbers prefixed by a _ from the end of every file."
+    echo "count_captions : Counts *.caption and *.txt files in each subdirectory."
+    echo "count_captions_per_folder : Counts *.caption and *.txt files in each subdirectory individually."
+    echo "llama : Runs Meta-Llama-3-8B-Instruct on port 11434."
+    echo "copy_matching_caption_files : Copies matching .caption files for <dir> to the current directory."
+    echo "c : Change to ComfyUI directory and start the server."
+    echo "t : Start TensorBoard with logs directory."
+    echo "png2mp4 : Convert PNG sequence to MP4 video."
+    echo "seed <file> : Display the seed from a safetensors file."
+    echo "swch <branch> : Clean repo and switch to specified git branch."
+    echo "convert_to_jxl <directory> : Convert JPG, JPEG, and PNG files to JXL in the specified directory."
+    echo "convert_pxl_to_png <directory> : Convert PXL files to PNG in the specified directory."
+    echo "replace_text_in_files [dir] <src> <replace> : Perform text replacement on *.txt files in a target directory."
+    echo "update_dir [directory] : Update git repositories in subdirectories."
+    echo "inject_to_captions [dir] \"txt\" : Add prefix to the beginning of each text file in a directory."
+    echo "chop_lora <input_file> : Generate multiple versions of a Lora file with different presets."
+    echo "----------------------------------------------------------------------------------------------------------------------"
+}
+
+export RUST_BACKTRACE=1
+
+# This function `nv` retrieves the version of the NVIDIA CUDA Compiler (nvcc) installed on the system.
+# It extracts the version number from the `nvcc --version` command output.
+# The version number is then formatted by removing the dot (e.g., 12.6 becomes 126).
+# Finally, the function returns the formatted version number.
+function nv() {
+    # Get the nvcc version output
+    local nvcc_output=$(nvcc --version)
+
+    # Extract the version number (12.6)
+    local version=$(echo "$nvcc_output" | grep -oP 'release \K[0-9]+\.[0-9]+')
+
+    # Remove the dot to get 126
+    local result=$(echo "$version" | tr -d '.')
+
+    # Print the result
+    echo $result
+}
+
+export BNB_CUDA_VERSION=126
+
+# Function to remove consecutive repeated words in text files within a directory
+remove_repetition() {
+    local dir=$1 # The directory to search for text files
+    # Find all .txt files in the specified directory and process each file
+    find "$dir" -type f -name "*.txt" | while read -r file; do
+        # Use awk to process each line of the file
+        awk '
+        {
+            n = split($0, words, " ") # Split the line into words
+            for (i = n; i > 1; i--) { # Iterate from the last word to the second word
+                if (words[i] != words[i-1]) break # Stop if the current word is not equal to the previous word
+            }
+            for (j = 1; j <= i; j++) { # Print the words up to the point where repetition ends
+                printf "%s%s", words[j], (j == i ? ORS : OFS) # Print the word followed by a space or newline
+            }
+        }
+        ' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file" # Save the processed content to a temporary file and replace the original file
+    done
+}
+
+# This alias 'pie' is a shortcut for installing a Python package in editable mode
+# using the pip command with the --use-pep517 option.
+alias pie='pip install -e . --use-pep517'
+
+# Function to remove specific tags from all *.txt files in a target directory recursively
+remove_boys() {
+    # Assign the first argument passed to the function to the variable target_dir
+    local target_dir="$1"
+
+    # Find all *.txt files in the target directory and its subdirectories
+    find "$target_dir" -type f -name "*.txt" | while read -r file; do
+        # Use sed to remove occurrences of [1-9]boy, [1-9]boys, [1-9]girl, and [1-9]girls along with a comma and space character
+        # -i.bak creates a backup of the original file with a .bak extension
+        # -E enables extended regular expressions
+        sed -i.bak -E 's/, ([1-9]boy|[1-9]boys|[1-9]girl|[1-9]girls)//g' "$file"
+    done
+}
+
+export DOTNET_CLI_TELEMETRY_OPTOUT=1
+
+# Organizes a sample prompt file from the current directory to datasets/furry.
+# It copies the file named sample-prompts.txt to either
+# ~/datasets/furry/sample_prompts/pony or ~/datasets/furry/sample_prompts/compass based on the content.
+# If the file contains the regexp 'score_*', it is copied to ~/datasets/furry/sample_prompts/pony.
+# Otherwise, it is copied to ~/datasets/furry/sample_prompts/compass.
+# The -v option is used with cp to provide verbose output.
+copy_sample_prompts() {
+    file="./sample-prompts.txt"
+    if grep -q 'score_*' "$file"; then
+        cp -v "$file" ~/datasets/furry/sample_prompts/pony/
+    else
+        cp -v "$file" ~/datasets/furry/sample_prompts/compass/
+    fi
+
+    echo "File has been organized."
+}
+
+# Removes all numbers prefixed by a _ from the end of every file before the file extension
+remove_number_prefix() {
+    # Loop through all files in the current directory and its subdirectories
+    for file in **/*_[0-9]*.*; do
+        # Get the new file name by removing '_number' before the file extension
+        new_file="${file%_[0-9]*.*}.${file##*.}"
+        # Rename the file
+        mv "$file" "$new_file"
+    done
+}
+
+# Counts all *.caption and *.txt files in all subdirectories.
+count_captions() {
+    caption_count=$(find . -type f -name "*.caption" | wc -l)
+    txt_count=$(find . -type f -name "*.txt" | wc -l)
+    echo "*.caption files: $caption_count"
+    echo "*.txt files: $txt_count"
+}
+
+# Counts *.caption and *.txt files in each subdirectory individually.
+count_captions_per_folder() {
+    for dir in */; do
+        echo "Directory: $dir"
+        echo -n "*.caption files: "
+        find "$dir" -type f -name "*.caption" | wc -l
+        echo -n "*.txt files: "
+        find "$dir" -type f -name "*.txt" | wc -l
+    done
+}
+
+# open-webui
+oui() {
+    conda activate openwebui
+    open-webui serve --port 6969
+}
+
+llama() {
+    ~/models/Meta-Llama-3-8B-Instruct.Q5_K_M.llamafile -cb -np 4 -a llama-3-8b --embedding --port 11434
+}
+
+alias gcs='git clone --recurse-submodules'
+
+# Function to copy matching .caption files
+copy_matching_caption_files() {
+    # Define the target directory
+    TARGET_DIR="$1"
+
+    # Loop through each image file in the current directory
+    for image_file in *.(jpg|jpeg|png|gif|bmp|tiff|webp|jxl); do
+        # Check if the file exists (to handle cases where no files match the pattern)
+        if [[ -f "$image_file" ]]; then
+            # Extract the base name (without extension)
+            base_name="${image_file%.*}"
+
+            # Define the corresponding .caption file in the target directory
+            caption_file="$TARGET_DIR/$base_name.caption"
+
+            # Check if the .caption file exists
+            if [[ -f "$caption_file" ]]; then
+                # Copy the .caption file to the current directory
+                cp "$caption_file" .
+                echo "Copied $caption_file to the current directory."
+            else
+                echo "No matching .caption file for $image_file."
+            fi
+        fi
+    done
+}
+
+
+# This script performs a text replacement operation in all .txt files within a specified directory.
+# It takes three arguments:
+# 1. target_dir: The directory containing the .txt files where the text replacement will occur.
+# 2. search_text: The text string that needs to be replaced.
+# 3. replace_text: The text string that will replace the search_text.
+#
+# The script uses a for loop to iterate through all .txt files in the target directory.
+# It utilizes the 'sed' command to perform an in-place replacement of the search_text with the replace_text in each file.
+# After processing all files, it prints a message indicating the completion of the text replacement operation.
+replace_text_in_files() {
+    local target_dir=$1
+    local search_text=$2
+    local replace_text=$3
+
+    # Loop through all .txt files in the target directory
+    for file in "$target_dir"/*.txt; do
+        # Use sed to replace the text
+        sed -i "s/$search_text/$replace_text/g" "$file"
+    done
+
+    echo "Text replacement complete in $target_dir!"
+}
+
+# Example usage:
+# replace_text_in_files "/path/to/directory" "squishy (artist)" "by squishy (artist)"
+
+
+# This script adds a specified prefix to the beginning of each text file in a given directory.
+# Usage: inject_to_captions <directory> <prefix>
+# Arguments:
+#   <directory> - The directory containing the text files to be modified.
+#   <prefix> - The prefix to be added to the beginning of each text file.
+# The script checks if the specified directory exists and iterates over each text file in the directory.
+# For each text file, it creates a temporary file with the modified content and then replaces the original file with the temporary file.
+# If the directory does not exist, it prints an error message.
+inject_to_captions() {
+    local dir="$1"
+    local prefix="$2"
+    if [[ -d "$dir" ]]; then
+        for file in "$dir"/*.txt; do
+            if [[ -f "$file" ]]; then
+                if ! grep -q "$prefix" "$file"; then
+                    # Use a temporary file to store the modified content
+                    local temp_file=$(mktemp)
+                    echo "${prefix}, $(cat "$file")" > "$temp_file"
+                    mv "$temp_file" "$file"
+                    echo "Added '${prefix}, ' to the front of $file"
+                else
+                    echo "The tag '${prefix}' already exists in $file"
+                fi
+            fi
+        done
+    else
+        echo "Directory $dir does not exist."
+    fi
+}
+
+# Function to update git repositories in subdirectories
+update_dir() {
+    local target_dir="${1:-.}"
+
+    # Check if there are any subdirectories
+    if [[ -n "$(find "$target_dir" -mindepth 1 -maxdepth 1 -type d)" ]]; then
+        for dir in "$target_dir"/*/; do
+            if [[ -d "$dir" ]]; then
+                (
+                    cd "$dir" || return
+                    # If the directory is a git repository, pull the latest changes
+                    if [[ -d ".git" ]]; then
+                        echo "Updating $(pwd)"
+                        git pull
+                    fi
+                )
+            fi
+        done
+    fi
+}
+
+export TOKENIZERS_PARALLELISM=false
+
+alias grabber="Grabber-cli"
+
+#export force_color_prompt=yes
+
+chop_lora() {
+    local input_file="$1"
+    local base_name="${input_file:r}" # Remove extension
+
+    # Define presets and their corresponding vector strings
+    declare -A presets=(
+        ["ringdingding"]="1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0"
+        ["squeaker"]="1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0"
+        ["heavylifter"]="1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0"
+        ["style1"]="1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
+        ["style2"]="1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
+        ["beeg"]="1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0"
+        ["all"]="1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1"
+        ["allin"]="1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0"
+        ["allmid"]="1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0"
+        ["allout"]="1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1"
+    )
+
+    for preset in ${(k)presets}; do
+        local output_file="${base_name}-${preset}.safetensors"
+        local vector_string="${presets[$preset]}"
+        echo "Generating $output_file"
+        python ~/source/repos/resize_lora/chop_blocks.py "$input_file" "$vector_string" -o "$output_file"
+    done
+}
+
+function swch() {
+    if [ -z "$1" ]; then
+        echo "Please provide a branch name."
+        return 1
+    fi
+    branchname=$1
+    git clean -fxd && git pull && git checkout $branchname
+}
+
+export COMFYUI_PATH="$HOME/ComfyUI"
+export ZSH="$HOME/.oh-my-zsh"
+
+ZSH_THEME="kade"
+# CASE_SENSITIVE="true"
+# HYPHEN_INSENSITIVE="true"
+# DISABLE_MAGIC_FUNCTIONS="true"
+# DISABLE_LS_COLORS="true"
+# DISABLE_AUTO_TITLE="true"
+# ENABLE_CORRECTION="true"
+# COMPLETION_WAITING_DOTS="true"
+# DISABLE_UNTRACKED_FILES_DIRTY="true"
+
+plugins=(git autojump conda-env)
+
+extract_iframes() {
+    # Assign input arguments
+    input_file="$1"
+    scene_change_fraction="${2:-0.1}"
+
+    # Get the base filename without extension
+    base_name=$(basename "$input_file" .webm)
+
+    # Run ffmpeg command
+    /usr/bin/ffmpeg -i "$input_file" -f image2 -vf "select=eq(pict_type\,PICT_TYPE_I)*gt(scene\,$scene_change_fraction),showinfo" -fps_mode vfr "${base_name}-%06d.png"
+}
+
+convert_to_jxl() {
+    local target_directory="$1"
+
+    # Ensure the target directory exists
+    if [[ ! -d "$target_directory" ]]; then
+        echo "The specified directory does not exist: $target_directory" >&2
+        return 1
+    fi
+
+    # Find all JPG, JPEG, and PNG files in the target directory and all subdirectories
+    find "$target_directory" \( -name "*.jpg" -o -name "*.jpeg" -o -name "*.png" \) -type f | while read -r file; do
+        input_path="$file"
+        output_path="${file%.*}.jxl"
+
+        # Convert to JXL using ImageMagick
+        if magick convert "$input_path" "$output_path"; then
+            echo "Converted: $input_path -> $output_path"
+        else
+            echo "Failed to convert $input_path" >&2
+        fi
+    done
+
+    echo "Conversion complete."
+}
+
+
+convert_pxl_to_png() {
+    local target_directory="$1"
+
+    # Ensure the target directory exists
+    if [[ ! -d "$target_directory" ]]; then
+        echo "The specified directory does not exist: $target_directory" >&2
+        return 1
+    fi
+
+    # Find all PXL files in the target directory and all subdirectories
+    find "$target_directory" -type f -name "*.pxl" | while read -r file; do
+        input_path="$file"
+        output_path="${file%.pxl}.png"
+
+        # Convert PXL to PNG using ImageMagick
+        if magick convert "$input_path" "$output_path"; then
+            echo "Converted: $input_path -> $output_path"
+        else
+            echo "Failed to convert $input_path" >&2
+        fi
+    done
+
+    echo "Conversion complete."
+}
+
+
+seed() {
+    local filePath="$1"
+    python3 -c "
+import safetensors, json
+filePath = '$filePath'
+print(json.loads(safetensors.safe_open(filePath, 'np').metadata().get('ss_seed', 'Not found')))
+"
+}
+
+png2mp4() {
+    ffmpeg -framerate 8 -pattern_type glob -i '*.png' -vf scale=512:512 -crf 28 \
+        -c:v libx264 -pix_fmt yuv420p out.mp4
+}
+
+
+source $ZSH/oh-my-zsh.sh
+
+export PATH=$PATH:$HOME/.local/bin:$HOME/source/repos/dataset-tools/target/x86_64-unknown-linux-gnu/release:$HOME/.cargo/bin:$HOME/miniconda3/bin:$HOME/toolkit:$HOME/db/redis-stable/src:$HOME/db/postgresql/bin
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
+export COMFYUI_MODEL_PATH=/home/kade/ComfyUI/models
+
+c_old() {
+    cd ~/ComfyUI &&
+        python3.12 main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers --fast
+}
+
+c() {
+    cd ~/ComfyUI &&
+        conda activate comfyui
+    python main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers --front-end-version Comfy-Org/ComfyUI_frontend@latest --fast
+}
+
+alias t="tensorboard --logdir=$HOME/output_dir/logs"
+alias nvim="vim"
+alias rt="vim ~/.tmux.conf && echo \"Reloading tmux config\" && tmux source ~/.tmux.conf"
+alias zr="vim ~/.zshrc && echo \"Reloading zsh config\" && source ~/.zshrc"
+alias ta="tmux att"
+alias ga="git add . && git commit -avs && git push"
+alias gs="git status"
+alias wd="git diff --word-diff-regex='[^,]+' --patience"
+
+source /home/kade/.config/broot/launcher/bash/br
+
+[ -f ~/.fzf.zsh ] && source ~/.fzf.zsh
+
+alias ls='ls --color=always'
+
+# >>> conda initialize >>>
+# !! Contents within this block are managed by 'conda init' !!
+__conda_setup="$('/home/kade/miniconda3/bin/conda' 'shell.zsh' 'hook' 2> /dev/null)"
+if [ $? -eq 0 ]; then
+    eval "$__conda_setup"
+else
+    if [ -f "/home/kade/miniconda3/etc/profile.d/conda.sh" ]; then
+        . "/home/kade/miniconda3/etc/profile.d/conda.sh"
+    else
+        export PATH="/home/kade/miniconda3/bin:$PATH"
+    fi
+fi
+unset __conda_setup
+# <<< conda initialize <<<
+
+unset CONDA_CHANGEPS1
+
+function conda_prompt_info() {
+    if [[ -n "$CONDA_DEFAULT_ENV" ]]; then
+        echo "(${CONDA_DEFAULT_ENV})"
+    fi
+}
+
+display_custom_help
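For reference, a rough sketch of how a few of the helpers above would be invoked (the file names here are hypothetical):

nv                               # prints e.g. 126 for CUDA 12.6
chop_lora my_lora.safetensors    # writes my_lora-ringdingding.safetensors, my_lora-style1.safetensors, ...
inject_to_captions ~/datasets/foo "by squishy (artist)"
replace_text_in_files ~/datasets/foo "squishy (artist)" "by squishy (artist)"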
9em124t2-499968/clip_model.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d7b0548d12fa649370896982c2af9d03d43285b782bd47639c96e6e0b29473c
+size 1713067838
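(clip_model.pt above, and image_adapter.pt and adapter_model.safetensors below, are committed as Git LFS pointer files: the version/oid/size triplet stands in for the actual weights, which live in LFS storage.)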
9em124t2-499968/config.yaml
ADDED
@@ -0,0 +1,39 @@
+wandb_project: joy-caption-1
+device_batch_size: 2
+batch_size: 256
+learning_rate: 0.0002
+warmup_samples: 18000
+max_samples: 500000
+save_every: 50000
+test_every: 50000
+use_amp: true
+grad_scaler: true
+lr_scheduler_type: cosine
+min_lr_ratio: 0.0
+allow_tf32: true
+seed: 69
+num_workers: 8
+optimizer_type: adamw
+adam_beta1: 0.9
+adam_beta2: 0.999
+adam_eps: 1.0e-08
+adam_weight_decay: 0.0
+clip_grad_norm: 1.0
+dataset: fancyfeast/joy-captioning-20240917a
+clip_model: google/siglip-so400m-patch14-384
+text_model: meta-llama/Meta-Llama-3.1-8B
+resume: null
+gradient_checkpointing: false
+test_size: 2048
+grad_scaler_init: 65536.0
+max_caption_length: 257
+num_image_tokens: 32
+adapter_type: mlp
+text_model_dtype: bfloat16
+pre_test: false
+train_image_model: true
+image_model_lr: null
+train_lora: true
+lora_r: 64
+lora_alpha: 16
+lora_dropout: 0.1
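(A note on the hyperparameters: with device_batch_size: 2 against batch_size: 256, the trainer presumably accumulates gradients over 128 micro-batches per optimizer step, assuming a single device; lora_r: 64 and lora_alpha: 16 match the adapter_config.json below.)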
9em124t2-499968/image_adapter.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e53c3bf8df745a3c19ae3c70dbf9bf23cfdc8f3fdb937000a4eafd2a36914661
+size 86067714
9em124t2-499968/text_model/README.md
ADDED
@@ -0,0 +1,202 @@
+---
+base_model: meta-llama/Meta-Llama-3.1-8B
+library_name: peft
+---
+
+# Model Card for Model ID
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
+### Framework versions
+
+- PEFT 0.12.0
9em124t2-499968/text_model/adapter_config.json
ADDED
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
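(This is a standard PEFT LoRA config: rank-64 adapters with scaling lora_alpha / r = 16 / 64 = 0.25, applied only to the q_proj and v_proj attention projections of the Llama-3.1-8B text model; "inference_mode": true means the adapter is saved for loading rather than further training.)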
9em124t2-499968/text_model/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b48221de174ab0db7b46b4833118c5c0a4c2bf0b51b77b4cc4ab04651bd06cca
+size 109069176
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Balazs Horvath
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
ascii_art/gaeros
ADDED
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+ascii_art = """
+▄▄ • ▄▄▄· ▄▄▄ .▄▄▄ .▄▄ ·
+▐█ ▀ ▪▐█ ▀█ ▀▄.▀·▀▄ █·▪ ▐█ ▀.
+▄█ ▀█▄▄█▀▀█ ▐▀▀▪▄▐▀▀▄ ▄█▀▄ ▄▀▀▀█▄
+▐█▄▪▐█▐█ ▪▐▌▐█▄▄▌▐█•█▌▐█▌.▐▌▐█▄▪▐█
+·▀▀▀▀ ▀ ▀ ▀▀▀ .▀ ▀ ▀█▄▀▪ ▀▀▀▀
+"""
+print(ascii_art)
ascii_art/kade
ADDED
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+ascii_art = """
+▄ •▄ ▄▄▄· ·▄▄▄▄ ▄▄▄ .
+█▌▄▌▪▐█ ▀█ ██▪ ██ ▀▄.▀·
+▐▀▀▄·▄█▀▀█ ▐█· ▐█▌▐▀▀▪▄
+▐█.█▌▐█ ▪▐▌██. ██ ▐█▄▄▌
+·▀ ▀ ▀ ▀ ▀▀▀▀▀• ▀▀▀
+"""
+
+print(ascii_art)
+
crawl/crawl
ADDED
@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Web Crawler and Content Saver
+
+This module provides functionality to crawl web pages, extract content,
+and save the results including markdown text and images. It uses the
+WebCrawler class from crawl4ai and implements parallel image downloading.
+"""
+
+import sys
+import os
+import re
+import platform
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+import requests
+from crawl4ai import WebCrawler
+
+
+def create_crawler():
+    """
+    Create and initialize a WebCrawler instance.
+
+    Returns:
+        WebCrawler: An initialized WebCrawler object.
+    """
+    crawler = WebCrawler(verbose=True)
+    crawler.warmup()
+    return crawler
+
+
+def sanitize_filename(filename):
+    """
+    Remove invalid characters from a filename to make it Windows-compatible.
+
+    Args:
+        filename (str): The original filename.
+
+    Returns:
+        str: The sanitized filename.
+    """
+    # Remove invalid characters for Windows file names
+    return re.sub(r'[<>:"/\\|?*]', '', filename)
+
+
+def download_image(session, image_url, save_dir):
+    """
+    Download an image from a given URL and save it to the specified directory.
+
+    Args:
+        session (requests.Session):
+            The requests session to use for downloading.
+        image_url (str):
+            The URL of the image to download.
+        save_dir (str):
+            The directory to save the downloaded image.
+    """
+    try:
+        # Ensure the URL has a scheme
+        if not re.match(r'^https?://', image_url):
+            image_url = 'https://' + image_url.lstrip('/')
+
+        image_filename = os.path.basename(image_url).split('?')[0]
+        sanitized_image_filename = sanitize_filename(image_filename)
+        image_path = os.path.join(save_dir, sanitized_image_filename)
+
+        response = session.get(image_url, stream=True)
+        response.raise_for_status()
+        with open(image_path, 'wb') as image_file:
+            for chunk in response.iter_content(chunk_size=8192):
+                image_file.write(chunk)
+        print(f"Saved image: {image_path}")
+    except requests.RequestException as e:
+        print(f"Error downloading image {image_url}: {str(e)}")
+    except IOError as e:
+        print(f"Error saving image {image_url}: {str(e)}")
+
+
+def save_result(target_url):
+    """
+    Crawl a given URL, extract content, and save the results.
+
+    This function crawls the specified URL, saves the markdown content,
+    and downloads all associated images in parallel.
+
+    Args:
+        target_url (str): The URL to crawl and save content from.
+    """
+    crawler = create_crawler()
+    result = crawler.run(url=target_url)
+    title = result.metadata.get('title', 'untitled')
+    sanitized_title = sanitize_filename(title).replace(" ", "_")
+
+    # Choose the appropriate base path based on the operating system
+    if platform.system() == "Windows":
+        base_path = "E:\\knowledgebase\\Saved Websites\\"
+    else:
+        base_path = "/home/kade/saved_websites/"
+
+    save_dir = os.path.join(base_path, sanitized_title)
+    os.makedirs(save_dir, exist_ok=True)
+
+    # Save markdown
+    save_path = os.path.join(save_dir, f"{sanitized_title}.md")
+    with open(save_path, "w", encoding="utf-8") as file:
+        file.write(result.markdown)
+    print(f"Saved markdown to {save_path}")
+
+    # Save images in parallel
+    if 'images' in result.media and isinstance(result.media['images'], list):
+        session = requests.Session()
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+                          'AppleWebKit/537.36 (KHTML, like Gecko) '
+                          'Chrome/91.0.4472.124 Safari/537.36',
+            'Referer': target_url,
+            'Accept': ('image/avif,image/webp,image/apng,image/svg+xml,'
+                       'image/*,*/*;q=0.8'),
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Sec-Fetch-Dest': 'image',
+            'Sec-Fetch-Mode': 'no-cors',
+            'Sec-Fetch-Site': 'cross-site',
+        }
+        session.headers.update(headers)
+
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = []
+            for image_data in result.media['images']:
+                if 'src' in image_data:
+                    futures.append(executor.submit(download_image,
+                                                   session,
+                                                   image_data['src'],
+                                                   save_dir))
+
+            for future in as_completed(futures):
+                future.result()
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python crawl.py <URL>")
+    else:
+        url = sys.argv[1]
+        save_result(url)
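A quick sanity check of the script above, assuming crawl4ai is installed (the URL is hypothetical; on Linux output lands under /home/kade/saved_websites/):

./crawl https://example.com
# the page title becomes the folder and markdown name, e.g.
# /home/kade/saved_websites/Example_Domain/Example_Domain.md, plus any images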
crawl/crawl4ai.pyi
ADDED
@@ -0,0 +1,58 @@
+"""
+This module provides a WebCrawler class for AI-related web crawling tasks.
+
+The WebCrawler class is designed to crawl web pages, potentially for
+AI-related data extraction or analysis. It offers methods for initializing
+the crawler, warming it up, and running crawl operations on specified URLs.
+
+Classes:
+    WebCrawler: A web crawler for AI-related tasks.
+
+Example:
+    crawler = WebCrawler(verbose=True)
+    crawler.warmup()
+    result = crawler.run("https://example.com")
+"""
+
+from typing import Any
+
+
+class WebCrawler:
+    """
+    A web crawler for AI-related tasks.
+
+    This class provides functionality to crawl web pages,
+    potentially for AI-related data extraction or analysis.
+
+    Attributes:
+        verbose (bool): If True, enables verbose output during crawling.
+
+    Methods:
+        warmup(): Prepares the crawler for operation.
+        run(url: str): Crawls the specified URL and returns the result.
+    """
+
+    def __init__(self, verbose: bool = False) -> None:
+        self.verbose: bool = verbose
+
+    def warmup(self) -> None:
+        """
+        Prepares the crawler for operation.
+
+        This method should be called before running the crawler to ensure
+        all necessary resources and configurations are set up.
+        """
+
+    def run(self, url: str) -> Any:
+        """
+        Crawls the specified URL and returns the result.
+
+        Args:
+            url (str): The URL to crawl.
+
+        Returns:
+            Any: The result of the crawling operation. The specific type
+                depends on the implementation and could be raw HTML,
+                parsed data, or any other relevant information.
+        """
+
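(This stub exists so type checkers accept the crawl4ai import in the two crawl scripts; note that run() is typed as returning Any, while the scripts actually rely on the returned object's markdown, metadata, and media attributes, which the stub leaves untyped.)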
crawl/crawl_wikipedia
ADDED
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Web Crawler and Content Saver
+
+This module provides functionality to crawl web pages, extract content,
+and save the results including markdown text and images. It uses the
+WebCrawler class from crawl4ai and implements parallel image downloading.
+"""
+
+import sys
+import os
+import re
+import platform
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import urljoin
+from bs4 import BeautifulSoup
+
+import requests
+from crawl4ai import WebCrawler
+
+
+def create_crawler():
+    """
+    Create and initialize a WebCrawler instance.
+
+    Returns:
+        WebCrawler: An initialized WebCrawler object.
+    """
+    crawler = WebCrawler(verbose=True)
+    crawler.warmup()
+    return crawler
+
+
+def sanitize_filename(filename):
+    """
+    Remove invalid characters from a filename to make it Windows-compatible.
+
+    Args:
+        filename (str): The original filename.
+
+    Returns:
+        str: The sanitized filename.
+    """
+    # Remove invalid characters for Windows file names
+    return re.sub(r'[<>:"/\\|?*]', '', filename)
+
+
+def get_full_size_image_url(session, image_url, base_url):
+    """
+    Attempt to find the full-size image URL from a thumbnail URL.
+
+    Args:
+        session (requests.Session): The requests session to use.
+        image_url (str): The thumbnail image URL.
+        base_url (str): The base URL of the page being crawled.
+
+    Returns:
+        str: The full-size image URL if found, otherwise the original URL.
+    """
+    try:
+        response = session.get(image_url)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        # Look for common full-size image patterns
+        full_size_link = soup.find('a', class_=re.compile(r'fullimage|full-size'))
+        if full_size_link and full_size_link.get('href'):
+            return urljoin(base_url, full_size_link['href'])
+
+        # If no full-size link is found, return the original URL
+        return image_url
+    except Exception as e:
+        print(f"Error finding full-size image for {image_url}: {str(e)}")
+        return image_url
+
+
+def download_image(session, image_url, save_dir, base_url):
+    """
+    Download an image from a given URL and save it to the specified directory.
+    Attempt to get the full-size image if the URL is a thumbnail.
+
+    Args:
+        session (requests.Session): The requests session to use for downloading.
+        image_url (str): The URL of the image to download.
+        save_dir (str): The directory to save the downloaded image.
+        base_url (str): The base URL of the page being crawled.
+    """
+    try:
+        full_size_url = get_full_size_image_url(session, image_url, base_url)
+        image_filename = os.path.basename(full_size_url).split('?')[0]
+        sanitized_image_filename = sanitize_filename(image_filename)
+        image_path = os.path.join(save_dir, sanitized_image_filename)
+
+        if os.path.exists(image_path):
+            print(f"Image already exists: {image_path}")
+            return
+
+        response = session.get(full_size_url, stream=True)
+        response.raise_for_status()
+        with open(image_path, 'wb') as image_file:
+            for chunk in response.iter_content(chunk_size=8192):
+                image_file.write(chunk)
+        print(f"Saved full-size image: {image_path}")
+    except requests.RequestException as e:
+        print(f"Error downloading image {full_size_url}: {str(e)}")
+    except IOError as e:
+        print(f"Error saving image {full_size_url}: {str(e)}")
+
+
+def save_result(target_url):
+    """
+    Crawl a given URL, extract content, and save the results.
+
+    This function crawls the specified URL, saves the markdown content,
+    and downloads all associated images in parallel.
+
+    Args:
+        target_url (str): The URL to crawl and save content from.
+    """
+    crawler = create_crawler()
+    result = crawler.run(url=target_url)
+    title = result.metadata.get('title', 'untitled')
+    sanitized_title = sanitize_filename(title).replace(" ", "_")
+
+    # Choose the appropriate base path based on the operating system
+    if platform.system() == "Windows":
+        base_path = "E:\\knowledgebase\\Saved Websites\\"
+    else:
+        base_path = "/home/kade/saved_websites/"
+
+    save_dir = os.path.join(base_path, sanitized_title)
+    os.makedirs(save_dir, exist_ok=True)
+
+    # Save markdown
+    save_path = os.path.join(save_dir, f"{sanitized_title}.md")
+    #sanitized_markdown = sanitize_citations(result.markdown)
+    with open(save_path, "w", encoding="utf-8") as file:
+        file.write(result.markdown)
+        #file.write(sanitized_markdown)
+    print(f"Saved markdown to {save_path}")
+
+    # Save images in parallel
+    if 'images' in result.media and isinstance(result.media['images'], list):
+        session = requests.Session()
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+                          'AppleWebKit/537.36 (KHTML, like Gecko) '
+                          'Chrome/91.0.4472.124 Safari/537.36',
+            'Referer': target_url,
+            'Accept': ('image/avif,image/webp,image/apng,image/svg+xml,'
+                       'image/*,*/*;q=0.8'),
+            'Accept-Language': 'en-US,en;q=0.9',
+            'Sec-Fetch-Dest': 'image',
+            'Sec-Fetch-Mode': 'no-cors',
+            'Sec-Fetch-Site': 'cross-site',
+        }
+        session.headers.update(headers)
+
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            futures = []
+            for image_data in result.media['images']:
+                if 'src' in image_data:
+                    # Use urljoin to create absolute URLs for image sources
+                    absolute_image_url = urljoin(target_url, image_data['src'])
+                    futures.append(executor.submit(download_image,
+                                                   session,
+                                                   absolute_image_url,
+                                                   save_dir,
+                                                   target_url))  # Pass target_url as base_url
+
+            for future in as_completed(futures):
+                future.result()
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Usage: python crawl_wikipedia <URL>")
+    else:
+        url = sys.argv[1]
+        save_result(url)
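(Compared to crawl/crawl, this variant also resolves thumbnails to full-size images via get_full_size_image_url, makes image URLs absolute with urljoin, and skips images that already exist on disk; invocation is otherwise the same, e.g. ./crawl_wikipedia <URL>.)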
joy
ADDED
@@ -0,0 +1,555 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
JoyCaption Alpha One

This module provides functionality for generating captions for images using a
combination of CLIP, LLM, and custom image adapters. It supports various
caption types, tones, and lengths.

The main components include:
- Loading and initializing models (CLIP, LLM, image adapter)
- Processing images and generating captions
- Command-line interface for batch processing images in a directory
"""

import os
import argparse
import re
from pathlib import Path
from PIL import Image
import pillow_jxl
import torch
import torchvision.transforms.functional as TVF
from transformers import (
    AutoModel,
    AutoProcessor,
    AutoTokenizer,
    AutoModelForCausalLM,
    PreTrainedTokenizer,
    PreTrainedTokenizerFast,
)
from torch import nn

CLIP_PATH = "google/siglip-so400m-patch14-384"
MODEL_PATH = "meta-llama/Meta-Llama-3.1-8B"
CHECKPOINT_PATH = Path(__file__).resolve().parent / "9em124t2-499968"
CAPTION_TYPE_MAP = {
    ("descriptive", "formal", False, False): [
        "Write a descriptive caption for this image in a formal tone."
    ],
    ("descriptive", "formal", False, True): [
        "Write a descriptive caption for this image in a formal tone within "
        "{word_count} words."
    ],
    ("descriptive", "formal", True, False): [
        "Write a {length} descriptive caption for this image in a formal tone."
    ],
    ("descriptive", "informal", False, False): [
        "Write a descriptive caption for this image in a casual tone."
    ],
    ("descriptive", "informal", False, True): [
        "Write a descriptive caption for this image in a casual tone within "
        "{word_count} words."
    ],
    ("descriptive", "informal", True, False): [
        "Write a {length} descriptive caption for this image in a casual tone."
    ],
    ("training_prompt", "formal", False, False): [
        "Write a stable diffusion prompt for this image."
    ],
    ("training_prompt", "formal", False, True): [
        "Write a stable diffusion prompt for this image within {word_count} "
        "words."
    ],
    ("training_prompt", "formal", True, False): [
        "Write a {length} stable diffusion prompt for this image."
    ],
    ("rng-tags", "formal", False, False): [
        "Write a list of Booru tags for this image."
    ],
    ("rng-tags", "formal", False, True): [
        "Write a list of Booru tags for this image within {word_count} words."
    ],
    ("rng-tags", "formal", True, False): [
        "Write a {length} list of Booru tags for this image."
    ],
}

HF_TOKEN = os.environ.get("HF_TOKEN", None)


class ImageAdapter(nn.Module):
    """
    Custom image adapter module for processing CLIP vision outputs.

    This module adapts the output of a CLIP vision model to be compatible with
    a text model. It supports optional layer normalization, positional
    embeddings, and deep feature extraction.

    Args:
        input_features (int): Number of input features from the vision model.
        output_features (int): Number of output features to match the text model.
        ln1 (bool): Whether to use layer normalization.
        pos_emb (bool): Whether to use positional embeddings.
        num_image_tokens (int): Number of image tokens.
        deep_extract (bool): Whether to use deep feature extraction.
    """

    def __init__(
        self,
        input_features: int,
        output_features: int,
        ln1: bool,
        pos_emb: bool,
        num_image_tokens: int,
        deep_extract: bool,
    ):
        super().__init__()
        self.deep_extract = deep_extract

        if self.deep_extract:
            input_features = input_features * 5

        self.linear1 = nn.Linear(input_features, output_features)
        self.activation = nn.GELU()
        self.linear2 = nn.Linear(output_features, output_features)
        self.ln1 = nn.Identity() if not ln1 else nn.LayerNorm(input_features)
        self.pos_emb = None if not pos_emb else nn.Parameter(
            torch.zeros(num_image_tokens, input_features)
        )

        self.other_tokens = nn.Embedding(3, output_features)
        self.other_tokens.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self, vision_outputs: torch.Tensor):
        """
        Forward pass of the image adapter.

        Args:
            vision_outputs (torch.Tensor): Output tensor from the CLIP vision model.

        Returns:
            torch.Tensor: Adapted image features.
        """
        if self.deep_extract:
            x = torch.concat((
                vision_outputs[-2],
                vision_outputs[3],
                vision_outputs[7],
                vision_outputs[13],
                vision_outputs[20],
            ), dim=-1)
            assert len(x.shape) == 3, f"Expected 3, got {len(x.shape)}"
            assert x.shape[-1] == vision_outputs[-2].shape[-1] * 5, (
                f"Expected {vision_outputs[-2].shape[-1] * 5}, got {x.shape[-1]}"
            )
        else:
            x = vision_outputs[-2]

        x = self.ln1(x)

        if self.pos_emb is not None:
            assert x.shape[-2:] == self.pos_emb.shape, (
                f"Expected {self.pos_emb.shape}, got {x.shape[-2:]}"
            )
            x = x + self.pos_emb

        x = self.linear1(x)
        x = self.activation(x)
        x = self.linear2(x)

        other_tokens = self.other_tokens(
            torch.tensor([0, 1], device=self.other_tokens.weight.device).expand(
                x.shape[0], -1
            )
        )
        assert other_tokens.shape == (x.shape[0], 2, x.shape[2]), (
            f"Expected {(x.shape[0], 2, x.shape[2])}, got {other_tokens.shape}"
        )
        x = torch.cat((other_tokens[:, 0:1], x, other_tokens[:, 1:2]), dim=1)

        return x

    def get_eot_embedding(self):
        """
        Get the end-of-text embedding.

        Returns:
            torch.Tensor: The end-of-text embedding.
        """
        return self.other_tokens(
            torch.tensor([2], device=self.other_tokens.weight.device)
        ).squeeze(0)


class JoyCaptionModel:
    """
    A class for generating captions for images using CLIP, LLM, and custom
    image adapters.

    This class encapsulates the functionality to load and initialize various
    models (CLIP, LLM, image adapter) and use them to process images and
    generate captions. It supports different caption types, tones, and lengths.

    Attributes:
        clip_model: The CLIP vision model for processing images.
        text_model: The language model for generating captions.
        image_adapter: Custom adapter for processing CLIP vision outputs.
        tokenizer: Tokenizer for the language model.

    Methods:
        load_models(): Load and initialize all required models.
        process_image(input_image, caption_type, caption_tone, caption_length):
            Process an input image and generate a caption based on specified
            parameters.
    """

    def __init__(self):
        self.clip_model = None
        self.text_model = None
        self.image_adapter = None
        self.tokenizer = None

    def load_models(self):
        """
        Load and initialize all required models (CLIP, LLM, image adapter).
        """
        print("Loading CLIP")
        self.clip_model = AutoModel.from_pretrained(CLIP_PATH)
        self.clip_model = self.clip_model.vision_model

        if (CHECKPOINT_PATH / "clip_model.pt").exists():
            print("Loading VLM's custom vision model")
            checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu')
            checkpoint = {k.replace("_orig_mod.module.", ""): v for k, v in checkpoint.items()}
            self.clip_model.load_state_dict(checkpoint)
            del checkpoint

        self.clip_model.eval()
        self.clip_model.requires_grad_(False)
        self.clip_model.to("cuda")

        print("Loading tokenizer")
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
        assert isinstance(self.tokenizer, PreTrainedTokenizer) or isinstance(
            self.tokenizer, PreTrainedTokenizerFast
        ), f"Tokenizer is of type {type(self.tokenizer)}"

        print("Loading LLM")
        if (CHECKPOINT_PATH / "text_model").exists():
            print("Loading VLM's custom text model")
            self.text_model = AutoModelForCausalLM.from_pretrained(
                CHECKPOINT_PATH / "text_model",
                device_map=0,
                torch_dtype=torch.bfloat16
            )
        else:
            self.text_model = AutoModelForCausalLM.from_pretrained(
                MODEL_PATH,
                device_map="auto",
                torch_dtype=torch.bfloat16
            )

        self.text_model.eval()

        print("Loading image adapter")
        self.image_adapter = ImageAdapter(
            self.clip_model.config.hidden_size,
            self.text_model.config.hidden_size,
            False,
            False,
            38,
            False
        )
        self.image_adapter.load_state_dict(
            torch.load(CHECKPOINT_PATH / "image_adapter.pt", map_location="cpu")
        )
        self.image_adapter.eval()
        self.image_adapter.to("cuda")

    @torch.no_grad()
    def process_image(self,
                      input_image: Image.Image,
                      caption_type: str,
                      caption_tone: str,
                      caption_length: str | int,
                      custom_prompt: str | None = None) -> str:
        """
        Process an input image and generate a caption based on specified
        parameters.
        """
        torch.cuda.empty_cache()

        if caption_type == "custom" and custom_prompt:
            prompt_str = custom_prompt
        else:
            prompt_str = self._get_prompt_string(caption_type, caption_tone, caption_length)
        print(f"Prompt: {prompt_str}")

        pixel_values = self._preprocess_image(input_image)
        prompt = self._tokenize_prompt(prompt_str)

        embedded_images = self._embed_image(pixel_values)
        inputs_embeds, input_ids, attention_mask = self._construct_inputs(embedded_images, prompt)

        generate_ids = self._generate_caption(inputs_embeds, input_ids, attention_mask)
        caption = self._decode_caption(generate_ids, input_ids)

        return caption.strip()

    def _get_prompt_string(self, caption_type, caption_tone, caption_length):
        length = None if caption_length == "any" else caption_length

        if isinstance(length, str):
            try:
                length = int(length)
            except ValueError:
                pass

        if caption_type in {"rng-tags", "training_prompt"}:
            caption_tone = "formal"

        prompt_key = (
            caption_type,
            caption_tone,
            isinstance(length, str),
            isinstance(length, int)
        )
        if prompt_key not in CAPTION_TYPE_MAP:
            raise ValueError(f"Invalid caption type: {prompt_key}")

        prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(
            length=length, word_count=length
        )
        return prompt_str

    def _preprocess_image(self, input_image):
        image = input_image.resize((384, 384), Image.LANCZOS)
        pixel_values = TVF.pil_to_tensor(image).unsqueeze(0) / 255.0
        pixel_values = TVF.normalize(pixel_values, [0.5], [0.5])
        pixel_values = pixel_values.to('cuda')
        return pixel_values

    def _tokenize_prompt(self, prompt_str):
        prompt = self.tokenizer.encode(
            prompt_str,
            return_tensors='pt',
            padding=False,
            truncation=False,
            add_special_tokens=False
        )
        return prompt

    def _embed_image(self, pixel_values):
        with torch.amp.autocast_mode.autocast('cuda', enabled=True):
            vision_outputs = self.clip_model(pixel_values=pixel_values, output_hidden_states=True)
            image_features = vision_outputs.hidden_states
            embedded_images = self.image_adapter(image_features)
            embedded_images = embedded_images.to('cuda')
        return embedded_images

    def _construct_inputs(self, embedded_images, prompt):
        prompt_embeds = self.text_model.model.embed_tokens(prompt.to('cuda'))
        assert prompt_embeds.shape == (1, prompt.shape[1], self.text_model.config.hidden_size), (
            f"Prompt shape is {prompt_embeds.shape}, expected "
            f"{(1, prompt.shape[1], self.text_model.config.hidden_size)}"
        )

        embedded_bos = self.text_model.model.embed_tokens(
            torch.tensor([[self.tokenizer.bos_token_id]],
                         device=self.text_model.device,
                         dtype=torch.int64)
        )

        eot_embed = self.image_adapter.get_eot_embedding().unsqueeze(0).to(
            dtype=self.text_model.dtype
        )

        inputs_embeds = torch.cat([
            embedded_bos.expand(embedded_images.shape[0], -1, -1),
            embedded_images.to(dtype=embedded_bos.dtype),
            prompt_embeds.expand(embedded_images.shape[0], -1, -1),
            eot_embed.expand(embedded_images.shape[0], -1, -1),
        ], dim=1)

        input_ids = torch.cat([
            torch.tensor([[self.tokenizer.bos_token_id]], dtype=torch.long),
            torch.zeros((1, embedded_images.shape[1]), dtype=torch.long),
            prompt,
            torch.tensor([[self.tokenizer.eos_token_id]], dtype=torch.long),
        ], dim=1).to('cuda')
        attention_mask = torch.ones_like(input_ids)

        return inputs_embeds, input_ids, attention_mask

    def _generate_caption(self, inputs_embeds, input_ids, attention_mask):
        generate_ids = self.text_model.generate(
            input_ids,
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            max_new_tokens=300,
            do_sample=True,
            suppress_tokens=None
        )
        return generate_ids

    def _decode_caption(self, generate_ids, input_ids):
        generate_ids = generate_ids[:, input_ids.shape[1]:]

        if (generate_ids[0][-1] == self.tokenizer.eos_token_id or
                generate_ids[0][-1] == self.tokenizer.convert_tokens_to_ids("<|eot_id|>")):
            generate_ids = generate_ids[:, :-1]

        caption = self.tokenizer.batch_decode(
            generate_ids,
            skip_special_tokens=False,
            clean_up_tokenization_spaces=False
        )[0]
        return caption


def main():
    """Generate captions for images in a directory and save them as .caption files."""
    parser = argparse.ArgumentParser(
        description="Generate captions for images in a directory and save them as .caption files."
    )
    parser.add_argument("directory", type=str, help="Target directory containing images.")
    parser.add_argument(
        "--caption_type",
        type=str,
        default="descriptive",
        choices=["descriptive", "training_prompt", "rng-tags", "custom"],
        help="Type of caption to generate."
    )
    parser.add_argument(
        "--caption_tone",
        type=str,
        default="formal",
        choices=["formal", "informal"],
        help="Tone of the caption."
    )
    parser.add_argument(
        "--caption_length",
        type=str,
        default="any",
        help="Length of the caption."
    )
    parser.add_argument(
        "--dont-strip-commas",
        action="store_true",
        help="If set, commas will not be stripped from the generated captions."
    )
    parser.add_argument(
        "--custom_prompt",
        type=str,
        help="Custom prompt for the captioner. Use with --caption_type custom."
    )
    parser.add_argument(
        '--add-commas-to-sentence-ends',
        action='store_true',
        help='Add commas after periods in sentences'
    )
    parser.add_argument(
        '--feed-from-tags',
        action='store_true',
        help='Use .txt files with the same base filename as the images as input to the captioner'
    )

    args = parser.parse_args()

    # Initialize and load models
    joy_caption_model = JoyCaptionModel()
    joy_caption_model.load_models()

    # Validate custom prompt usage
    if args.caption_type == "custom" and not args.custom_prompt:
        parser.error("--custom_prompt is required when using --caption_type custom")
    elif args.caption_type != "custom" and args.custom_prompt:
        parser.error("--custom_prompt can only be used with --caption_type custom")

    image_extensions = {".webp", ".png", ".jpeg", ".jpg", ".jxl"}
    for image_path in Path(args.directory).rglob("*"):
        if image_path.suffix.lower() in image_extensions:
            caption_file = image_path.with_suffix('.caption')

            # Skip if the caption file already exists
            if caption_file.exists():
                print(f"Skipping {image_path}: Caption file already exists.")
                continue

            input_image = Image.open(image_path).convert("RGB")

            # Use custom prompt if specified
            if args.caption_type == "custom":
                caption = joy_caption_model.process_image(
                    input_image,
                    "custom",
                    args.caption_tone,
                    args.caption_length,
                    custom_prompt=args.custom_prompt
                )
            elif args.feed_from_tags:
                # --feed-from-tags: use a matching .txt tag file as the prompt
                tag_file = find_tag_file(image_path)
                if tag_file:
                    with open(tag_file, 'r', encoding='utf-8') as f:
                        custom_prompt = f.read().strip()
                    caption = joy_caption_model.process_image(
                        input_image,
                        "custom",
                        args.caption_tone,
                        args.caption_length,
                        custom_prompt=custom_prompt
                    )
                else:
                    caption = joy_caption_model.process_image(
                        input_image,
                        args.caption_type,
                        args.caption_tone,
                        args.caption_length
                    )
            else:
                caption = joy_caption_model.process_image(
                    input_image,
                    args.caption_type,
                    args.caption_tone,
                    args.caption_length
                )

            # Strip commas if the --dont-strip-commas flag is not set
            if not args.dont_strip_commas:
                caption = re.sub(r',\s*([^\d])', r' \1', caption)

            # Add commas after periods if specified
            if args.add_commas_to_sentence_ends:
                caption = re.sub(r'(\.)(\s+)([A-Z])', r'\1,\2\3', caption)

            print(f"Caption for {image_path}:\n{caption}\n")

            # Save the caption to a .caption file
            with open(caption_file, 'w', encoding='utf-8') as f:
                f.write(caption)
            print(f"Caption saved to {caption_file}")


def find_tag_file(image_path):
    """
    Find the corresponding .txt file for the given image path.
    Handles cases where the image has a -(number) suffix.
    """
    base_name = image_path.stem
    tag_file = image_path.with_suffix('.txt')

    if tag_file.exists():
        return tag_file

    # Handle -(number) suffix
    match = re.match(r'(.+)-\d+$', base_name)
    if match:
        base_name = match.group(1)
        tag_file = image_path.with_name(base_name).with_suffix('.txt')
        if tag_file.exists():
            return tag_file

    return None


if __name__ == "__main__":
    main()
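To make the prompt-template lookup concrete, here is a minimal sketch (the values are hypothetical): an integer caption_length selects the word-count template via the (type, tone, is_str, is_int) key, so

    model = JoyCaptionModel()
    print(model._get_prompt_string("descriptive", "formal", 40))
    # -> Write a descriptive caption for this image in a formal tone within 40 words.

and, following the argparse definition above, a typical batch run over a directory (path is a placeholder) would be

    python joy /path/to/images --caption_type descriptive --caption_length 40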
jtp2
ADDED
@@ -0,0 +1,290 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
JTP2 (Joint Tagger Project 2) Image Classification Script

This script implements a multi-label classifier for furry images using the
PILOT2 model. It processes images, generates tags, and saves the results. The
model is based on a Vision Transformer architecture and uses a custom GatedHead
for classification.

Key features:
- Image preprocessing and transformation
- Model inference using PILOT2
- Tag generation with customizable threshold
- Batch processing of image directories
- Saving results as text files alongside images

Usage:
    python jtp2.py <directory> [--threshold <float>]
"""

import os
import json
import argparse
from PIL import Image
import safetensors.torch
import timm
from timm.models import VisionTransformer
import torch
from torchvision.transforms import transforms
from torchvision.transforms import InterpolationMode
import torchvision.transforms.functional as TF
import pillow_jxl

torch.set_grad_enabled(False)


class Fit(torch.nn.Module):
    """
    A custom transform module for resizing and padding images.

    Args:
        bounds (tuple[int, int] | int): The target dimensions for the image.
        interpolation (InterpolationMode): The interpolation method for resizing.
        grow (bool): Whether to allow upscaling of images.
        pad (float | None): The padding value to use if padding is applied.
    """

    def __init__(
        self,
        bounds: tuple[int, int] | int,
        interpolation=InterpolationMode.LANCZOS,
        grow: bool = True,
        pad: float | None = None
    ):
        super().__init__()
        self.bounds = (bounds, bounds) if isinstance(bounds, int) else bounds
        self.interpolation = interpolation
        self.grow = grow
        self.pad = pad

    def forward(self, img: Image) -> Image:
        """
        Applies the Fit transform to the input image.

        Args:
            img (Image): The input PIL Image.

        Returns:
            Image: The transformed PIL Image.
        """
        wimg, himg = img.size
        hbound, wbound = self.bounds
        hscale = hbound / himg
        wscale = wbound / wimg
        if not self.grow:
            hscale = min(hscale, 1.0)
            wscale = min(wscale, 1.0)
        scale = min(hscale, wscale)
        if scale == 1.0:
            return img
        hnew = min(round(himg * scale), hbound)
        wnew = min(round(wimg * scale), wbound)
        img = TF.resize(img, (hnew, wnew), self.interpolation)
        if self.pad is None:
            return img
        # Split the padding evenly so the resized image stays centered
        hpad = hbound - hnew
        wpad = wbound - wnew
        tpad = hpad // 2
        bpad = hpad - tpad
        lpad = wpad // 2
        rpad = wpad - lpad
        return TF.pad(img, (lpad, tpad, rpad, bpad), self.pad)

    def __repr__(self) -> str:
        """
        Returns a string representation of the Fit module.

        Returns:
            str: A string describing the module's parameters.
        """
        return (
            f"{self.__class__.__name__}(bounds={self.bounds}, "
            f"interpolation={self.interpolation.value}, grow={self.grow}, "
            f"pad={self.pad})"
        )


class CompositeAlpha(torch.nn.Module):
    """
    A module for compositing images with alpha channels over a background color.

    Args:
        background (tuple[float, float, float] | float): The background color to
            use for compositing.
    """

    def __init__(self, background: tuple[float, float, float] | float):
        super().__init__()
        self.background = (
            (background, background, background)
            if isinstance(background, float)
            else background
        )
        self.background = torch.tensor(self.background).unsqueeze(1).unsqueeze(2)

    def forward(self, img: torch.Tensor) -> torch.Tensor:
        """
        Applies alpha compositing to the input image tensor.

        Args:
            img (torch.Tensor): The input image tensor.

        Returns:
            torch.Tensor: The composited image tensor.
        """
        if img.shape[-3] == 3:
            return img
        alpha = img[..., 3, None, :, :]
        img[..., :3, :, :] *= alpha
        background = self.background.expand(-1, img.shape[-2], img.shape[-1])
        if background.ndim == 1:
            background = background[:, None, None]
        elif background.ndim == 2:
            background = background[None, :, :]
        img[..., :3, :, :] += (1.0 - alpha) * background
        return img[..., :3, :, :]

    def __repr__(self) -> str:
        """
        Returns a string representation of the CompositeAlpha module.

        Returns:
            str: A string describing the module's parameters.
        """
        return f"{self.__class__.__name__}(background={self.background})"


transform = transforms.Compose([
    Fit((384, 384)),
    transforms.ToTensor(),
    CompositeAlpha(0.5),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
    transforms.CenterCrop((384, 384)),
])

model = timm.create_model(
    "vit_so400m_patch14_siglip_384.webli",
    pretrained=False,
    num_classes=9083
)  # type: VisionTransformer


class GatedHead(torch.nn.Module):
    """
    A custom head module with gating mechanism for the classifier.

    Args:
        num_features (int): The number of input features.
        num_classes (int): The number of output classes.
    """

    def __init__(self, num_features: int, num_classes: int):
        super().__init__()
        self.num_classes = num_classes
        self.linear = torch.nn.Linear(num_features, num_classes * 2)
        self.act = torch.nn.Sigmoid()
        self.gate = torch.nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Applies the gated head to the input tensor.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The output tensor after applying the gated head.
        """
        x = self.linear(x)
        x = self.act(x[:, :self.num_classes]) * self.gate(x[:, self.num_classes:])
        return x


model.head = GatedHead(min(model.head.weight.shape), 9083)
safetensors.torch.load_model(
    model, "/home/kade/source/repos/JTP2/JTP_PILOT2-e3-vit_so400m_patch14_siglip_384.safetensors"
)
if torch.cuda.is_available():
    model.cuda()
    if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
        model.to(dtype=torch.float16, memory_format=torch.channels_last)
model.eval()

with open("/home/kade/source/repos/JTP2/tags.json", "r", encoding="utf-8") as file:
    tags = json.load(file)  # type: dict
allowed_tags = list(tags.keys())
for idx, tag in enumerate(allowed_tags):
    allowed_tags[idx] = tag.replace("_", " ")
sorted_tag_score = {}


def run_classifier(image, threshold):
    """
    Runs the classifier on a single image and returns tags based on the threshold.

    Args:
        image (PIL.Image): The input image.
        threshold (float): The probability threshold for including tags.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            tag probabilities.
    """
    global sorted_tag_score
    img = image.convert('RGBA')
    tensor = transform(img).unsqueeze(0)
    if torch.cuda.is_available():
        tensor = tensor.cuda()
        if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
            tensor = tensor.to(dtype=torch.float16, memory_format=torch.channels_last)
    with torch.no_grad():
        probits = model(tensor)[0].cpu()
        values, indices = probits.topk(250)
    tag_score = dict()
    for i in range(indices.size(0)):
        tag_score[allowed_tags[indices[i]]] = values[i].item()
    sorted_tag_score = dict(
        sorted(tag_score.items(), key=lambda item: item[1], reverse=True)
    )
    return create_tags(threshold)


def create_tags(threshold):
    """
    Creates a list of tags based on the current sorted_tag_score and the given
    threshold.

    Args:
        threshold (float): The probability threshold for including tags.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            filtered tag probabilities.
    """
    global sorted_tag_score
    filtered_tag_score = {
        key: value for key, value in sorted_tag_score.items() if value > threshold
    }
    text_no_impl = ", ".join(filtered_tag_score.keys())
    return text_no_impl, filtered_tag_score


def process_directory(directory, threshold):
    """
    Processes all images in a directory and its subdirectories, generating tags
    for each image.

    Args:
        directory (str): The path to the directory containing images.
        threshold (float): The probability threshold for including tags.

    Returns:
        dict: A dictionary mapping image paths to their generated tags.
    """
    results = {}
    for root, _, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.jxl')):
                image_path = os.path.join(root, file)
                text_file_path = os.path.splitext(image_path)[0] + ".txt"

                # Skip if a corresponding .txt file already exists
                if os.path.exists(text_file_path):
                    continue

                image = Image.open(image_path)
                tags, _ = run_classifier(image, threshold)
                results[image_path] = tags

                # Save tags to a text file with the same name as the image, using UTF-8 encoding
                with open(text_file_path, "w", encoding="utf-8") as text_file:
                    text_file.write(tags)
    return results


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run inference on a directory of images."
    )
    parser.add_argument("directory", type=str, help="Target directory containing images.")
    parser.add_argument(
        "--threshold", type=float, default=0.2, help="Threshold for tag filtering."
    )
    args = parser.parse_args()
    results = process_directory(args.directory, args.threshold)
    for image_path, tags in results.items():
        print(f"{image_path}: {tags}")
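As a quick worked example of the Fit transform above (the dimensions are chosen purely for illustration): a 512x256 image fitted to bounds (384, 384) gives hscale = 384/256 = 1.5 and wscale = 384/512 = 0.75, so scale = 0.75 and the image is resized to 384x192; with pad set, hpad = 192 is split into tpad = 96 and bpad = 96 so the content ends up vertically centered in the 384x384 canvas:

    Fit((384, 384), pad=0.5)(img)  # 512x256 img -> resize to 384x192, then pad to 384x384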
jtp2_overwrite
ADDED
@@ -0,0 +1,316 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
JTP2 (Joint Tagger Project 2) Image Classification Script

This script implements a multi-label classifier for furry images using the
PILOT2 model. It processes images, generates tags, and saves the results. The
model is based on a Vision Transformer architecture and uses a custom GatedHead
for classification.

Key features:
- Image preprocessing and transformation
- Model inference using PILOT2
- Tag generation with customizable threshold
- Batch processing of image directories
- Saving results as text files alongside images

Usage:
    python jtp2.py <directory> [--threshold <float>]
"""

import os
import json
import argparse
from PIL import Image
import safetensors.torch
import timm
from timm.models import VisionTransformer
import torch
from torchvision.transforms import transforms
from torchvision.transforms import InterpolationMode
import torchvision.transforms.functional as TF
import pillow_jxl


class Fit(torch.nn.Module):
    """
    A custom transform module for resizing and padding images.

    Args:
        bounds (tuple[int, int] | int): The target dimensions for the image.
        interpolation (InterpolationMode): The interpolation method for resizing.
        grow (bool): Whether to allow upscaling of images.
        pad (float | None): The padding value to use if padding is applied.
    """

    def __init__(
        self,
        bounds: tuple[int, int] | int,
        interpolation=InterpolationMode.LANCZOS,
        grow: bool = True,
        pad: float | None = None
    ):
        super().__init__()
        self.bounds = (bounds, bounds) if isinstance(bounds, int) else bounds
        self.interpolation = interpolation
        self.grow = grow
        self.pad = pad

    def forward(self, img: Image) -> Image:
        """
        Applies the Fit transform to the input image.

        Args:
            img (Image): The input PIL Image.

        Returns:
            Image: The transformed PIL Image.
        """
        wimg, himg = img.size
        hbound, wbound = self.bounds
        hscale = hbound / himg
        wscale = wbound / wimg
        if not self.grow:
            hscale = min(hscale, 1.0)
            wscale = min(wscale, 1.0)
        scale = min(hscale, wscale)
        if scale == 1.0:
            return img
        hnew = min(round(himg * scale), hbound)
        wnew = min(round(wimg * scale), wbound)
        img = TF.resize(img, (hnew, wnew), self.interpolation)
        if self.pad is None:
            return img
        hpad = hbound - hnew
        wpad = wbound - wnew
        tpad = hpad // 2
        bpad = hpad - tpad
        lpad = wpad // 2
        rpad = wpad - lpad
        return TF.pad(img, (lpad, tpad, rpad, bpad), self.pad)

    def __repr__(self) -> str:
        """
        Returns a string representation of the Fit module.

        Returns:
            str: A string describing the module's parameters.
        """
        return (
            f"{self.__class__.__name__}(bounds={self.bounds}, "
            f"interpolation={self.interpolation.value}, grow={self.grow}, "
            f"pad={self.pad})"
        )


class CompositeAlpha(torch.nn.Module):
    """
    A module for compositing images with alpha channels over a background color.

    Args:
        background (tuple[float, float, float] | float): The background color to
            use for compositing.
    """

    def __init__(self, background: tuple[float, float, float] | float):
        super().__init__()
        self.background = (
            (background, background, background)
            if isinstance(background, float)
            else background
        )
        self.background = torch.tensor(self.background).unsqueeze(1).unsqueeze(2)

    def forward(self, img: torch.Tensor) -> torch.Tensor:
        """
        Applies alpha compositing to the input image tensor.

        Args:
            img (torch.Tensor): The input image tensor.

        Returns:
            torch.Tensor: The composited image tensor.
        """
        if img.shape[-3] == 3:
            return img
        alpha = img[..., 3, None, :, :]
        img[..., :3, :, :] *= alpha
        background = self.background.expand(-1, img.shape[-2], img.shape[-1])
        if background.ndim == 1:
            background = background[:, None, None]
        elif background.ndim == 2:
            background = background[None, :, :]
        img[..., :3, :, :] += (1.0 - alpha) * background
        return img[..., :3, :, :]

    def __repr__(self) -> str:
        """
        Returns a string representation of the CompositeAlpha module.

        Returns:
            str: A string describing the module's parameters.
        """
        return f"{self.__class__.__name__}(background={self.background})"


transform = transforms.Compose([
    Fit((384, 384)),
    transforms.ToTensor(),
    CompositeAlpha(0.5),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
    transforms.CenterCrop((384, 384)),
])

model = timm.create_model(
    "vit_so400m_patch14_siglip_384.webli",
    pretrained=False,
    num_classes=9083
)  # type: VisionTransformer


class GatedHead(torch.nn.Module):
    """
    A custom head module with gating mechanism for the classifier.

    Args:
        num_features (int): The number of input features.
        num_classes (int): The number of output classes.
    """

    def __init__(self, num_features: int, num_classes: int):
        super().__init__()
        self.num_classes = num_classes
        self.linear = torch.nn.Linear(num_features, num_classes * 2)
        self.act = torch.nn.Sigmoid()
        self.gate = torch.nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Applies the gated head to the input tensor.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The output tensor after applying the gated head.
        """
        x = self.linear(x)
        x = self.act(x[:, :self.num_classes]) * self.gate(x[:, self.num_classes:])
        return x


model.head = GatedHead(min(model.head.weight.shape), 9083)
safetensors.torch.load_model(
    model, "/home/kade/source/repos/JTP2/JTP_PILOT2-e3-vit_so400m_patch14_siglip_384.safetensors"
)

if torch.cuda.is_available():
    model.cuda()
    if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
        model.to(dtype=torch.float16, memory_format=torch.channels_last)

model.eval()

with open("/home/kade/source/repos/JTP2/tags.json", "r", encoding="utf-8") as file:
    tags = json.load(file)  # type: dict
allowed_tags = list(tags.keys())

for idx, tag in enumerate(allowed_tags):
    allowed_tags[idx] = tag.replace("_", " ")

sorted_tag_score = {}


def run_classifier(image, threshold):
    """
    Runs the classifier on a single image and returns tags based on the threshold.

    Args:
        image (PIL.Image): The input image.
        threshold (float): The probability threshold for including tags.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            tag probabilities.
    """
    global sorted_tag_score
    img = image.convert('RGBA')
    tensor = transform(img).unsqueeze(0)
    if torch.cuda.is_available():
        tensor = tensor.cuda()
        if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
            tensor = tensor.to(dtype=torch.float16, memory_format=torch.channels_last)
    with torch.no_grad():
        probits = model(tensor)[0].cpu()
        values, indices = probits.topk(250)
    tag_score = dict()
    for i in range(indices.size(0)):
        tag_score[allowed_tags[indices[i]]] = values[i].item()
    sorted_tag_score = dict(
        sorted(tag_score.items(), key=lambda item: item[1], reverse=True)
    )
    return create_tags(threshold)


def create_tags(threshold):
    """
    Creates a list of tags based on the current sorted_tag_score and the given
    threshold.

    Args:
        threshold (float): The probability threshold for including tags.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            filtered tag probabilities.
    """
    global sorted_tag_score
    filtered_tag_score = {
        key: value for key, value in sorted_tag_score.items() if value > threshold
    }
    text_no_impl = ", ".join(filtered_tag_score.keys())
    return text_no_impl, filtered_tag_score


def process_directory(directory, threshold):
    """
    Processes all images in a directory and its subdirectories, generating tags
    for each image.

    Args:
        directory (str): The path to the directory containing images.
        threshold (float): The probability threshold for including tags.

    Returns:
        dict: A dictionary mapping image paths to their generated tags.
    """
    results = {}
    for root, _, files in os.walk(directory):
        for file in files:
            if file.lower().endswith(('.jpg', '.jpeg', '.png', '.jxl')):
                image_path = os.path.join(root, file)
                image = Image.open(image_path)
                tags, _ = run_classifier(image, threshold)
                results[image_path] = tags
                # Save tags to a text file with the same name as the image
                text_file_path = os.path.splitext(image_path)[0] + ".txt"
                with open(text_file_path, "w", encoding="utf-8") as text_file:
                    text_file.write(tags)
    return results


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Run inference on a directory of images."
    )
    parser.add_argument("directory", type=str, help="Target directory containing images.")
    parser.add_argument(
        "--threshold", type=float, default=0.2, help="Threshold for tag filtering."
    )
    args = parser.parse_args()

    results = process_directory(args.directory, args.threshold)
    for image_path, tags in results.items():
        print(f"{image_path}: {tags}")
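This file differs from jtp2 mainly in that it regenerates (overwrites) tag files even when a .txt already exists. For readers unfamiliar with the gating trick in GatedHead, used by both taggers, here is a minimal self-contained sketch (shapes are hypothetical): the linear layer emits 2 * num_classes values; the first half is sigmoided into raw per-tag scores, the second half into a learned gate, and their elementwise product gives the final probabilities in (0, 1).

    import torch
    head = GatedHead(num_features=8, num_classes=3)
    scores = head(torch.randn(1, 8))  # shape (1, 3), every entry in (0, 1)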
paper-qa.code-workspace
ADDED
@@ -0,0 +1,11 @@
{
    "folders": [
        {
            "path": "."
        },
        {
            "path": "../miniconda3/lib/python3.12/site-packages/paperqa"
        }
    ],
    "settings": {}
}
papers_please
ADDED
@@ -0,0 +1,151 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import subprocess
import sys
from pathlib import Path
import pickle
from paperqa import Settings, Docs

local_llm_config = {
    "model_list": [
        {
            "model_name": "ollama/llama3.1",
            "litellm_params": {
                "model": "ollama/llama3.1",
            },
        },
    ]
}
local_emb_config = {
    "model_list": [
        {
            "model_name": "ollama/mxbai-embed-large",
            "litellm_params": {
                "model": "ollama/mxbai-embed-large",
            },
        }
    ]
}

settings = Settings(
    llm="ollama/llama3.1",
    llm_config=local_llm_config,
    summary_llm="ollama/llama3.1",
    summary_llm_config=local_llm_config,
    embedding="ollama/mxbai-embed-large",
    embedding_config=local_emb_config,
)


def find_main_tex_file(folder_path: Path):
    """
    Find the main LaTeX file in the given folder.

    This function searches for a .tex file that is likely to be the main file
    of a LaTeX project. It first checks for common names like 'main.tex',
    then looks for files containing '\\documentclass', and finally returns
    the first .tex file if no other criteria are met.

    Args:
        folder_path (Path): The path to the folder to search in.

    Returns:
        Path: The path to the main .tex file, or None if no .tex files are found.
    """
    tex_files = list(folder_path.glob('**/*.tex'))
    if not tex_files:
        return None

    # Check for common main file names (compare by file name, since the
    # glob yields full Path objects)
    common_names = ['main.tex', 'paper.tex', 'article.tex']
    for file in tex_files:
        if file.name in common_names:
            return file

    # If no common name found, look for \documentclass
    for file in tex_files:
        with open(file, 'r', encoding='utf-8') as f:
            content = f.read()
            if '\\documentclass' in content:
                return file
    # If still not found, return the first .tex file
    return tex_files[0]


def run_latexpand(input_file, output_file):
    """
    Run the latexpand command on the input file and write the result to the output file.

    This function uses the latexpand tool to expand a LaTeX file, including all its
    inputs and packages, into a single file. The expanded content is then written
    to the specified output file.

    Args:
        input_file (str or Path): The path to the input LaTeX file.
        output_file (str or Path): The path where the expanded LaTeX content will be written.

    Raises:
        subprocess.CalledProcessError: If latexpand encounters an error during execution.
        FileNotFoundError: If the latexpand command is not found in the system PATH.
    """
    try:
        result = subprocess.run(['latexpand', input_file],
                                capture_output=True, text=True, check=True)
        with open(output_file, 'w', encoding='utf-8') as output_file_handle:
            output_file_handle.write(result.stdout)
        print(f"Expanded LaTeX written to {output_file}")
    except subprocess.CalledProcessError as e:
        print(f"Error running latexpand: {e}")
    except FileNotFoundError:
        print("latexpand not found. Please make sure it's installed and in your PATH.")


cache_path = Path("pqa_index.pkl")

if cache_path.exists():
    with open(cache_path, "rb") as f:
        docs = pickle.load(f)
else:
    docs = Docs()
    for root, dirs, files in Path(".").walk():
        for dir_name in dirs:
            if dir_name.startswith("arXiv-"):
                dir_path = root / dir_name
                concat_main = dir_path / ".main.tex"
                try:
                    # Step 1: Find the main entry TeX file
                    main_file = find_main_tex_file(dir_path)
                    if not main_file:
                        raise ValueError("No main TeX file found.")
                    # Step 2 & 3: Run latexpand and write output
                    run_latexpand(main_file, dir_path / ".main.tex")
                except (ValueError, subprocess.CalledProcessError,
                        FileNotFoundError) as preprocess_error:
                    print(f"Failed to pre-process {dir_name}: {preprocess_error}")
                    continue
                print(f"adding {dir_path} (latex source)")
                try:
                    docs.add(concat_main, settings=settings, disable_check=True)
                except (IOError, OSError, ValueError) as add_error:
                    print(f"Failed to add {dir_path}: {add_error}")
                    continue
                dirs.remove(dir_name)
                break
        else:
            for file_name in files:
                if file_name.lower().endswith((".pdf", ".txt", ".md", ".tex")):
                    file_path = root / file_name
                    print(f"adding {file_path}")
                    docs.add(file_path, settings=settings, disable_check=True)

    with open(cache_path, "wb") as f:
        pickle.dump(docs, f)


if __name__ == "__main__":
    if len(sys.argv) > 1:
        QUERY = " ".join(sys.argv[1:])
        answer = docs.query(QUERY, settings=settings)
        print(answer)
    else:
        print("Please provide a query as a command-line argument.")
        print("Usage: python script_name.py 'Your query here'")
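Following the usage message above, a typical invocation (the query text is purely illustrative) is

    python papers_please 'What regularization does the second paper use?'

On the first run the script expands any arXiv- source folders, indexes everything, and pickles the index to pqa_index.pkl; subsequent runs load that cache and answer immediately.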
password
ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import secrets
import string


def generate_password(length=16):
    # Use the secrets module rather than random: random's generator is not
    # cryptographically secure, so it is unsuitable for passwords.
    characters = string.ascii_letters + string.digits + string.punctuation
    password = ''.join(secrets.choice(characters) for _ in range(length))
    return password


# Generate a strong 16-character long password
strong_password = generate_password()
print(strong_password)
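The length is parameterized, so a longer password is one call away:

    generate_password(24)  # returns a 24-character password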