k4d3 committed on
Commit
f1a2ec8
·
1 Parent(s): efca0c9

Initial commit

Browse files

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

.tmux.conf ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # List of plugins
2
+ set -g @plugin 'tmux-plugins/tpm'
3
+ set -g @plugin 'tmux-plugins/tmux-sensible'
4
+ set -g @plugin 'tmux-plugins/tmux-yank'
5
+ set -g @plugin 'tmux-plugins/tmux-resurrect'
6
+ set -g @plugin 'tmux-plugins/tmux-continuum'
7
+ set -g @plugin 'sainnhe/tmux-fzf'
8
+ set -g @plugin 'catppuccin/tmux'
9
+
10
+ # Enable clipboard integration
11
+ set -g set-clipboard on
12
+
13
+ # Enable tmux-continuum and set it to boot on start
14
+ set -g @continuum-boot 'on'
15
+ # Set the strategy for tmux-resurrect to use nvim sessions
16
+ set -g @resurrect-strategy-nvim 'session'
17
+
18
+ # Disable the bell action
19
+ set-option -g bell-action none
20
+
21
+ # Set the default shell to zsh without global rc files
22
+ set-option -g default-command "zsh --no-globalrcs"
23
+ # Enable setting terminal titles
24
+ set -g set-titles on
25
+ # Set the format for terminal titles
26
+ set -g set-titles-string '#T #{pane_current_command}'
27
+ # Set the window size to the smallest
28
+ set -g window-size smallest
29
+ # Enable aggressive resize for windows
30
+ setw -g aggressive-resize on
31
+ # Enable mouse support
32
+ set -g mouse on
33
+ # Set the default terminal type to tmux-256color
34
+ set -g default-terminal "tmux-256color"
35
+ # Append terminal overrides for xterm-256color
36
+ set-option -ga terminal-overrides ",xterm-256color:Tc"
37
+
38
+ # Bind 'r' to reload the tmux config and display a message
39
+ bind r source-file ~/.tmux.conf \; display "Config reloaded!"
40
+
41
+ # Initialize TMUX plugin manager (keep this line at the very bottom of tmux.conf)
42
+ run '~/.tmux/plugins/tpm/tpm'
.zshrc ADDED
@@ -0,0 +1,498 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export LANG=ja_JP.UTF-8
2
+ export LC_ALL=ja_JP.UTF-8
3
+
4
+ display_custom_help() {
5
+ echo "----------------------------------------------------------------------------------------------------------------------"
6
+ printf "%s\n" "$(conda env list)"
7
+ echo "----------------------------------------------------------------------------------------------------------------------"
8
+ echo "LLMs"
9
+ echo "---"
10
+ echo "conda activate openwebui && open-webui serve --port 6969"
11
+ echo "ollama serve"
12
+ echo "----------------------------------------------------------------------------------------------------------------------"
13
+ echo "Taggers + Captioners"
14
+ echo "----------------------------------------------------------------------------------------------------------------------"
15
+ echo "JTP2"
16
+ echo "---"
17
+ echo "~/toolkit/jtp2 <dir>"
18
+ echo "Joy Captioner"
19
+ echo "---"
20
+ echo "~/source/repos/joy/joy <dir> --custom_prompt \"<prompt>\" --caption_type custom"
21
+ echo "Waifu Diffusion Tagger:"
22
+ echo "---"
23
+ echo "python ~/source/repos/wdv3-timm/wdv3_timm.py <dir> --model eva02"
24
+ echo "----------------------------------------------------------------------------------------------------------------------"
25
+ echo "Database Stuff"
26
+ echo "----------------------------------------------------------------------------------------------------------------------"
27
+ echo "Redis"
28
+ echo "---"
29
+ echo "~/db/redis-stable/src/redis-server : Start server."
30
+ echo "PostgreSQL"
31
+ echo "---"
32
+ echo "psql -d postgres -h /tmp : Connect using socket directory."
33
+ echo "Start server:"
34
+ echo "pg_ctl -D \$HOME/db/postgresql/data -l \$HOME/db/pgsql.log start"
35
+ # echo "Commands, Aliases, and Custom Functions:"
36
+ # echo "----------------------------------------------------------------------------------------------------------------------"
37
+ # echo "pie : \`pip install -e . --use-pep517\`"
38
+ # echo "gcs : \`git clone --recurse-submodules\`"
39
+ # echo "dust : A more intuitive version of du."
40
+ # echo "ranger : A vim inspired file manager."
41
+ # echo "htop : Interactive process viewer."
42
+ # echo "nvtop : Interactive GPU process viewer."
43
+ # echo "nvitop : An even more interactive GPU process viewer."
44
+ # echo "nvim : Alias for vim."
45
+ # echo "rt : Edit tmux config and reload it."
46
+ # echo "zr : Edit zsh config and reload it."
47
+ # echo "ta : Attach to tmux session."
48
+ # echo "ga : Git add, commit, and push."
49
+ # echo "gs : Git status."
50
+ # echo "wd : Word diff in git."
51
+ # echo "grabber : Alias for Grabber-cli."
52
+ # echo "ls : Alias for 'ls --color=always'."
53
+ # echo "----------------------------------------------------------------------------------------------------------------------"
54
+ echo "- 🐺 TOOLS -"
55
+ echo "----------------------------------------------------------------------------------------------------------------------"
56
+ echo "nv : Returns the cuda version number."
57
+
58
+
59
+
60
+ echo "remove_repetition : Removes repetition in txt files in a target directory."
61
+ echo "copy_sample_prompts : Copies ./sample-prompts.txt file from the current dir to datasets/furry."
62
+ echo "remove_number_prefix : Removes all numbers prefixed by a _ from the end of every file."
63
+ echo "count_captions : Counts *.caption and *.txt files in each subdirectory."
64
+ echo "count_captions_per_folder : Counts *.caption and *.txt files in each subdirectory individually."
65
+ echo "llama : Runs Meta-Llama-3-8B-Instruct on port 11434."
66
+ echo "copy_matching_caption_files : Copies matching .caption files for <dir> to the current directory."
67
+ echo "c : Change to ComfyUI directory and start the server."
68
+ echo "t : Start TensorBoard with logs directory."
69
+ echo "png2mp4 : Convert PNG sequence to MP4 video."
70
+ echo "seed <file> : Display the seed from a safetensors file."
71
+ echo "swch <branch> : Clean repo and switch to specified git branch."
72
+ echo "convert_to_jxl <directory> : Convert JPG, JPEG, and PNG files to JXL in the specified directory."
73
+ echo "convert_pxl_to_png <directory> : Convert PXL files to PNG in the specified directory."
74
+ echo "replace_text_in_files [dir] <src> <replace> : Perform text replacement on *.txt files in a target directory."
75
+ echo "update_dir [directory] : Update git repositories in subdirectories."
76
+ echo "inject_to_captions [dir] \"txt\" : Add prefix to the beginning of each text file in a directory."
77
+ echo "chop_lora <input_file> : Generate multiple versions of a Lora file with different presets."
78
+ echo "----------------------------------------------------------------------------------------------------------------------"
79
+ }
80
+
81
+ export RUST_BACKTRACE=1
82
+
83
+ # This function `nv` retrieves the version of the NVIDIA CUDA Compiler (nvcc) installed on the system.
84
+ # It extracts the version number from the `nvcc --version` command output.
85
+ # The version number is then formatted by removing the dot (e.g., 12.6 becomes 126).
86
+ # Finally, the function returns the formatted version number.
87
+ function nv() {
88
+ # Get the nvcc version output
89
+ local nvcc_output=$(nvcc --version)
90
+
91
+ # Extract the version number (12.6)
92
+ local version=$(echo "$nvcc_output" | grep -oP 'release \K[0-9]+\.[0-9]+')
93
+
94
+ # Remove the dot to get 126
95
+ local result=$(echo "$version" | tr -d '.')
96
+
97
+ # Print the result
98
+ echo $result
99
+ }
100
+
101
+ export BNB_CUDA_VERSION=126
102
+
103
+ # Function to remove consecutive repeated words in text files within a directory
104
+ remove_repetition() {
105
+ local dir=$1 # The directory to search for text files
106
+ # Find all .txt files in the specified directory and process each file
107
+ find "$dir" -type f -name "*.txt" | while read -r file; do
108
+ # Use awk to process each line of the file
109
+ awk '
110
+ {
111
+ n = split($0, words, " ") # Split the line into words
112
+ for (i = n; i > 1; i--) { # Iterate from the last word to the second word
113
+ if (words[i] != words[i-1]) break # Stop if the current word is not equal to the previous word
114
+ }
115
+ for (j = 1; j <= i; j++) { # Print the words up to the point where repetition ends
116
+ printf "%s%s", words[j], (j == i ? ORS : OFS) # Print the word followed by a space or newline
117
+ }
118
+ }
119
+ ' "$file" > "${file}.tmp" && mv "${file}.tmp" "$file" # Save the processed content to a temporary file and replace the original file
120
+ done
121
+ }
122
+
123
+ # This alias 'pie' is a shortcut for installing a Python package in editable mode
124
+ # using the pip command with the --use-pep517 option.
125
+ alias pie='pip install -e . --use-pep517'
126
+
127
+ # Function to remove specific tags from all *.txt files in a target directory recursively
128
+ remove_boys() {
129
+ # Assign the first argument passed to the function to the variable target_dir
130
+ local target_dir="$1"
131
+
132
+ # Find all *.txt files in the target directory and its subdirectories
133
+ find "$target_dir" -type f -name "*.txt" | while read -r file; do
134
+ # Use sed to remove occurrences of [1-9]boy, [1-9]boys, [1-9]girl, and [1-9]girls along with a comma and space character
135
+ # -i.bak creates a backup of the original file with a .bak extension
136
+ # -E enables extended regular expressions
137
+ sed -i.bak -E 's/, ([1-9]boy|[1-9]boys|[1-9]girl|[1-9]girls)//g' "$file"
138
+ done
139
+ }
140
+
141
+ export DOTNET_CLI_TELEMETRY_OPTOUT=1
142
+
143
+ # Organizes a sample prompt file from the current directory to datasets/furry.
144
+ # It copies the file named sample-prompts.txt to either
145
+ # ~/datasets/furry/sample_prompts/pony or ~/datasets/furry/sample_prompts/compass based on the content.
146
+ # If the file contains the regexp 'score_*', it is copied to ~/datasets/furry/sample_prompts/pony.
147
+ # Otherwise, it is copied to ~/datasets/furry/sample_prompts/compass.
148
+ # The -v option is used with cp to provide verbose output.
149
+ copy_sample_prompts() {
150
+ file="./sample-prompts.txt"
151
+ if grep -q 'score_*' "$file"; then
152
+ cp -v "$file" ~/datasets/furry/sample_prompts/pony/
153
+ else
154
+ cp -v "$file" ~/datasets/furry/sample_prompts/compass/
155
+ fi
156
+
157
+ echo "File has been organized."
158
+ }
159
+
160
+ # Removes all numbers prefixed by a _ from the end of every file before the file extension
161
+ remove_number_prefix() {
162
+ # Loop through all files in the current directory and its subdirectories
163
+ for file in **/*_[0-9]*.*; do
164
+ # Get the new file name by removing '_number' before the file extension
165
+ new_file="${file%_[0-9]*.*}.${file##*.}"
166
+ # Rename the file
167
+ mv "$file" "$new_file"
168
+ done
169
+ }
170
+
171
+ # Counts all *.caption and *.txt files in all subdirectories.
172
+ count_captions() {
173
+ caption_count=$(find . -type f -name "*.caption" | wc -l)
174
+ txt_count=$(find . -type f -name "*.txt" | wc -l)
175
+ echo "*.caption files: $caption_count"
176
+ echo "*.txt files: $txt_count"
177
+ }
178
+
179
+ # Counts *.caption and *.txt files in each subdirectory individually.
180
+ count_captions_per_folder() {
181
+ for dir in */; do
182
+ echo "Directory: $dir"
183
+ echo -n "*.caption files: "
184
+ find "$dir" -type f -name "*.caption" | wc -l
185
+ echo -n "*.txt files: "
186
+ find "$dir" -type f -name "*.txt" | wc -l
187
+ done
188
+ }
189
+
190
+ # open-webui
191
+ oui() {
192
+ conda activate openwebui
193
+ open-webui serve --port 6969
194
+ }
195
+
196
+ llama() {
197
+ ~/models/Meta-Llama-3-8B-Instruct.Q5_K_M.llamafile -cb -np 4 -a llama-3-8b --embedding --port 11434
198
+ }
199
+
200
+ alias gcs='git clone --recurse-submodules'
201
+
202
+ # Function to copy matching .caption files
203
+ copy_matching_caption_files() {
204
+ # Define the target directory
205
+ TARGET_DIR="$1"
206
+
207
+ # Loop through each image file in the current directory
208
+ for image_file in *.(jpg|jpeg|png|gif|bmp|tiff|webp|jxl); do
209
+ # Check if the file exists (to handle cases where no files match the pattern)
210
+ if [[ -f "$image_file" ]]; then
211
+ # Extract the base name (without extension)
212
+ base_name="${image_file%.*}"
213
+
214
+ # Define the corresponding .caption file in the target directory
215
+ caption_file="$TARGET_DIR/$base_name.caption"
216
+
217
+ # Check if the .caption file exists
218
+ if [[ -f "$caption_file" ]]; then
219
+ # Copy the .caption file to the current directory
220
+ cp "$caption_file" .
221
+ echo "Copied $caption_file to the current directory."
222
+ else
223
+ echo "No matching .caption file for $image_file."
224
+ fi
225
+ fi
226
+ done
227
+ }
228
+
229
+
230
+ # This script performs a text replacement operation in all .txt files within a specified directory.
231
+ # It takes three arguments:
232
+ # 1. target_dir: The directory containing the .txt files where the text replacement will occur.
233
+ # 2. search_text: The text string that needs to be replaced.
234
+ # 3. replace_text: The text string that will replace the search_text.
235
+ #
236
+ # The script uses a for loop to iterate through all .txt files in the target directory.
237
+ # It utilizes the 'sed' command to perform an in-place replacement of the search_text with the replace_text in each file.
238
+ # After processing all files, it prints a message indicating the completion of the text replacement operation.
239
+ replace_text_in_files() {
240
+ local target_dir=$1
241
+ local search_text=$2
242
+ local replace_text=$3
243
+
244
+ # Loop through all .txt files in the target directory
245
+ for file in "$target_dir"/*.txt; do
246
+ # Use sed to replace the text
247
+ sed -i "s/$search_text/$replace_text/g" "$file"
248
+ done
249
+
250
+ echo "Text replacement complete in $target_dir!"
251
+ }
252
+
253
+ # Example usage:
254
+ # replace_text_in_files "/path/to/directory" "squishy (artist)" "by squishy (artist)"
255
+
256
+
257
+ # This script adds a specified prefix to the beginning of each text file in a given directory.
258
+ # Usage: inject_to_captions <directory> <prefix>
259
+ # Arguments:
260
+ # <directory> - The directory containing the text files to be modified.
261
+ # <prefix> - The prefix to be added to the beginning of each text file.
262
+ # The script checks if the specified directory exists and iterates over each text file in the directory.
263
+ # For each text file, it creates a temporary file with the modified content and then replaces the original file with the temporary file.
264
+ # If the directory does not exist, it prints an error message.
265
+ inject_to_captions() {
266
+ local dir="$1"
267
+ local prefix="$2"
268
+ if [[ -d "$dir" ]]; then
269
+ for file in "$dir"/*.txt; do
270
+ if [[ -f "$file" ]]; then
271
+ if ! grep -q "$prefix" "$file"; then
272
+ # Use a temporary file to store the modified content
273
+ local temp_file=$(mktemp)
274
+ echo "${prefix}, $(cat "$file")" > "$temp_file"
275
+ mv "$temp_file" "$file"
276
+ echo "Added '${prefix}, ' to the front of $file"
277
+ else
278
+ echo "The tag '${prefix}' already exists in $file"
279
+ fi
280
+ fi
281
+ done
282
+ else
283
+ echo "Directory $dir does not exist."
284
+ fi
285
+ }
286
+
287
+ # Function to update git repositories in subdirectories
288
+ update_dir() {
289
+ local target_dir="${1:-.}"
290
+
291
+ # Check if there are any subdirectories
292
+ if [[ -n "$(find "$target_dir" -mindepth 1 -maxdepth 1 -type d)" ]]; then
293
+ for dir in "$target_dir"/*/; do
294
+ if [[ -d "$dir" ]]; then
295
+ (
296
+ cd "$dir" || return
297
+ # If the directory is a git repository, pull the latest changes
298
+ if [[ -d ".git" ]]; then
299
+ echo "Updating $(pwd)"
300
+ git pull
301
+ fi
302
+ )
303
+ fi
304
+ done
305
+ fi
306
+ }
307
+
308
+ export TOKENIZERS_PARALLELISM=false
309
+
310
+ alias grabber="Grabber-cli"
311
+
312
+ #export force_color_prompt=yes
313
+
314
+ chop_lora() { # Usage: chop_lora <input_file> - runs chop_blocks.py once per preset, writing <base>-<preset>.safetensors
315
+ local input_file="$1"
316
+ local base_name="${input_file:r}" # Remove extension
317
+
318
+ # Presets and the vector strings passed verbatim to chop_blocks.py; [key]=value must have no spaces around '='
319
+ declare -A presets=(
320
+ [ringdingding]="1,0,0,0,0,0,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0"
321
+ [squeaker]="1,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,0,0,0,0,0"
322
+ [heavylifter]="1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0"
323
+ [style1]="1,0,0,0,1,1,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
324
+ [style2]="1,0,0,0,0,0,0,1,1,0,0,0,1,1,1,1,1,1,0,0,0"
325
+ [beeg]="1,0,0,0,1,1,0,1,1,0,1,0,1,1,1,0,0,0,0,0,0"
326
+ [all]="1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1"
327
+ [allin]="1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0"
328
+ [allmid]="1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0"
329
+ [allout]="1,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1"
330
+ )
331
+
332
+ for preset in ${(k)presets}; do
333
+ local output_file="${base_name}-${preset}.safetensors"
334
+ local vector_string="${presets[$preset]}"
335
+ echo "Generating $output_file"
336
+ python ~/source/repos/resize_lora/chop_blocks.py "$input_file" "$vector_string" -o "$output_file"
337
+ done
338
+ }
339
+
340
+ function swch() {
341
+ if [ -z "$1" ]; then
342
+ echo "Please provide a branch name."
343
+ return 1
344
+ fi
345
+ branchname=$1
346
+ git clean -fxd && git pull && git checkout $branchname
347
+ }
348
+
349
+ export COMFYUI_PATH="$HOME/ComfyUI"
350
+ export ZSH="$HOME/.oh-my-zsh"
351
+
352
+ ZSH_THEME="kade"
353
+ # CASE_SENSITIVE="true"
354
+ # HYPHEN_INSENSITIVE="true"
355
+ # DISABLE_MAGIC_FUNCTIONS="true"
356
+ # DISABLE_LS_COLORS="true"
357
+ # DISABLE_AUTO_TITLE="true"
358
+ # ENABLE_CORRECTION="true"
359
+ # COMPLETION_WAITING_DOTS="true"
360
+ # DISABLE_UNTRACKED_FILES_DIRTY="true"
361
+
362
+ plugins=(git autojump conda-env)
363
+
364
+ extract_iframes() {
365
+ # Assign input arguments
366
+ input_file="$1"
367
+ scene_change_fraction="${2:-0.1}"
368
+
369
+ # Get the base filename without extension
370
+ base_name=$(basename "$input_file" .webm)
371
+
372
+ # Run ffmpeg command
373
+ /usr/bin/ffmpeg -i "$input_file" -f image2 -vf "select=eq(pict_type\,PICT_TYPE_I)*gt(scene\,$scene_change_fraction),showinfo" -fps_mode vfr "${base_name}-%06d.png"
374
+ }
375
+
376
+ convert_to_jxl() {
377
+ local target_directory="$1"
378
+
379
+ # Ensure the target directory exists
380
+ if [[ ! -d "$target_directory" ]]; then
381
+ echo "The specified directory does not exist: $target_directory" >&2
382
+ return 1
383
+ fi
384
+
385
+ # Find all JPG, JPEG, and PNG files in the target directory and all subdirectories
386
+ find "$target_directory" \( -name "*.jpg" -o -name "*.jpeg" -o -name "*.png" \) -type f | while read -r file; do
387
+ input_path="$file"
388
+ output_path="${file%.*}.jxl"
389
+
390
+ # Convert to JXL using ImageMagick
391
+ if magick convert "$input_path" "$output_path"; then
392
+ echo "Converted: $input_path -> $output_path"
393
+ else
394
+ echo "Failed to convert $input_path" >&2
395
+ fi
396
+ done
397
+
398
+ echo "Conversion complete."
399
+ }
400
+
401
+
402
+ convert_pxl_to_png() {
403
+ local target_directory="$1"
404
+
405
+ # Ensure the target directory exists
406
+ if [[ ! -d "$target_directory" ]]; then
407
+ echo "The specified directory does not exist: $target_directory" >&2
408
+ return 1
409
+ fi
410
+
411
+ # Find all PXL files in the target directory and all subdirectories
412
+ find "$target_directory" -type f -name "*.pxl" | while read -r file; do
413
+ input_path="$file"
414
+ output_path="${file%.pxl}.png"
415
+
416
+ # Convert PXL to PNG using ImageMagick
417
+ if magick convert "$input_path" "$output_path"; then
418
+ echo "Converted: $input_path -> $output_path"
419
+ else
420
+ echo "Failed to convert $input_path" >&2
421
+ fi
422
+ done
423
+
424
+ echo "Conversion complete."
425
+ }
426
+
427
+
428
+ seed() {
429
+ local filePath="$1"
430
+ python3 -c "
431
+ import safetensors, json
432
+ filePath = '$filePath'
433
+ print(json.loads(safetensors.safe_open(filePath, 'np').metadata().get('ss_seed', 'Not found')))
434
+ "
435
+ }
436
+
437
+ png2mp4() {
438
+ ffmpeg -framerate 8 -pattern_type glob -i '*.png' -vf scale=512x512 -crf 28 \
439
+ -c:v libx264 -pix_fmt yuv420p out.mp4
440
+ }
441
+
442
+
443
+ source $ZSH/oh-my-zsh.sh
444
+
445
+ export PATH=$PATH:$HOME/.local/bin:$HOME/source/repos/dataset-tools/target/x86_64-unknown-linux-gnu/release:$HOME/.cargo/bin:$HOME/miniconda3/bin:$HOME/toolkit:$HOME/db/redis-stable/src:$HOME/db/postgresql/bin
446
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib
447
+ export COMFYUI_MODEL_PATH=/home/kade/ComfyUI/models
448
+
449
+ c_old() {
450
+ cd ~/ComfyUI &&
451
+ python3.12 main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers --fast
452
+ }
453
+
454
+ c() { # Start ComfyUI from ~/ComfyUI in the comfyui conda env; steps are &&-chained so python never runs after a failed cd/activate
455
+ cd ~/ComfyUI &&
456
+ conda activate comfyui &&
457
+ python main.py --listen 0.0.0.0 --preview-method taesd --use-pytorch-cross-attention --disable-xformers --front-end-version Comfy-Org/ComfyUI_frontend@latest --fast
458
+ }
459
+
460
+ alias t="tensorboard --logdir=$HOME/output_dir/logs"
461
+ alias nvim="vim"
462
+ alias rt="vim ~/.tmux.conf && echo \"Reloading tmux config\" && tmux source ~/.tmux.conf"
463
+ alias zr="vim ~/.zshrc && echo \"Reloading zsh config\" && source ~/.zshrc"
464
+ alias ta="tmux att"
465
+ alias ga="git add . && git commit -avs && git push"
466
+ alias gs="git status"
467
+ alias wd="git diff --word-diff-regex='[^,]+' --patience"
468
+
469
+ source /home/kade/.config/broot/launcher/bash/br
470
+
471
+ [ -f ~/.fzf.zsh ] && source ~/.fzf.zsh
472
+
473
+ alias ls='ls --color=always'
474
+
475
+ # >>> conda initialize >>>
476
+ # !! Contents within this block are managed by 'conda init' !!
477
+ __conda_setup="$('/home/kade/miniconda3/bin/conda' 'shell.zsh' 'hook' 2> /dev/null)"
478
+ if [ $? -eq 0 ]; then
479
+ eval "$__conda_setup"
480
+ else
481
+ if [ -f "/home/kade/miniconda3/etc/profile.d/conda.sh" ]; then
482
+ . "/home/kade/miniconda3/etc/profile.d/conda.sh"
483
+ else
484
+ export PATH="/home/kade/miniconda3/bin:$PATH"
485
+ fi
486
+ fi
487
+ unset __conda_setup
488
+ # <<< conda initialize <<<
489
+
490
+ unset CONDA_CHANGEPS1
491
+
492
+ function conda_prompt_info() {
493
+ if [[ -n "$CONDA_DEFAULT_ENV" ]]; then
494
+ echo "(${CONDA_DEFAULT_ENV})"
495
+ fi
496
+ }
497
+
498
+ display_custom_help
9em124t2-499968/clip_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d7b0548d12fa649370896982c2af9d03d43285b782bd47639c96e6e0b29473c
3
+ size 1713067838
9em124t2-499968/config.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_project: joy-caption-1
2
+ device_batch_size: 2
3
+ batch_size: 256
4
+ learning_rate: 0.0002
5
+ warmup_samples: 18000
6
+ max_samples: 500000
7
+ save_every: 50000
8
+ test_every: 50000
9
+ use_amp: true
10
+ grad_scaler: true
11
+ lr_scheduler_type: cosine
12
+ min_lr_ratio: 0.0
13
+ allow_tf32: true
14
+ seed: 69
15
+ num_workers: 8
16
+ optimizer_type: adamw
17
+ adam_beta1: 0.9
18
+ adam_beta2: 0.999
19
+ adam_eps: 1.0e-08
20
+ adam_weight_decay: 0.0
21
+ clip_grad_norm: 1.0
22
+ dataset: fancyfeast/joy-captioning-20240917a
23
+ clip_model: google/siglip-so400m-patch14-384
24
+ text_model: meta-llama/Meta-Llama-3.1-8B
25
+ resume: null
26
+ gradient_checkpointing: false
27
+ test_size: 2048
28
+ grad_scaler_init: 65536.0
29
+ max_caption_length: 257
30
+ num_image_tokens: 32
31
+ adapter_type: mlp
32
+ text_model_dtype: bfloat16
33
+ pre_test: false
34
+ train_image_model: true
35
+ image_model_lr: null
36
+ train_lora: true
37
+ lora_r: 64
38
+ lora_alpha: 16
39
+ lora_dropout: 0.1
9em124t2-499968/image_adapter.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e53c3bf8df745a3c19ae3c70dbf9bf23cfdc8f3fdb937000a4eafd2a36914661
3
+ size 86067714
9em124t2-499968/text_model/README.md ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: meta-llama/Meta-Llama-3.1-8B
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.12.0
9em124t2-499968/text_model/adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Meta-Llama-3.1-8B",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 16,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 64,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "v_proj"
25
+ ],
26
+ "task_type": "CAUSAL_LM",
27
+ "use_dora": false,
28
+ "use_rslora": false
29
+ }
9em124t2-499968/text_model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b48221de174ab0db7b46b4833118c5c0a4c2bf0b51b77b4cc4ab04651bd06cca
3
+ size 109069176
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Balazs Horvath
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
ascii_art/gaeros ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ ascii_art = """
5
+ ▄▄ • ▄▄▄· ▄▄▄ .▄▄▄ .▄▄ ·
6
+ ▐█ ▀ ▪▐█ ▀█ ▀▄.▀·▀▄ █·▪ ▐█ ▀.
7
+ ▄█ ▀█▄▄█▀▀█ ▐▀▀▪▄▐▀▀▄ ▄█▀▄ ▄▀▀▀█▄
8
+ ▐█▄▪▐█▐█ ▪▐▌▐█▄▄▌▐█•█▌▐█▌.▐▌▐█▄▪▐█
9
+ ·▀▀▀▀ ▀ ▀ ▀▀▀ .▀ ▀ ▀█▄▀▪ ▀▀▀▀
10
+ """
11
+ print(ascii_art)
ascii_art/kade ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ ascii_art = """
5
+ ▄ •▄ ▄▄▄· ·▄▄▄▄ ▄▄▄ .
6
+ █▌▄▌▪▐█ ▀█ ██▪ ██ ▀▄.▀·
7
+ ▐▀▀▄·▄█▀▀█ ▐█· ▐█▌▐▀▀▪▄
8
+ ▐█.█▌▐█ ▪▐▌██. ██ ▐█▄▄▌
9
+ ·▀ ▀ ▀ ▀ ▀▀▀▀▀• ▀▀▀
10
+ """
11
+
12
+ print(ascii_art)
13
+
crawl/crawl ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Web Crawler and Content Saver
6
+
7
+ This module provides functionality to crawl web pages, extract content,
8
+ and save the results including markdown text and images. It uses the
9
+ WebCrawler class from crawl4ai and implements parallel image downloading.
10
+ """
11
+
12
+ import sys
13
+ import os
14
+ import re
15
+ import platform
16
+ from concurrent.futures import ThreadPoolExecutor, as_completed
17
+
18
+ import requests
19
+ from crawl4ai import WebCrawler
20
+
21
+
22
def create_crawler():
    """
    Build a verbose WebCrawler and warm it up before use.

    Returns:
        WebCrawler: The crawler instance, after ``warmup()`` has been called.
    """
    web_crawler = WebCrawler(verbose=True)
    web_crawler.warmup()
    return web_crawler
32
+
33
+
34
def sanitize_filename(filename):
    """
    Strip the characters that Windows forbids in file names.

    Args:
        filename (str): The candidate file name.

    Returns:
        str: ``filename`` with every ``< > : " / \\ | ? *`` character removed.
    """
    # Characters that are not allowed in Windows file names.
    forbidden_chars = re.compile(r'[<>:"/\\|?*]')
    return forbidden_chars.sub('', filename)
46
+
47
+
48
def download_image(session, image_url, save_dir, timeout=30):
    """
    Download an image from a given URL and save it to the specified directory.

    Failures are reported on stdout and swallowed so that one bad image does
    not abort the rest of a parallel download batch.

    Args:
        session (requests.Session):
            The requests session to use for downloading.
        image_url (str):
            The URL of the image to download. URLs without an http(s) scheme
            are treated as https.
        save_dir (str):
            The directory to save the downloaded image.
        timeout (float):
            Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block a worker thread forever.
    """
    try:
        # Ensure the URL has a scheme
        if not re.match(r'^https?://', image_url):
            image_url = 'https://' + image_url.lstrip('/')

        # Drop the query string and fragment *before* taking the basename,
        # otherwise a '/' inside the query would be mistaken for a path
        # separator and a '#fragment' would leak into the file name.
        url_path = re.split(r'[?#]', image_url, maxsplit=1)[0]
        sanitized_image_filename = sanitize_filename(os.path.basename(url_path))
        if not sanitized_image_filename:
            # e.g. a URL ending in '/': there is nothing to name the file
            # after, and opening save_dir itself for writing would fail.
            print(f"Skipping image without a file name: {image_url}")
            return
        image_path = os.path.join(save_dir, sanitized_image_filename)

        # The context manager releases the streamed connection back to the
        # pool even when writing the file fails part-way through.
        with session.get(image_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(image_path, 'wb') as image_file:
                for chunk in response.iter_content(chunk_size=8192):
                    image_file.write(chunk)
        print(f"Saved image: {image_path}")
    except requests.RequestException as e:
        print(f"Error downloading image {image_url}: {str(e)}")
    except IOError as e:
        print(f"Error saving image {image_url}: {str(e)}")
79
+
80
+
81
def save_result(target_url):
    """
    Crawl a given URL, extract content, and save the results.

    This function crawls the specified URL, saves the markdown content to
    ``<base>/<title>/<title>.md``, and downloads all associated images into
    the same directory in parallel.

    Args:
        target_url (str): The URL to crawl and save content from.
    """
    crawler = create_crawler()
    result = crawler.run(url=target_url)

    # `or` also covers a title that is present but None/empty, and a title
    # that consists only of characters removed by sanitize_filename.
    title = result.metadata.get('title') or 'untitled'
    sanitized_title = sanitize_filename(title).replace(" ", "_") or 'untitled'

    # Choose the appropriate base path based on the operating system
    if platform.system() == "Windows":
        base_path = "E:\\knowledgebase\\Saved Websites\\"
    else:
        base_path = "/home/kade/saved_websites/"

    save_dir = os.path.join(base_path, sanitized_title)
    os.makedirs(save_dir, exist_ok=True)

    # Save markdown
    save_path = os.path.join(save_dir, f"{sanitized_title}.md")
    with open(save_path, "w", encoding="utf-8") as file:
        file.write(result.markdown)
    print(f"Saved markdown to {save_path}")

    # Save images in parallel
    if 'images' in result.media and isinstance(result.media['images'], list):
        headers = {
            # Adjacent literals are concatenated, so each piece needs its own
            # trailing space to keep the product tokens separated.
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/91.0.4472.124 Safari/537.36',
            'Referer': target_url,
            'Accept': ('image/avif,image/webp,image/apng,image/svg+xml,'
                       'image/*,*/*;q=0.8'),
            'Accept-Language': 'en-US,en;q=0.9',
            'Sec-Fetch-Dest': 'image',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'cross-site',
        }

        # The session is closed once every download has finished.
        with requests.Session() as session:
            session.headers.update(headers)

            with ThreadPoolExecutor(max_workers=5) as executor:
                futures = []
                for image_data in result.media['images']:
                    # Skip entries without a usable source URL.
                    if 'src' in image_data and image_data['src']:
                        futures.append(executor.submit(download_image,
                                                       session,
                                                       image_data['src'],
                                                       save_dir))

                # Surface any unexpected exception raised inside a worker.
                for future in as_completed(futures):
                    future.result()
139
+
140
+
141
if __name__ == "__main__":
    if len(sys.argv) != 2:
        # Report misuse on stderr with a non-zero status so that calling
        # scripts can detect it; name the script as it was actually invoked.
        print(f"Usage: {sys.argv[0]} <URL>", file=sys.stderr)
        sys.exit(1)
    save_result(sys.argv[1])
crawl/crawl4ai.pyi ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides a WebCrawler class for AI-related web crawling tasks.
3
+
4
+ The WebCrawler class is designed to crawl web pages, potentially for
5
+ AI-related data extraction or analysis. It offers methods for initializing
6
+ the crawler, warming it up, and running crawl operations on specified URLs.
7
+
8
+ Classes:
9
+ WebCrawler: A web crawler for AI-related tasks.
10
+
11
+ Example:
12
+ crawler = WebCrawler(verbose=True)
13
+ crawler.warmup()
14
+ result = crawler.run("https://example.com")
15
+ """
16
+
17
+ from typing import Any
18
+
19
+
20
class WebCrawler:
    """
    A web crawler for AI-related tasks.

    This class provides functionality to crawl web pages,
    potentially for AI-related data extraction or analysis.

    Attributes:
        verbose (bool): If True, enables verbose output during crawling.

    Methods:
        warmup(): Prepares the crawler for operation.
        run(url: str): Crawls the specified URL and returns the result.
    """

    # Declared at class level so the attribute is visible to tools that do
    # not look inside function bodies when reading a stub file.
    verbose: bool

    def __init__(self, verbose: bool = False) -> None:
        self.verbose = verbose

    def warmup(self) -> None:
        """
        Prepares the crawler for operation.

        This method should be called before running the crawler to ensure
        all necessary resources and configurations are set up.
        """

    def run(self, url: str) -> Any:
        """
        Crawls the specified URL and returns the result.

        Args:
            url (str): The URL to crawl.

        Returns:
            Any: The result of the crawling operation. The specific type
            depends on the implementation and could be raw HTML,
            parsed data, or any other relevant information.
        """
58
+
crawl/crawl_wikipedia ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ Web Crawler and Content Saver
6
+
7
+ This module provides functionality to crawl web pages, extract content,
8
+ and save the results including markdown text and images. It uses the
9
+ WebCrawler class from crawl4ai, tries to resolve thumbnails to their full-size images, and downloads images in parallel.
10
+ """
11
+
12
+ import sys
13
+ import os
14
+ import re
15
+ import platform
16
+ from concurrent.futures import ThreadPoolExecutor, as_completed
17
+ from urllib.parse import urljoin
18
+ from bs4 import BeautifulSoup
19
+
20
+ import requests
21
+ from crawl4ai import WebCrawler
22
+
23
+
24
def create_crawler():
    """
    Instantiate a WebCrawler in verbose mode and run its warm-up step.

    Returns:
        WebCrawler: The warmed-up crawler instance.
    """
    instance = WebCrawler(verbose=True)
    instance.warmup()
    return instance
34
+
35
+
36
def sanitize_filename(filename):
    """
    Drop characters that are not allowed in Windows file names.

    Args:
        filename (str): The name to clean up.

    Returns:
        str: The name without any ``< > : " / \\ | ? *`` characters.
    """
    # Split on every forbidden character and glue the remaining pieces
    # back together.
    kept_pieces = re.split(r'[<>:"/\\|?*]', filename)
    return ''.join(kept_pieces)
48
+
49
+
50
def get_full_size_image_url(session, image_url, base_url, timeout=30):
    """
    Attempt to find the full-size image URL from a thumbnail URL.

    The URL is requested and, if the response is an HTML page, searched for
    an ``<a>`` element whose class matches ``fullimage`` or ``full-size``.
    This is best-effort: on any error the original URL is returned.

    Args:
        session (requests.Session): The requests session to use.
        image_url (str): The thumbnail image URL.
        base_url (str): The base URL of the page being crawled; relative
            links found in the response are resolved against it.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        str: The full-size image URL if found, otherwise the original URL.
    """
    try:
        # stream=True defers the body download until `.text` is accessed, so
        # plain image responses are not downloaded here only to be fetched
        # a second time by the caller.
        with session.get(image_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            content_type = response.headers.get('Content-Type', '')
            if 'html' not in content_type.lower():
                # Binary image data contains no links to look for.
                return image_url
            soup = BeautifulSoup(response.text, 'html.parser')

        # Look for common full-size image patterns
        full_size_link = soup.find('a', class_=re.compile(r'fullimage|full-size'))
        if full_size_link and full_size_link.get('href'):
            return urljoin(base_url, full_size_link['href'])

        # If no full-size link is found, return the original URL
        return image_url
    except Exception as e:
        # Deliberately broad: resolving the full-size URL is optional and
        # must never prevent the caller from downloading the original image.
        print(f"Error finding full-size image for {image_url}: {str(e)}")
        return image_url
77
+
78
+
79
def download_image(session, image_url, save_dir, base_url, timeout=30):
    """
    Download an image from a given URL and save it to the specified directory.
    Attempt to get the full-size image if the URL is a thumbnail.

    Images whose target file already exists are skipped. Failures are
    reported on stdout and swallowed so that one bad image does not abort
    the rest of a parallel download batch.

    Args:
        session (requests.Session): The requests session to use for downloading.
        image_url (str): The URL of the image to download.
        save_dir (str): The directory to save the downloaded image.
        base_url (str): The base URL of the page being crawled.
        timeout (float): Seconds to wait for the server before giving up.
    """
    # Bound before the try block so the error handlers below can always
    # refer to it, even if resolving the full-size URL itself raises.
    full_size_url = image_url
    try:
        full_size_url = get_full_size_image_url(session, image_url, base_url)

        # Drop the query string and fragment *before* taking the basename,
        # otherwise a '/' inside the query would be mistaken for a path
        # separator and a '#fragment' would leak into the file name.
        url_path = re.split(r'[?#]', full_size_url, maxsplit=1)[0]
        sanitized_image_filename = sanitize_filename(os.path.basename(url_path))
        if not sanitized_image_filename:
            # e.g. a URL ending in '/': there is nothing to name the file after.
            print(f"Skipping image without a file name: {full_size_url}")
            return
        image_path = os.path.join(save_dir, sanitized_image_filename)

        if os.path.exists(image_path):
            print(f"Image already exists: {image_path}")
            return

        # The context manager releases the streamed connection back to the
        # pool even when writing the file fails part-way through.
        with session.get(full_size_url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(image_path, 'wb') as image_file:
                for chunk in response.iter_content(chunk_size=8192):
                    image_file.write(chunk)
        print(f"Saved full-size image: {image_path}")
    except requests.RequestException as e:
        print(f"Error downloading image {full_size_url}: {str(e)}")
    except IOError as e:
        print(f"Error saving image {full_size_url}: {str(e)}")
110
+
111
+
112
def save_result(target_url):
    """
    Crawl a given URL, extract content, and save the results.

    This function crawls the specified URL, saves the markdown content to
    ``<base>/<title>/<title>.md``, and downloads all associated images into
    the same directory in parallel.

    Args:
        target_url (str): The URL to crawl and save content from.
    """
    crawler = create_crawler()
    result = crawler.run(url=target_url)

    # `or` also covers a title that is present but None/empty, and a title
    # that consists only of characters removed by sanitize_filename.
    title = result.metadata.get('title') or 'untitled'
    sanitized_title = sanitize_filename(title).replace(" ", "_") or 'untitled'

    # Choose the appropriate base path based on the operating system
    if platform.system() == "Windows":
        base_path = "E:\\knowledgebase\\Saved Websites\\"
    else:
        base_path = "/home/kade/saved_websites/"

    save_dir = os.path.join(base_path, sanitized_title)
    os.makedirs(save_dir, exist_ok=True)

    # Save markdown
    save_path = os.path.join(save_dir, f"{sanitized_title}.md")
    with open(save_path, "w", encoding="utf-8") as file:
        file.write(result.markdown)
    print(f"Saved markdown to {save_path}")

    # Save images in parallel
    if 'images' in result.media and isinstance(result.media['images'], list):
        headers = {
            # Adjacent literals are concatenated, so each piece needs its own
            # trailing space to keep the product tokens separated.
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/91.0.4472.124 Safari/537.36',
            'Referer': target_url,
            'Accept': ('image/avif,image/webp,image/apng,image/svg+xml,'
                       'image/*,*/*;q=0.8'),
            'Accept-Language': 'en-US,en;q=0.9',
            'Sec-Fetch-Dest': 'image',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'cross-site',
        }

        # The session is closed once every download has finished.
        with requests.Session() as session:
            session.headers.update(headers)

            with ThreadPoolExecutor(max_workers=5) as executor:
                futures = []
                for image_data in result.media['images']:
                    # Skip entries without a usable source URL.
                    if 'src' in image_data and image_data['src']:
                        # Use urljoin to create absolute URLs for image sources
                        absolute_image_url = urljoin(target_url, image_data['src'])
                        futures.append(executor.submit(download_image,
                                                       session,
                                                       absolute_image_url,
                                                       save_dir,
                                                       target_url))  # target_url doubles as base_url

                # Surface any unexpected exception raised inside a worker.
                for future in as_completed(futures):
                    future.result()
175
+
176
+
177
if __name__ == "__main__":
    if len(sys.argv) != 2:
        # Report misuse on stderr with a non-zero status so that calling
        # scripts can detect it; name the script as it was actually invoked.
        print(f"Usage: {sys.argv[0]} <URL>", file=sys.stderr)
        sys.exit(1)
    save_result(sys.argv[1])
joy ADDED
@@ -0,0 +1,555 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ JoyCaption Alpha One
6
+
7
+ This module provides functionality for generating captions for images using a
8
+ combination of CLIP, LLM, and custom image adapters. It supports various
9
+ caption types, tones, and lengths.
10
+
11
+ The main components include:
12
+ - Loading and initializing models (CLIP, LLM, image adapter)
13
+ - Processing images and generating captions
14
+ - Command-line interface for batch processing images in a directory
15
+ """
16
+
17
+ import os
18
+ import argparse
19
+ import re
20
+ from pathlib import Path
21
+ from PIL import Image
22
+ import pillow_jxl
23
+ import torch
24
+ import torchvision.transforms.functional as TVF
25
+ from transformers import (
26
+ AutoModel,
27
+ AutoProcessor,
28
+ AutoTokenizer,
29
+ AutoModelForCausalLM,
30
+ PreTrainedTokenizer,
31
+ PreTrainedTokenizerFast,
32
+ )
33
+ from torch import nn
34
+
35
# Hugging Face model identifiers passed to `from_pretrained` in
# JoyCaptionModel.load_models.
CLIP_PATH = "google/siglip-so400m-patch14-384"
MODEL_PATH = "meta-llama/Meta-Llama-3.1-8B"
# Checkpoint directory that sits next to this script.
CHECKPOINT_PATH = Path(__file__).resolve().parent / "9em124t2-499968"
# Prompt templates keyed by
#   (caption_type, caption_tone, length_is_str, length_is_int)
# as built in JoyCaptionModel._get_prompt_string. Templates may contain the
# placeholders {length} and {word_count}, which are filled in via str.format.
# Only the first entry of each list is used by _get_prompt_string.
CAPTION_TYPE_MAP = {
    ("descriptive", "formal", False, False): [
        "Write a descriptive caption for this image in a formal tone."
    ],
    ("descriptive", "formal", False, True): [
        "Write a descriptive caption for this image in a formal tone within "
        "{word_count} words."
    ],
    ("descriptive", "formal", True, False): [
        "Write a {length} descriptive caption for this image in a formal tone."
    ],
    ("descriptive", "informal", False, False): [
        "Write a descriptive caption for this image in a casual tone."
    ],
    ("descriptive", "informal", False, True): [
        "Write a descriptive caption for this image in a casual tone within "
        "{word_count} words."
    ],
    ("descriptive", "informal", True, False): [
        "Write a {length} descriptive caption for this image in a casual tone."
    ],
    ("training_prompt", "formal", False, False): [
        "Write a stable diffusion prompt for this image."
    ],
    ("training_prompt", "formal", False, True): [
        "Write a stable diffusion prompt for this image within {word_count} "
        "words."
    ],
    ("training_prompt", "formal", True, False): [
        "Write a {length} stable diffusion prompt for this image."
    ],
    ("rng-tags", "formal", False, False): [
        "Write a list of Booru tags for this image."
    ],
    ("rng-tags", "formal", False, True): [
        "Write a list of Booru tags for this image within {word_count} words."
    ],
    ("rng-tags", "formal", True, False): [
        "Write a {length} list of Booru tags for this image."
    ],
}

# Value of the HF_TOKEN environment variable, or None when it is unset.
# NOTE(review): not referenced anywhere else in the visible part of this
# file -- confirm whether it is still needed.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
81
+
82
class ImageAdapter(nn.Module):
    """
    Adapter that maps vision-model hidden states into the text model's
    embedding width.

    The selected hidden states are optionally layer-normalized, optionally
    offset by a learned positional embedding, and then passed through a
    Linear -> GELU -> Linear stack. Two learned tokens are placed before and
    after the adapted sequence; a third learned token is exposed through
    ``get_eot_embedding``.

    Args:
        input_features (int): Number of input features from the vision model.
        output_features (int): Number of output features to match the text model.
        ln1 (bool): Whether to use layer normalization.
        pos_emb (bool): Whether to use positional embeddings.
        num_image_tokens (int): Number of image tokens.
        deep_extract (bool): Whether to use deep feature extraction.
    """

    def __init__(
        self,
        input_features: int,
        output_features: int,
        ln1: bool,
        pos_emb: bool,
        num_image_tokens: int,
        deep_extract: bool,
    ):
        super().__init__()
        self.deep_extract = deep_extract

        # Deep extraction concatenates five hidden states along the feature
        # axis, so the adapter input is five times as wide.
        in_width = input_features * 5 if self.deep_extract else input_features

        self.linear1 = nn.Linear(in_width, output_features)
        self.activation = nn.GELU()
        self.linear2 = nn.Linear(output_features, output_features)

        if ln1:
            self.ln1 = nn.LayerNorm(in_width)
        else:
            self.ln1 = nn.Identity()

        if pos_emb:
            self.pos_emb = nn.Parameter(torch.zeros(num_image_tokens, in_width))
        else:
            self.pos_emb = None

        # Three extra learned embeddings: index 0 and 1 wrap the image
        # sequence in forward(), index 2 is returned by get_eot_embedding().
        self.other_tokens = nn.Embedding(3, output_features)
        self.other_tokens.weight.data.normal_(mean=0.0, std=0.02)

    def forward(self, vision_outputs: torch.Tensor):
        """
        Adapt the vision model's hidden states.

        Args:
            vision_outputs (torch.Tensor): Hidden states of the vision model,
                indexed by layer.

        Returns:
            torch.Tensor: The adapted features with one extra learned token
            at each end of the sequence dimension.
        """
        penultimate = vision_outputs[-2]

        if not self.deep_extract:
            features = penultimate
        else:
            features = torch.concat(
                (
                    penultimate,
                    vision_outputs[3],
                    vision_outputs[7],
                    vision_outputs[13],
                    vision_outputs[20],
                ),
                dim=-1,
            )
            assert len(features.shape) == 3, f"Expected 3, got {len(features.shape)}"
            assert features.shape[-1] == penultimate.shape[-1] * 5, (
                f"Expected {penultimate.shape[-1] * 5}, got {features.shape[-1]}"
            )

        features = self.ln1(features)

        if self.pos_emb is not None:
            assert features.shape[-2:] == self.pos_emb.shape, (
                f"Expected {self.pos_emb.shape}, got {features.shape[-2:]}"
            )
            features = features + self.pos_emb

        features = self.linear2(self.activation(self.linear1(features)))

        # Look up learned tokens 0 and 1 once per batch element.
        batch_size = features.shape[0]
        token_ids = torch.tensor(
            [0, 1], device=self.other_tokens.weight.device
        ).expand(batch_size, -1)
        boundary_tokens = self.other_tokens(token_ids)
        expected_shape = (batch_size, 2, features.shape[2])
        assert boundary_tokens.shape == expected_shape, (
            f"Expected {expected_shape}, got {boundary_tokens.shape}"
        )

        return torch.cat(
            (boundary_tokens[:, 0:1], features, boundary_tokens[:, 1:2]),
            dim=1,
        )

    def get_eot_embedding(self):
        """
        Get the end-of-text embedding.

        Returns:
            torch.Tensor: The learned embedding stored at index 2.
        """
        eot_index = torch.tensor([2], device=self.other_tokens.weight.device)
        return self.other_tokens(eot_index).squeeze(0)
184
+
185
class JoyCaptionModel:
    """
    A class for generating captions for images using CLIP, LLM, and custom image adapters.

    This class encapsulates the functionality to load and initialize various models
    (CLIP, LLM, image adapter) and use them to process images and generate captions.
    It supports different caption types, tones, and lengths.

    Attributes:
        clip_model: The CLIP vision model for processing images.
        text_model: The language model for generating captions.
        image_adapter: Custom adapter for processing CLIP vision outputs.
        tokenizer: Tokenizer for the language model.

    Methods:
        load_models(): Load and initialize all required models.
        process_image(input_image, caption_type, caption_tone, caption_length):
            Process an input image and generate a caption based on specified parameters.
    """

    def __init__(self):
        # All models are loaded lazily by load_models().
        self.clip_model = None
        self.text_model = None
        self.image_adapter = None
        self.tokenizer = None

    def load_models(self):
        """
        Load and initialize all required models (CLIP, LLM, image adapter).

        Custom vision/text weights found under CHECKPOINT_PATH take
        precedence over the stock models. The vision model and the image
        adapter are moved to "cuda".
        """
        print("Loading CLIP")
        self.clip_model = AutoModel.from_pretrained(CLIP_PATH)
        self.clip_model = self.clip_model.vision_model

        if (CHECKPOINT_PATH / "clip_model.pt").exists():
            print("Loading VLM's custom vision model")
            checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu')
            # Strip the key prefix so the keys match the plain vision model.
            checkpoint = {k.replace("_orig_mod.module.", ""): v for k, v in checkpoint.items()}
            self.clip_model.load_state_dict(checkpoint)
            del checkpoint

        self.clip_model.eval()
        self.clip_model.requires_grad_(False)
        self.clip_model.to("cuda")

        print("Loading tokenizer")
        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, use_fast=False)
        assert isinstance(
            self.tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)
        ), f"Tokenizer is of type {type(self.tokenizer)}"

        print("Loading LLM")
        if (CHECKPOINT_PATH / "text_model").exists():
            print("Loading VLM's custom text model")
            self.text_model = AutoModelForCausalLM.from_pretrained(
                CHECKPOINT_PATH / "text_model",
                device_map=0,
                torch_dtype=torch.bfloat16
            )
        else:
            self.text_model = AutoModelForCausalLM.from_pretrained(
                MODEL_PATH,
                device_map="auto",
                torch_dtype=torch.bfloat16
            )

        self.text_model.eval()

        print("Loading image adapter")
        self.image_adapter = ImageAdapter(
            self.clip_model.config.hidden_size,
            self.text_model.config.hidden_size,
            False,
            False,
            38,
            False
        )
        self.image_adapter.load_state_dict(
            torch.load(CHECKPOINT_PATH / "image_adapter.pt", map_location="cpu")
        )
        self.image_adapter.eval()
        self.image_adapter.to("cuda")

    @torch.no_grad()
    def process_image(self,
                      input_image: Image.Image,
                      caption_type: str,
                      caption_tone: str,
                      caption_length: str | int,
                      custom_prompt: str | None = None) -> str:
        """
        Process an input image and generate a caption based on specified parameters.

        Args:
            input_image: The image to caption.
            caption_type: A caption type present in CAPTION_TYPE_MAP, or
                "custom" to use ``custom_prompt`` verbatim.
            caption_tone: The tone component of the CAPTION_TYPE_MAP key.
            caption_length: "any", a length word, or a word count.
            custom_prompt: Prompt text, required when ``caption_type`` is
                "custom" and ignored otherwise.

        Returns:
            The generated caption with surrounding whitespace removed.

        Raises:
            ValueError: If ``caption_type`` is "custom" without a prompt, or
                if the type/tone/length combination has no prompt template.
        """
        torch.cuda.empty_cache()

        if caption_type == "custom":
            if not custom_prompt:
                # Fail with an explicit message rather than the misleading
                # "Invalid caption type" from the template lookup.
                raise ValueError(
                    "caption_type 'custom' requires a non-empty custom_prompt"
                )
            prompt_str = custom_prompt
        else:
            prompt_str = self._get_prompt_string(caption_type, caption_tone, caption_length)
        print(f"Prompt: {prompt_str}")

        pixel_values = self._preprocess_image(input_image)
        prompt = self._tokenize_prompt(prompt_str)

        embedded_images = self._embed_image(pixel_values)
        inputs_embeds, input_ids, attention_mask = self._construct_inputs(embedded_images, prompt)

        generate_ids = self._generate_caption(inputs_embeds, input_ids, attention_mask)
        caption = self._decode_caption(generate_ids, input_ids)

        return caption.strip()

    def _get_prompt_string(self, caption_type, caption_tone, caption_length):
        """Select the prompt template for the given options and fill it in."""
        length = None if caption_length == "any" else caption_length

        # A numeric string such as "50" is a word count; any other string is
        # kept as a length word.
        if isinstance(length, str):
            try:
                length = int(length)
            except ValueError:
                pass

        # These caption types only have templates for the formal tone.
        if caption_type in {"rng-tags", "training_prompt"}:
            caption_tone = "formal"

        prompt_key = (
            caption_type,
            caption_tone,
            isinstance(length, str),
            isinstance(length, int)
        )
        if prompt_key not in CAPTION_TYPE_MAP:
            raise ValueError(f"Invalid caption type: {prompt_key}")

        prompt_str = CAPTION_TYPE_MAP[prompt_key][0].format(
            length=length, word_count=length
        )
        return prompt_str

    def _preprocess_image(self, input_image):
        """Resize to 384x384, scale to [0, 1], normalize and move to CUDA."""
        image = input_image.resize((384, 384), Image.LANCZOS)
        pixel_values = TVF.pil_to_tensor(image).unsqueeze(0) / 255.0
        pixel_values = TVF.normalize(pixel_values, [0.5], [0.5])
        pixel_values = pixel_values.to('cuda')
        return pixel_values

    def _tokenize_prompt(self, prompt_str):
        """Encode the prompt without special tokens, padding or truncation."""
        prompt = self.tokenizer.encode(
            prompt_str,
            return_tensors='pt',
            padding=False,
            truncation=False,
            add_special_tokens=False
        )
        return prompt

    def _embed_image(self, pixel_values):
        """Run the vision model and adapt its hidden states."""
        with torch.amp.autocast_mode.autocast('cuda', enabled=True):
            vision_outputs = self.clip_model(pixel_values=pixel_values, output_hidden_states=True)
            image_features = vision_outputs.hidden_states
            embedded_images = self.image_adapter(image_features)
            embedded_images = embedded_images.to('cuda')
        return embedded_images

    def _construct_inputs(self, embedded_images, prompt):
        """
        Assemble the generation inputs as BOS + image + prompt + EOT.

        Returns:
            A tuple ``(inputs_embeds, input_ids, attention_mask)``. The ids
            use 0 as a placeholder at every image position.
        """
        prompt_embeds = self.text_model.model.embed_tokens(prompt.to('cuda'))
        assert prompt_embeds.shape == (1, prompt.shape[1], self.text_model.config.hidden_size), (
            f"Prompt shape is {prompt_embeds.shape}, expected "
            f"{(1, prompt.shape[1], self.text_model.config.hidden_size)}"
        )

        embedded_bos = self.text_model.model.embed_tokens(
            torch.tensor([[self.tokenizer.bos_token_id]],
                         device=self.text_model.device,
                         dtype=torch.int64)
        )

        eot_embed = self.image_adapter.get_eot_embedding().unsqueeze(0).to(
            dtype=self.text_model.dtype
        )

        inputs_embeds = torch.cat([
            embedded_bos.expand(embedded_images.shape[0], -1, -1),
            embedded_images.to(dtype=embedded_bos.dtype),
            prompt_embeds.expand(embedded_images.shape[0], -1, -1),
            eot_embed.expand(embedded_images.shape[0], -1, -1),
        ], dim=1)

        input_ids = torch.cat([
            torch.tensor([[self.tokenizer.bos_token_id]], dtype=torch.long),
            torch.zeros((1, embedded_images.shape[1]), dtype=torch.long),
            prompt,
            torch.tensor([[self.tokenizer.eos_token_id]], dtype=torch.long),
        ], dim=1).to('cuda')
        attention_mask = torch.ones_like(input_ids)

        return inputs_embeds, input_ids, attention_mask

    def _generate_caption(self, inputs_embeds, input_ids, attention_mask):
        """Sample up to 300 new tokens from the text model."""
        generate_ids = self.text_model.generate(
            input_ids,
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
            max_new_tokens=300,
            do_sample=True,
            suppress_tokens=None
        )
        return generate_ids

    def _decode_caption(self, generate_ids, input_ids):
        """Drop the prompt part and a trailing end token, then decode."""
        generate_ids = generate_ids[:, input_ids.shape[1]:]

        # Guard the [-1] lookup: nothing may be left after slicing.
        if generate_ids.shape[1] > 0:
            last_token = generate_ids[0][-1]
            if (last_token == self.tokenizer.eos_token_id or
                    last_token == self.tokenizer.convert_tokens_to_ids("<|eot_id|>")):
                generate_ids = generate_ids[:, :-1]

        caption = self.tokenizer.batch_decode(
            generate_ids,
            skip_special_tokens=False,
            clean_up_tokenization_spaces=False
        )[0]
        return caption
406
+
407
+
408
def main():
    """
    Generate captions for images in a directory and save them as .caption files.

    The directory is searched recursively. Images that already have a
    ``.caption`` file next to them are skipped.
    """
    parser = argparse.ArgumentParser(
        description="Generate captions for images in a directory and save them as .caption files."
    )
    parser.add_argument("directory", type=str, help="Target directory containing images.")
    parser.add_argument(
        "--caption_type",
        type=str,
        default="descriptive",
        choices=["descriptive", "training_prompt", "rng-tags", "custom"],
        help="Type of caption to generate."
    )
    parser.add_argument(
        "--caption_tone",
        type=str,
        default="formal",
        choices=["formal", "informal"],
        help="Tone of the caption."
    )
    parser.add_argument(
        "--caption_length",
        type=str,
        default="any",
        help="Length of the caption."
    )
    parser.add_argument(
        "--dont-strip-commas",
        action="store_true",
        help="If set, commas will not be stripped from the generated captions."
    )
    parser.add_argument(
        "--custom_prompt",
        type=str,
        help="Custom prompt for the captioner. Use with --caption_type custom."
    )
    parser.add_argument(
        '--add-commas-to-sentence-ends',
        action='store_true',
        help='Add commas after periods in sentences'
    )
    parser.add_argument(
        '--feed-from-tags',
        action='store_true',
        help='Use .txt files with the same base filename as the images as input to the captioner'
    )

    args = parser.parse_args()

    # Validate custom prompt usage *before* loading the models, so a bad
    # command line fails immediately instead of after a long model load.
    if args.caption_type == "custom" and not args.custom_prompt:
        parser.error("--custom_prompt is required when using --caption_type custom")
    elif args.caption_type != "custom" and args.custom_prompt:
        parser.error("--custom_prompt can only be used with --caption_type custom")

    # Initialize and load models
    joy_caption_model = JoyCaptionModel()
    joy_caption_model.load_models()

    image_extensions = {".webp", ".png", ".jpeg", ".jpg", ".jxl"}
    for image_path in Path(args.directory).rglob("*"):
        if image_path.suffix.lower() not in image_extensions:
            continue

        caption_file = image_path.with_suffix('.caption')

        # Skip if the caption file already exists
        if caption_file.exists():
            print(f"Skipping {image_path}: Caption file already exists.")
            continue

        # convert() returns a new image, so the file can be closed right away.
        with Image.open(image_path) as opened_image:
            input_image = opened_image.convert("RGB")

        caption_type = args.caption_type
        custom_prompt = args.custom_prompt

        # With --feed-from-tags, a non-empty tag file replaces the built-in
        # prompt. A missing or empty tag file falls back to the requested
        # caption type.
        if caption_type != "custom" and args.feed_from_tags:
            tag_file = find_tag_file(image_path)
            if tag_file:
                with open(tag_file, 'r', encoding='utf-8') as f:
                    tag_prompt = f.read().strip()
                if tag_prompt:
                    caption_type = "custom"
                    custom_prompt = tag_prompt

        caption = joy_caption_model.process_image(
            input_image,
            caption_type,
            args.caption_tone,
            args.caption_length,
            custom_prompt=custom_prompt
        )

        # Strip commas if the --dont-strip-commas flag is not set
        if not args.dont_strip_commas:
            # Commas followed by a digit are kept.
            caption = re.sub(r',\s*([^\d])', r' \1', caption)

        # Add commas after periods if specified
        if args.add_commas_to_sentence_ends:
            caption = re.sub(r'(\.)(\s+)([A-Z])', r'\1,\2\3', caption)

        print(f"Caption for {image_path}:\n{caption}\n")

        # Save the caption to a .caption file
        with open(caption_file, 'w', encoding='utf-8') as f:
            f.write(caption)
        print(f"Caption saved to {caption_file}")
532
+
533
def find_tag_file(image_path):
    """
    Find the corresponding .txt file for the given image path.
    Handles cases where the image has a -(number) suffix.

    Args:
        image_path (pathlib.Path): Path of the image.

    Returns:
        pathlib.Path | None: ``<stem>.txt`` next to the image if it exists;
        otherwise, for a stem of the form ``<base>-<number>``, ``<base>.txt``
        if that exists; otherwise None.
    """
    tag_file = image_path.with_suffix('.txt')
    if tag_file.exists():
        return tag_file

    # Handle -(number) suffix
    match = re.match(r'(.+)-\d+$', image_path.stem)
    if match:
        # Append '.txt' to the base name instead of using with_suffix():
        # with_suffix() would treat everything after a dot in the base name
        # as an extension and replace it ("v1.5 cat" -> "v1.txt").
        tag_file = image_path.with_name(match.group(1) + '.txt')
        if tag_file.exists():
            return tag_file

    return None
553
+
554
+ if __name__ == "__main__":
555
+ main()
jtp2 ADDED
@@ -0,0 +1,290 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ JTP2 (Joint Tagger Project 2) Image Classification Script
6
+ This script implements a multi-label classifier for furry images using the
7
+ PILOT2 model. It processes images, generates tags, and saves the results. The
8
+ model is based on a Vision Transformer architecture and uses a custom GatedHead
9
+ for classification.
10
+ Key features:
11
+ - Image preprocessing and transformation
12
+ - Model inference using PILOT2
13
+ - Tag generation with customizable threshold
14
+ - Batch processing of image directories
15
+ - Saving results as text files alongside images
16
+ Usage:
17
+ python jtp2.py <directory> [--threshold <float>]
18
+ """
19
+ import os
20
+ import json
21
+ import argparse
22
+ from PIL import Image
23
+ import safetensors.torch
24
+ import timm
25
+ from timm.models import VisionTransformer
26
+ import torch
27
+ from torchvision.transforms import transforms
28
+ from torchvision.transforms import InterpolationMode
29
+ import torchvision.transforms.functional as TF
30
+ import pillow_jxl
31
+
32
+ torch.set_grad_enabled(False)
33
+
34
+
35
class Fit(torch.nn.Module):
    """
    A custom transform module for resizing and padding images.

    The image is scaled, keeping its aspect ratio, so that it fits inside
    ``bounds``. When ``pad`` is given, the resized image is then padded
    evenly on opposite sides up to the full bounds.

    Args:
        bounds (tuple[int, int] | int): Target size as (height, width); a
            single int is used for both dimensions.
        interpolation (InterpolationMode): The interpolation method for resizing.
        grow (bool): Whether to allow upscaling of images.
        pad (float | None): Fill value used for padding; None disables padding.
    """

    def __init__(
        self,
        bounds: tuple[int, int] | int,
        interpolation=InterpolationMode.LANCZOS,
        grow: bool = True,
        pad: float | None = None
    ):
        super().__init__()
        self.bounds = (bounds, bounds) if isinstance(bounds, int) else bounds
        self.interpolation = interpolation
        self.grow = grow
        self.pad = pad

    def forward(self, img: Image.Image) -> Image.Image:
        """
        Applies the Fit transform to the input image.

        Args:
            img (Image.Image): The input PIL Image.

        Returns:
            Image.Image: The transformed PIL Image.
        """
        wimg, himg = img.size
        hbound, wbound = self.bounds
        hscale = hbound / himg
        wscale = wbound / wimg
        if not self.grow:
            hscale = min(hscale, 1.0)
            wscale = min(wscale, 1.0)
        # The smaller factor is the limiting axis; using it keeps both
        # dimensions inside the bounds.
        scale = min(hscale, wscale)
        if scale == 1.0:
            # Nothing to resize: the image is returned untouched (and unpadded).
            return img
        # Clamp after rounding so the result never exceeds the bounds.
        hnew = min(round(himg * scale), hbound)
        wnew = min(round(wimg * scale), wbound)
        img = TF.resize(img, (hnew, wnew), self.interpolation)
        if self.pad is None:
            return img
        hpad = hbound - hnew
        wpad = wbound - wnew
        # Split the padding between opposite sides so the image is centred;
        # any odd pixel goes to the bottom/right.
        tpad = hpad // 2
        bpad = hpad - tpad
        lpad = wpad // 2
        rpad = wpad - lpad
        return TF.pad(img, (lpad, tpad, rpad, bpad), self.pad)

    def __repr__(self) -> str:
        """
        Returns a string representation of the Fit module.

        Returns:
            str: A string describing the module's parameters.
        """
        return (
            f"{self.__class__.__name__}(bounds={self.bounds}, "
            f"interpolation={self.interpolation.value}, grow={self.grow}, "
            f"pad={self.pad})"
        )
98
+
99
+
100
+ class CompositeAlpha(torch.nn.Module):
101
+ """
102
+ A module for compositing images with alpha channels over a background color.
103
+ Args:
104
+ background (tuple[float, float, float] | float): The background color to
105
+ use for compositing.
106
+ """
107
+ def __init__(self, background: tuple[float, float, float] | float):
108
+ super().__init__()
109
+ self.background = (
110
+ (background, background, background)
111
+ if isinstance(background, float)
112
+ else background
113
+ )
114
+ self.background = torch.tensor(self.background).unsqueeze(1).unsqueeze(2)
115
+
116
+ def forward(self, img: torch.Tensor) -> torch.Tensor:
117
+ """
118
+ Applies alpha compositing to the input image tensor.
119
+ Args:
120
+ img (torch.Tensor): The input image tensor.
121
+ Returns:
122
+ torch.Tensor: The composited image tensor.
123
+ """
124
+ if img.shape[-3] == 3:
125
+ return img
126
+ alpha = img[..., 3, None, :, :]
127
+ img[..., :3, :, :] *= alpha
128
+ background = self.background.expand(-1, img.shape[-2], img.shape[-1])
129
+ if background.ndim == 1:
130
+ background = background[:, None, None]
131
+ elif background.ndim == 2:
132
+ background = background[None, :, :]
133
+ img[..., :3, :, :] += (1.0 - alpha) * background
134
+ return img[..., :3, :, :]
135
+
136
+ def __repr__(self) -> str:
137
+ """
138
+ Returns a string representation of the CompositeAlpha module.
139
+ Returns:
140
+ str: A string describing the module's parameters.
141
+ """
142
+ return f"{self.__class__.__name__}(background={self.background})"
143
+
144
+
145
# Preprocessing pipeline, applied in order: fit the image inside 384x384,
# convert it to a tensor, blend any alpha channel over a 0.5 background,
# normalize every channel with mean/std 0.5, then centre-crop to 384x384.
transform = transforms.Compose(
    [
        Fit((384, 384)),
        transforms.ToTensor(),
        CompositeAlpha(0.5),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
            inplace=True,
        ),
        transforms.CenterCrop((384, 384)),
    ]
)

# Backbone is created without pretrained weights; the checkpoint is loaded
# further down in this file.
model = timm.create_model(
    "vit_so400m_patch14_siglip_384.webli", pretrained=False, num_classes=9083
)  # type: VisionTransformer
157
+
158
+
159
class GatedHead(torch.nn.Module):
    """
    Classifier head whose per-class score is an activation times a gate.

    A single linear layer produces ``2 * num_classes`` values per sample; the
    first half feeds the activation sigmoid and the second half the gate
    sigmoid, and the two results are multiplied element-wise.

    Args:
        num_features (int): The number of input features.
        num_classes (int): The number of output classes.
    """

    def __init__(self, num_features: int, num_classes: int):
        super().__init__()
        self.num_classes = num_classes
        # One projection yields both halves: activations first, gates second.
        self.linear = torch.nn.Linear(num_features, num_classes * 2)
        self.act = torch.nn.Sigmoid()
        self.gate = torch.nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Applies the gated head to the input tensor.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The element-wise product of the activated and gated
            halves of the projection.
        """
        projected = self.linear(x)
        split_at = self.num_classes
        activations = self.act(projected[:, :split_at])
        gates = self.gate(projected[:, split_at:])
        return activations * gates
184
+
185
+
186
# Replace the stock head with the gated head before loading the checkpoint.
# min() of the existing head's weight shape picks its smaller dimension,
# which is used here as the input feature count.
model.head = GatedHead(min(model.head.weight.shape), 9083)
safetensors.torch.load_model(
    model,
    "/home/kade/source/repos/JTP2/JTP_PILOT2-e3-vit_so400m_patch14_siglip_384.safetensors",
)

if torch.cuda.is_available():
    model.cuda()
    # Half precision / channels-last only on devices with compute
    # capability major version 7 or higher.
    if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
        model.to(dtype=torch.float16, memory_format=torch.channels_last)

model.eval()

with open("/home/kade/source/repos/JTP2/tags.json", "r", encoding="utf-8") as file:
    tags = json.load(file)  # type: dict

# Tag names are stored with underscores; expose them with spaces instead.
allowed_tags = [tag_name.replace("_", " ") for tag_name in tags.keys()]

# Scores of the most recently classified image, highest first.
sorted_tag_score = {}
201
+
202
+
203
def run_classifier(image, threshold):
    """
    Classify one image and return the tags whose score exceeds the threshold.

    As a side effect, the module-level ``sorted_tag_score`` is replaced with
    the 250 highest-scoring tags of this image, ordered from highest to
    lowest score.

    Args:
        image (PIL.Image): The input image.
        threshold (float): The probability threshold for including tags.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            tag probabilities.
    """
    global sorted_tag_score
    # Convert to RGBA and add a leading batch dimension of size one.
    batch = transform(image.convert('RGBA')).unsqueeze(0)
    if torch.cuda.is_available():
        batch = batch.cuda()
        if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
            batch = batch.to(dtype=torch.float16, memory_format=torch.channels_last)
    with torch.no_grad():
        probits = model(batch)[0].cpu()
        values, indices = probits.topk(250)
    tag_score = {
        allowed_tags[class_index]: score
        for class_index, score in zip(indices.tolist(), values.tolist())
    }
    ranked = sorted(tag_score.items(), key=lambda item: item[1], reverse=True)
    sorted_tag_score = dict(ranked)
    return create_tags(threshold)
230
+
231
def create_tags(threshold):
    """
    Filter the module-level ``sorted_tag_score`` by a score threshold.

    Args:
        threshold (float): The probability threshold for including tags; only
            scores strictly greater than it are kept.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            filtered tag probabilities, both in the order of
            ``sorted_tag_score``.
    """
    kept = {}
    for tag_name, score in sorted_tag_score.items():
        if score > threshold:
            kept[tag_name] = score
    return ", ".join(kept), kept
247
+
248
def process_directory(directory, threshold):
    """
    Processes all images in a directory and its subdirectories, generating tags
    for each image.

    Images that already have a ``.txt`` file next to them are skipped. For
    every other image the tags are written to that ``.txt`` file.

    Args:
        directory (str): The path to the directory containing images.
        threshold (float): The probability threshold for including tags.

    Returns:
        dict: A dictionary mapping image paths to their generated tags. Only
            images tagged during this call are included.
    """
    results = {}
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg', '.png', '.jxl')):
                continue
            image_path = os.path.join(root, file)
            text_file_path = os.path.splitext(image_path)[0] + ".txt"

            # Skip if a corresponding .txt file already exists
            if os.path.exists(text_file_path):
                continue

            # Open in a context manager so the file handle is released after
            # each image instead of accumulating over a large directory tree.
            with Image.open(image_path) as image:
                tags, _scores = run_classifier(image, threshold)
            results[image_path] = tags

            # Save tags to a text file with the same name as the image, using UTF-8 encoding
            with open(text_file_path, "w", encoding="utf-8") as text_file:
                text_file.write(tags)
    return results
277
+
278
+
279
if __name__ == "__main__":
    # Command-line entry point: tag every image below the given directory and
    # echo the generated tags.
    parser = argparse.ArgumentParser(description="Run inference on a directory of images.")
    parser.add_argument(
        "directory",
        type=str,
        help="Target directory containing images.",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.2,
        help="Threshold for tag filtering.",
    )
    args = parser.parse_args()
    results = process_directory(args.directory, args.threshold)
    for image_path, tags in results.items():
        print(f"{image_path}: {tags}")
jtp2_overwrite ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ JTP2 (Joint Tagger Project 2) Image Classification Script
6
+
7
+ This script implements a multi-label classifier for furry images using the
8
+ PILOT2 model. It processes images, generates tags, and saves the results. The
9
+ model is based on a Vision Transformer architecture and uses a custom GatedHead
10
+ for classification.
11
+
12
+ Key features:
13
+ - Image preprocessing and transformation
14
+ - Model inference using PILOT2
15
+ - Tag generation with customizable threshold
16
+ - Batch processing of image directories
17
+ - Saving results as text files alongside images
18
+
19
+ Usage:
20
+ python jtp2.py <directory> [--threshold <float>]
21
+ """
22
+
23
+ import os
24
+ import json
25
+ import argparse
26
+ from PIL import Image
27
+ import safetensors.torch
28
+ import timm
29
+ from timm.models import VisionTransformer
30
+ import torch
31
+ from torchvision.transforms import transforms
32
+ from torchvision.transforms import InterpolationMode
33
+ import torchvision.transforms.functional as TF
34
+ import pillow_jxl
35
+
36
+
37
class Fit(torch.nn.Module):
    """
    Resize an image to fit inside a bounding box, optionally padding it.

    The aspect ratio is preserved. When ``pad`` is set, the resized image is
    padded evenly on opposite sides up to the full bounds.

    Args:
        bounds (tuple[int, int] | int): Target size as (height, width); a
            single int is used for both dimensions.
        interpolation (InterpolationMode): The interpolation method for resizing.
        grow (bool): Whether to allow upscaling of images.
        pad (float | None): Fill value used for padding; None disables padding.
    """

    def __init__(
        self,
        bounds: tuple[int, int] | int,
        interpolation=InterpolationMode.LANCZOS,
        grow: bool = True,
        pad: float | None = None
    ):
        super().__init__()
        if isinstance(bounds, int):
            bounds = (bounds, bounds)
        self.bounds = bounds
        self.interpolation = interpolation
        self.grow = grow
        self.pad = pad

    def forward(self, img: Image) -> Image:
        """
        Applies the Fit transform to the input image.

        Args:
            img (Image): The input PIL Image.

        Returns:
            Image: The transformed PIL Image.
        """
        src_w, src_h = img.size
        max_h, max_w = self.bounds
        ratio_h = max_h / src_h
        ratio_w = max_w / src_w
        if not self.grow:
            ratio_h, ratio_w = min(ratio_h, 1.0), min(ratio_w, 1.0)
        # The smaller ratio is the limiting axis.
        ratio = min(ratio_h, ratio_w)
        if ratio == 1.0:
            # Nothing to resize: the image is returned untouched (and unpadded).
            return img
        # Clamp after rounding so the result never exceeds the bounds.
        out_h = min(round(src_h * ratio), max_h)
        out_w = min(round(src_w * ratio), max_w)
        resized = TF.resize(img, (out_h, out_w), self.interpolation)
        if self.pad is None:
            return resized
        extra_h = max_h - out_h
        extra_w = max_w - out_w
        # Centre the image; any odd pixel goes to the bottom/right.
        top, left = extra_h // 2, extra_w // 2
        return TF.pad(resized, (left, top, extra_w - left, extra_h - top), self.pad)

    def __repr__(self) -> str:
        """
        Returns a string representation of the Fit module.

        Returns:
            str: A string describing the module's parameters.
        """
        name = self.__class__.__name__
        return (
            f"{name}(bounds={self.bounds}, "
            f"interpolation={self.interpolation.value}, grow={self.grow}, "
            f"pad={self.pad})"
        )
106
+
107
+
108
+ class CompositeAlpha(torch.nn.Module):
109
+ """
110
+ A module for compositing images with alpha channels over a background color.
111
+
112
+ Args:
113
+ background (tuple[float, float, float] | float): The background color to
114
+ use for compositing.
115
+ """
116
+
117
+ def __init__(self, background: tuple[float, float, float] | float):
118
+ super().__init__()
119
+ self.background = (
120
+ (background, background, background)
121
+ if isinstance(background, float)
122
+ else background
123
+ )
124
+ self.background = torch.tensor(self.background).unsqueeze(1).unsqueeze(2)
125
+
126
+ def forward(self, img: torch.Tensor) -> torch.Tensor:
127
+ """
128
+ Applies alpha compositing to the input image tensor.
129
+
130
+ Args:
131
+ img (torch.Tensor): The input image tensor.
132
+
133
+ Returns:
134
+ torch.Tensor: The composited image tensor.
135
+ """
136
+ if img.shape[-3] == 3:
137
+ return img
138
+ alpha = img[..., 3, None, :, :]
139
+ img[..., :3, :, :] *= alpha
140
+ background = self.background.expand(-1, img.shape[-2], img.shape[-1])
141
+ if background.ndim == 1:
142
+ background = background[:, None, None]
143
+ elif background.ndim == 2:
144
+ background = background[None, :, :]
145
+ img[..., :3, :, :] += (1.0 - alpha) * background
146
+ return img[..., :3, :, :]
147
+
148
+ def __repr__(self) -> str:
149
+ """
150
+ Returns a string representation of the CompositeAlpha module.
151
+
152
+ Returns:
153
+ str: A string describing the module's parameters.
154
+ """
155
+ return f"{self.__class__.__name__}(background={self.background})"
156
+
157
+
158
# Preprocessing pipeline, applied in order: fit the image inside 384x384,
# convert it to a tensor, blend any alpha channel over a 0.5 background,
# normalize every channel with mean/std 0.5, then centre-crop to 384x384.
transform = transforms.Compose(
    [
        Fit((384, 384)),
        transforms.ToTensor(),
        CompositeAlpha(0.5),
        transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5],
            inplace=True,
        ),
        transforms.CenterCrop((384, 384)),
    ]
)

# Backbone is created without pretrained weights; the checkpoint is loaded
# further down in this file.
model = timm.create_model(
    "vit_so400m_patch14_siglip_384.webli", pretrained=False, num_classes=9083
)  # type: VisionTransformer
171
+
172
+
173
class GatedHead(torch.nn.Module):
    """
    Classifier head whose per-class score is an activation times a gate.

    A single linear layer produces ``2 * num_classes`` values per sample; the
    first half feeds the activation sigmoid and the second half the gate
    sigmoid, and the two results are multiplied element-wise.

    Args:
        num_features (int): The number of input features.
        num_classes (int): The number of output classes.
    """

    def __init__(self, num_features: int, num_classes: int):
        super().__init__()
        self.num_classes = num_classes
        # One projection yields both halves: activations first, gates second.
        self.linear = torch.nn.Linear(num_features, num_classes * 2)
        self.act = torch.nn.Sigmoid()
        self.gate = torch.nn.Sigmoid()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Applies the gated head to the input tensor.

        Args:
            x (torch.Tensor): The input tensor.

        Returns:
            torch.Tensor: The element-wise product of the activated and gated
            halves of the projection.
        """
        projected = self.linear(x)
        split_at = self.num_classes
        activations = self.act(projected[:, :split_at])
        gates = self.gate(projected[:, split_at:])
        return activations * gates
202
+
203
+
204
# Replace the stock head with the gated head before loading the checkpoint.
# min() of the existing head's weight shape picks its smaller dimension,
# which is used here as the input feature count.
model.head = GatedHead(min(model.head.weight.shape), 9083)
safetensors.torch.load_model(
    model,
    "/home/kade/source/repos/JTP2/JTP_PILOT2-e3-vit_so400m_patch14_siglip_384.safetensors",
)

if torch.cuda.is_available():
    model.cuda()
    # Half precision / channels-last only on devices with compute
    # capability major version 7 or higher.
    if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
        model.to(dtype=torch.float16, memory_format=torch.channels_last)

model.eval()

with open("/home/kade/source/repos/JTP2/tags.json", "r", encoding="utf-8") as file:
    tags = json.load(file)  # type: dict

# Tag names are stored with underscores; expose them with spaces instead.
allowed_tags = [tag_name.replace("_", " ") for tag_name in tags.keys()]

# Scores of the most recently classified image, highest first.
sorted_tag_score = {}
224
+
225
+
226
def run_classifier(image, threshold):
    """
    Classify one image and return the tags whose score exceeds the threshold.

    As a side effect, the module-level ``sorted_tag_score`` is replaced with
    the 250 highest-scoring tags of this image, ordered from highest to
    lowest score.

    Args:
        image (PIL.Image): The input image.
        threshold (float): The probability threshold for including tags.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            tag probabilities.
    """
    global sorted_tag_score
    # Convert to RGBA and add a leading batch dimension of size one.
    batch = transform(image.convert('RGBA')).unsqueeze(0)
    if torch.cuda.is_available():
        batch = batch.cuda()
        if torch.cuda.get_device_capability()[0] >= 7:  # tensor cores
            batch = batch.to(dtype=torch.float16, memory_format=torch.channels_last)
    with torch.no_grad():
        probits = model(batch)[0].cpu()
        values, indices = probits.topk(250)
    tag_score = {
        allowed_tags[class_index]: score
        for class_index, score in zip(indices.tolist(), values.tolist())
    }
    ranked = sorted(tag_score.items(), key=lambda item: item[1], reverse=True)
    sorted_tag_score = dict(ranked)
    return create_tags(threshold)
255
+
256
+
257
def create_tags(threshold):
    """
    Filter the module-level ``sorted_tag_score`` by a score threshold.

    Args:
        threshold (float): The probability threshold for including tags; only
            scores strictly greater than it are kept.

    Returns:
        tuple: A tuple containing the comma-separated tags and a dictionary of
            filtered tag probabilities, both in the order of
            ``sorted_tag_score``.
    """
    kept = {}
    for tag_name, score in sorted_tag_score.items():
        if score > threshold:
            kept[tag_name] = score
    return ", ".join(kept), kept
275
+
276
+
277
def process_directory(directory, threshold):
    """
    Processes all images in a directory and its subdirectories, generating tags
    for each image.

    The tags of every image are written to a ``.txt`` file next to it,
    replacing any existing file of that name.

    Args:
        directory (str): The path to the directory containing images.
        threshold (float): The probability threshold for including tags.

    Returns:
        dict: A dictionary mapping image paths to their generated tags.
    """
    results = {}
    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.lower().endswith(('.jpg', '.jpeg', '.png', '.jxl')):
                continue
            image_path = os.path.join(root, file)
            # Open in a context manager so the file handle is released after
            # each image instead of accumulating over a large directory tree.
            with Image.open(image_path) as image:
                tags, _scores = run_classifier(image, threshold)
            results[image_path] = tags
            # Save tags to a text file with the same name as the image
            text_file_path = os.path.splitext(image_path)[0] + ".txt"
            with open(text_file_path, "w", encoding="utf-8") as text_file:
                text_file.write(tags)
    return results
302
+
303
+
304
if __name__ == "__main__":
    # Command-line entry point: tag every image below the given directory and
    # echo the generated tags.
    parser = argparse.ArgumentParser(description="Run inference on a directory of images.")
    parser.add_argument(
        "directory",
        type=str,
        help="Target directory containing images.",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.2,
        help="Threshold for tag filtering.",
    )
    args = parser.parse_args()

    results = process_directory(args.directory, args.threshold)
    for image_path, tags in results.items():
        print(f"{image_path}: {tags}")
paper-qa.code-workspace ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "folders": [
3
+ {
4
+ "path": "."
5
+ },
6
+ {
7
+ "path": "../miniconda3/lib/python3.12/site-packages/paperqa"
8
+ }
9
+ ],
10
+ "settings": {}
11
+ }
papers_please ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import subprocess
5
+ import sys
6
+ from pathlib import Path
7
+ import pickle
8
+ from paperqa import Settings, Docs
9
+
10
def _single_model_config(model_name):
    """Return a model-list configuration that contains exactly one model."""
    return {
        "model_list": [
            {
                "model_name": model_name,
                "litellm_params": {
                    "model": model_name,
                },
            }
        ]
    }


local_llm_config = _single_model_config("ollama/llama3.1")
local_emb_config = _single_model_config("ollama/mxbai-embed-large")

# The same chat model and configuration serve both the main and the summary
# role; embeddings use their own model and configuration.
settings = Settings(
    llm="ollama/llama3.1",
    llm_config=local_llm_config,
    summary_llm="ollama/llama3.1",
    summary_llm_config=local_llm_config,
    embedding="ollama/mxbai-embed-large",
    embedding_config=local_emb_config,
)
39
+
40
def find_main_tex_file(folder_path: Path):
    """
    Find the main LaTeX file in the given folder.

    This function searches for a .tex file that is likely to be the main file
    of a LaTeX project. It first checks for common names like 'main.tex',
    then looks for files containing '\\documentclass', and finally returns
    the first .tex file if no other criteria are met.

    Args:
        folder_path (Path): The path to the folder to search in (recursively).

    Returns:
        Path: The path to the main .tex file, or None if no .tex files are found.
    """
    tex_files = list(folder_path.glob('**/*.tex'))
    if not tex_files:
        return None

    # Check for common main file names. The lookup is keyed by file name
    # because the glob yields Path objects, which never compare equal to a
    # plain string; the first file found for each name wins.
    by_name = {}
    for tex_file in tex_files:
        by_name.setdefault(tex_file.name, tex_file)
    for name in ('main.tex', 'paper.tex', 'article.tex'):
        if name in by_name:
            return by_name[name]

    # If no common name found, look for \documentclass
    for tex_file in tex_files:
        # errors='ignore' keeps one file in another encoding from aborting
        # the whole search; the marker being looked for is plain ASCII.
        with open(tex_file, 'r', encoding='utf-8', errors='ignore') as f:
            if '\\documentclass' in f.read():
                return tex_file

    # If still not found, return the first .tex file
    return tex_files[0]
73
+
74
def run_latexpand(input_file, output_file):
    """
    Run the latexpand command on the input file and write the result to the output file.

    The standard output of ``latexpand <input_file>`` is captured and written
    to ``output_file`` as UTF-8. Failures are reported on stdout and then
    re-raised, so a caller never continues with an output file that was not
    written.

    Args:
        input_file (str or Path): The path to the input LaTeX file.
        output_file (str or Path): The path where the expanded LaTeX content will be written.

    Raises:
        subprocess.CalledProcessError: If latexpand encounters an error during execution.
        FileNotFoundError: If the latexpand command is not found in the system PATH.
    """
    try:
        result = subprocess.run(['latexpand', input_file],
                                capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running latexpand: {e}")
        raise
    except FileNotFoundError:
        print("latexpand not found. Please make sure it's installed and in your PATH.")
        raise
    # Written outside the try block so that a problem with the output path is
    # not misreported as a missing latexpand executable.
    with open(output_file, 'w', encoding='utf-8') as output_file_handle:
        output_file_handle.write(result.stdout)
    print(f"Expanded LaTeX written to {output_file}")
100
+
101
+
102
# Pickled document index, reused on later runs to skip re-ingesting sources.
cache_path = Path("pqa_index.pkl")

if cache_path.exists():
    # NOTE(review): unpickling can execute arbitrary code; only reuse a cache
    # file that this script wrote itself.
    with open(cache_path, "rb") as f:
        docs = pickle.load(f)
else:
    docs = Docs()
    for root, dirs, files in Path(".").walk():
        latex_added = False
        # Iterate over a copy: every successfully added "arXiv-*" directory is
        # removed from `dirs` so the walk does not descend into it, and all
        # such siblings are handled rather than only the first one.
        for dir_name in list(dirs):
            if not dir_name.startswith("arXiv-"):
                continue
            dir_path = root / dir_name
            concat_main = dir_path / ".main.tex"
            try:
                # Step 1: Find the main entry TeX file
                main_file = find_main_tex_file(dir_path)
                if not main_file:
                    raise ValueError("No main TeX file found.")
                # Step 2 & 3: Run latexpand and write output
                run_latexpand(main_file, concat_main)
            except (ValueError, subprocess.CalledProcessError,
                    FileNotFoundError) as preprocess_error:
                print(f"Failed to pre-process {dir_name}: {preprocess_error}")
                continue
            print(f"adding {dir_path} (latex source)")
            try:
                docs.add(concat_main, settings=settings, disable_check=True)
            except (IOError, OSError, ValueError) as add_error:
                print(f"Failed to add {dir_path}: {add_error}")
                continue
            dirs.remove(dir_name)
            latex_added = True
        # Loose files in this directory are only ingested when no LaTeX
        # source directory was added from it.
        # NOTE(review): confirm that skipping them in that case is intended.
        if not latex_added:
            for file_name in files:
                if file_name.lower().endswith((".pdf", ".txt", ".md", ".tex")):
                    file_path = root / file_name
                    print(f"adding {file_path}")
                    docs.add(file_path, settings=settings, disable_check=True)

    with open(cache_path, "wb") as f:
        pickle.dump(docs, f)


if __name__ == "__main__":
    # All command-line arguments together form the query.
    if len(sys.argv) > 1:
        QUERY = " ".join(sys.argv[1:])
        answer = docs.query(QUERY, settings=settings)
        print(answer)
    else:
        print("Please provide a query as a command-line argument.")
        print("Usage: python script_name.py 'Your query here'")
password ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import random
5
+ import string
6
+
7
def generate_password(length=16):
    """
    Generate a random password from letters, digits and punctuation.

    Characters are drawn with the ``secrets`` module, which is intended for
    security-sensitive values, rather than the ``random`` module.

    Args:
        length (int): Number of characters in the password.

    Returns:
        str: The generated password; empty when ``length`` is 0.
    """
    # Imported here so the function works without touching the file's imports.
    import secrets

    characters = string.ascii_letters + string.digits + string.punctuation
    return ''.join(secrets.choice(characters) for _ in range(length))


# Generate a strong 16-character long password
strong_password = generate_password()
print(strong_password)