concat_captions and zsh fuckery
Browse filesSigned-off-by: Balazs Horvath <acsipont@gmail.com>
- .zshrc +13 -13
- concat_captions +64 -0
.zshrc
CHANGED
@@ -261,17 +261,17 @@ function re() {
|
|
261 |
# It searches for all .txt files in the specified directory and its subdirectories.
|
262 |
# If a file contains the specified tag, the function removes the tag from its original position
|
263 |
# and prepends it to the beginning of the file.
|
264 |
-
# Usage:
|
265 |
-
|
266 |
local tag="$1"
|
267 |
local dir="$2"
|
268 |
|
269 |
if [[ -z "$tag" || -z "$dir" ]]; then
|
270 |
-
echo "Usage:
|
271 |
return 1
|
272 |
fi
|
273 |
|
274 |
-
find "$dir" -type f -name "*.
|
275 |
if grep -q "$tag" "$file"; then
|
276 |
sed -i "s/$tag//g" "$file"
|
277 |
sed -i "1s/^/$tag, /" "$file"
|
@@ -385,14 +385,14 @@ list_word_freqs() {
|
|
385 |
|
386 |
# Function: sample_prompts
|
387 |
# Description:
|
388 |
-
# This function takes a sample of the
|
389 |
# It reads and displays the contents of all .txt files in the specified directory,
|
390 |
-
# providing a quick overview of the tags
|
391 |
#
|
392 |
# Usage: sample_prompts <target_directory>
|
393 |
#
|
394 |
# Parameters:
|
395 |
-
# - target_directory: The directory containing the .txt files with tags
|
396 |
#
|
397 |
# Output:
|
398 |
# Prints the contents of each .txt file in the target directory, separated by newlines.
|
@@ -406,12 +406,12 @@ sample_prompts() {
|
|
406 |
}
|
407 |
|
408 |
# replace_comma_with_keep_tags
|
409 |
-
# Description: This function replaces the specified occurrence of a comma with " |||" in all *.
|
410 |
# in all subdirectories of a target directory or the current directory when no path is passed.
|
411 |
# Usage: replace_comma_with_keep_tags <occurrence_number> [target_directory]
|
412 |
# Parameters:
|
413 |
# - occurrence_number: The occurrence number of the comma to be replaced (e.g., 1 for the first occurrence).
|
414 |
-
# - target_directory (optional): The target directory to search for *.
|
415 |
# Example:
|
416 |
# replace_comma_with_keep_tags 2 /path/to/directory
|
417 |
# replace_comma_with_keep_tags 1
|
@@ -424,7 +424,7 @@ replace_comma_with_keep_tags() {
|
|
424 |
return 1
|
425 |
fi
|
426 |
|
427 |
-
find "$target_directory" -type f -name "*.
|
428 |
awk -v occurrence="$occurrence_number" '{
|
429 |
count = 0
|
430 |
for (i = 1; i <= NF; i++) {
|
@@ -670,18 +670,18 @@ replace_text_in_files() {
|
|
670 |
|
671 |
# This script adds a specified prefix to the beginning of each text file in a given directory.
|
672 |
# If the prefix already exists in the text file, it moves the prefix to the front of the text file without leaving extra commas or spaces.
|
673 |
-
# Usage:
|
674 |
# Arguments:
|
675 |
# <directory> - The directory containing the text files to be modified.
|
676 |
# <prefix> - The prefix to be added to the beginning of each text file.
|
677 |
# The script checks if the specified directory exists and iterates over each text file in the directory.
|
678 |
# For each text file, it creates a temporary file with the modified content and then replaces the original file with the temporary file.
|
679 |
# If the directory does not exist, it prints an error message.
|
680 |
-
|
681 |
local dir="$1"
|
682 |
local prefix="$2"
|
683 |
if [[ -d "$dir" ]]; then
|
684 |
-
for file in "$dir"/*.
|
685 |
if [[ -f "$file" ]]; then
|
686 |
if grep -q "$prefix" "$file"; then
|
687 |
# Move the existing prefix to the front of the text file without leaving extra commas or spaces
|
|
|
261 |
# It searches for all .txt files in the specified directory and its subdirectories.
|
262 |
# If a file contains the specified tag, the function removes the tag from its original position
|
263 |
# and prepends it to the beginning of the file.
|
264 |
+
# Usage: rejiggle_tags <tag> <directory>
|
265 |
+
rejiggle_tags() {
|
266 |
local tag="$1"
|
267 |
local dir="$2"
|
268 |
|
269 |
if [[ -z "$tag" || -z "$dir" ]]; then
|
270 |
+
echo "Usage: rejiggle_tags <tag> <directory>"
|
271 |
return 1
|
272 |
fi
|
273 |
|
274 |
+
find "$dir" -type f -name "*.tags" | while read -r file; do
|
275 |
if grep -q "$tag" "$file"; then
|
276 |
sed -i "s/$tag//g" "$file"
|
277 |
sed -i "1s/^/$tag, /" "$file"
|
|
|
385 |
|
386 |
# Function: sample_prompts
|
387 |
# Description:
|
388 |
+
# This function takes a sample of the tags in a target training directory.
|
389 |
# It reads and displays the contents of all .txt files in the specified directory,
|
390 |
+
# providing a quick overview of the tags used for training.
|
391 |
#
|
392 |
# Usage: sample_prompts <target_directory>
|
393 |
#
|
394 |
# Parameters:
|
395 |
+
# - target_directory: The directory containing the .txt files with tags.
|
396 |
#
|
397 |
# Output:
|
398 |
# Prints the contents of each .txt file in the target directory, separated by newlines.
|
|
|
406 |
}
|
407 |
|
408 |
# replace_comma_with_keep_tags
|
409 |
+
# Description: This function replaces the specified occurrence of a comma with " |||" in all *.tags files
|
410 |
# in all subdirectories of a target directory or the current directory when no path is passed.
|
411 |
# Usage: replace_comma_with_keep_tags <occurrence_number> [target_directory]
|
412 |
# Parameters:
|
413 |
# - occurrence_number: The occurrence number of the comma to be replaced (e.g., 1 for the first occurrence).
|
414 |
+
# - target_directory (optional): The target directory to search for *.tags files. If not provided, the current directory is used.
|
415 |
# Example:
|
416 |
# replace_comma_with_keep_tags 2 /path/to/directory
|
417 |
# replace_comma_with_keep_tags 1
|
|
|
424 |
return 1
|
425 |
fi
|
426 |
|
427 |
+
find "$target_directory" -type f -name "*.tags" | while read -r file; do
|
428 |
awk -v occurrence="$occurrence_number" '{
|
429 |
count = 0
|
430 |
for (i = 1; i <= NF; i++) {
|
|
|
670 |
|
671 |
# This script adds a specified prefix to the beginning of each text file in a given directory.
|
672 |
# If the prefix already exists in the text file, it moves the prefix to the front of the text file without leaving extra commas or spaces.
|
673 |
+
# Usage: inject_to_tags <directory> <prefix>
|
674 |
# Arguments:
|
675 |
# <directory> - The directory containing the text files to be modified.
|
676 |
# <prefix> - The prefix to be added to the beginning of each text file.
|
677 |
# The script checks if the specified directory exists and iterates over each text file in the directory.
|
678 |
# For each text file, it creates a temporary file with the modified content and then replaces the original file with the temporary file.
|
679 |
# If the directory does not exist, it prints an error message.
|
680 |
+
inject_to_tags() {
|
681 |
local dir="$1"
|
682 |
local prefix="$2"
|
683 |
if [[ -d "$dir" ]]; then
|
684 |
+
for file in "$dir"/*.tags; do
|
685 |
if [[ -f "$file" ]]; then
|
686 |
if grep -q "$prefix" "$file"; then
|
687 |
# Move the existing prefix to the front of the text file without leaving extra commas or spaces
|
concat_captions
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
"""
|
4 |
+
This script walks through a directory, identifies image files, and checks for the existence of corresponding
|
5 |
+
.caption and .tags files. It then concatenates the contents of .caption and .tags files into the .txt files.
|
6 |
+
|
7 |
+
Usage:
|
8 |
+
- Place the script in the directory containing the image files.
|
9 |
+
- Run the script to concatenate .caption and .tags files into .txt files.
|
10 |
+
- Use the dry_run flag to preview the changes without writing to the .txt files.
|
11 |
+
|
12 |
+
Functions:
|
13 |
+
get_files(path): Walks through the directory and yields image files along with their .caption and .tags files.
|
14 |
+
concat(caption_path, tags_path, txt_path, dry_run=False): Concatenates the contents of .caption and .tags files into the .txt file.
|
15 |
+
"""
|
16 |
+
|
17 |
+
from pathlib import Path
|
18 |
+
import os
|
19 |
+
|
20 |
+
# Lower-case image suffixes that are treated as caption/tag carriers.
FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl"}


def get_files(path):
    """Walk *path* recursively and yield each image with its sidecar paths.

    Args:
        path: Directory to scan (``str`` or ``os.PathLike``).

    Yields:
        Tuples ``(image, caption, tags, txt)`` of ``pathlib.Path`` objects,
        where the last three are the image path with its suffix replaced by
        ``.caption``, ``.tags`` and ``.txt`` respectively.

    A message is printed for every missing ``.caption`` or ``.tags`` file,
    but the tuple is still yielded; callers decide how to treat the gap.
    """
    for root, _dirs, files in os.walk(path):
        # os.walk already returns `root` prefixed with `path`; joining it onto
        # `path` again doubled the prefix for relative directories other
        # than "." (e.g. "data/data/sub").
        root = Path(root)
        for name in files:
            file = root / name
            # Compare case-insensitively so "IMG.PNG" is picked up as well.
            if file.suffix.lower() not in FILE_EXTS:
                continue
            caption = file.with_suffix(".caption")
            tags = file.with_suffix(".tags")
            txt = file.with_suffix(".txt")
            if not caption.exists():
                print(f"{caption} does not exist")
            if not tags.exists():
                print(f"{tags} does not exist")
            yield file, caption, tags, txt
|
40 |
+
|
41 |
+
|
42 |
+
def _read_or_empty(path):
    """Return the text content of *path*, or "" when the file does not exist."""
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return ""


def concat(caption_path, tags_path, txt_path, dry_run=False):
    """Merge a ``.tags`` file and a ``.caption`` file into a ``.txt`` file.

    The result is ``"<tags>, <caption>"``. Surrounding whitespace is stripped
    from the caption; leading/trailing commas, spaces and newlines are
    stripped from the tags.

    Args:
        caption_path: Path of the caption file to read.
        tags_path: Path of the tags file to read.
        txt_path: Path of the text file to write.
        dry_run: When true, print the result instead of writing ``txt_path``.

    A missing input file is treated as empty, so one absent sidecar no
    longer aborts the whole run. When both inputs are missing or empty,
    nothing is written and a "skipped" message is printed.
    """
    caption = _read_or_empty(caption_path).strip()
    tags = _read_or_empty(tags_path).strip(", \n")

    # Join only the non-empty parts so an empty side does not leave a
    # dangling ", " at the start or end of the output.
    parts = [part for part in (tags, caption) if part]
    if not parts:
        print(f"skipped {txt_path}: no caption or tags found")
        return

    txt = ", ".join(parts)

    if dry_run:
        print(f"{txt_path}:")
        print(txt)
        print()
    else:
        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(txt)
        print(f"wrote {txt_path}")
|
59 |
+
|
60 |
+
if __name__ == "__main__":
    # Local import: argparse is only needed when run as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description="Concatenate .tags and .caption files into .txt files "
        "for every image under the current directory."
    )
    # Previously dry_run was hard-coded to False and required editing the
    # script; without the flag the behaviour is unchanged.
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="print the resulting text instead of writing the .txt files",
    )
    args = parser.parse_args()

    for _image, caption, tags, txt in get_files("."):
        concat(caption, tags, txt, dry_run=args.dry_run)
|
64 |
+
|