k4d3 committed on
Commit
3e02680
1 Parent(s): 9adaca0

concat_captions and zsh fuckery

Browse files

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

Files changed (2) hide show
  1. .zshrc +13 -13
  2. concat_captions +64 -0
.zshrc CHANGED
@@ -261,17 +261,17 @@ function re() {
261
  # It searches for all .txt files in the specified directory and its subdirectories.
262
  # If a file contains the specified tag, the function removes the tag from its original position
263
  # and prepends it to the beginning of the file.
264
- # Usage: rejiggle_captions <tag> <directory>
265
- rejiggle_captions() {
266
  local tag="$1"
267
  local dir="$2"
268
 
269
  if [[ -z "$tag" || -z "$dir" ]]; then
270
- echo "Usage: rejiggle_captions <tag> <directory>"
271
  return 1
272
  fi
273
 
274
- find "$dir" -type f -name "*.txt" | while read -r file; do
275
  if grep -q "$tag" "$file"; then
276
  sed -i "s/$tag//g" "$file"
277
  sed -i "1s/^/$tag, /" "$file"
@@ -385,14 +385,14 @@ list_word_freqs() {
385
 
386
  # Function: sample_prompts
387
  # Description:
388
- # This function takes a sample of the tags (or captions) in a target training directory.
389
  # It reads and displays the contents of all .txt files in the specified directory,
390
- # providing a quick overview of the tags or captions used for training.
391
  #
392
  # Usage: sample_prompts <target_directory>
393
  #
394
  # Parameters:
395
- # - target_directory: The directory containing the .txt files with tags or captions.
396
  #
397
  # Output:
398
  # Prints the contents of each .txt file in the target directory, separated by newlines.
@@ -406,12 +406,12 @@ sample_prompts() {
406
  }
407
 
408
  # replace_comma_with_keep_tags
409
- # Description: This function replaces the specified occurrence of a comma with " |||" in all *.txt files
410
  # in all subdirectories of a target directory or the current directory when no path is passed.
411
  # Usage: replace_comma_with_keep_tags <occurrence_number> [target_directory]
412
  # Parameters:
413
  # - occurrence_number: The occurrence number of the comma to be replaced (e.g., 1 for the first occurrence).
414
- # - target_directory (optional): The target directory to search for *.txt files. If not provided, the current directory is used.
415
  # Example:
416
  # replace_comma_with_keep_tags 2 /path/to/directory
417
  # replace_comma_with_keep_tags 1
@@ -424,7 +424,7 @@ replace_comma_with_keep_tags() {
424
  return 1
425
  fi
426
 
427
- find "$target_directory" -type f -name "*.txt" | while read -r file; do
428
  awk -v occurrence="$occurrence_number" '{
429
  count = 0
430
  for (i = 1; i <= NF; i++) {
@@ -670,18 +670,18 @@ replace_text_in_files() {
670
 
671
  # This script adds a specified prefix to the beginning of each text file in a given directory.
672
  # If the prefix already exists in the text file, it moves the prefix to the front of the text file without leaving extra commas or spaces.
673
- # Usage: inject_to_captions <directory> <prefix>
674
  # Arguments:
675
  # <directory> - The directory containing the text files to be modified.
676
  # <prefix> - The prefix to be added to the beginning of each text file.
677
  # The script checks if the specified directory exists and iterates over each text file in the directory.
678
  # For each text file, it creates a temporary file with the modified content and then replaces the original file with the temporary file.
679
  # If the directory does not exist, it prints an error message.
680
- inject_to_captions() {
681
  local dir="$1"
682
  local prefix="$2"
683
  if [[ -d "$dir" ]]; then
684
- for file in "$dir"/*.txt; do
685
  if [[ -f "$file" ]]; then
686
  if grep -q "$prefix" "$file"; then
687
  # Move the existing prefix to the front of the text file without leaving extra commas or spaces
 
261
  # It searches for all .txt files in the specified directory and its subdirectories.
262
  # If a file contains the specified tag, the function removes the tag from its original position
263
  # and prepends it to the beginning of the file.
264
+ # Usage: rejiggle_tags <tag> <directory>
265
+ rejiggle_tags() {
266
  local tag="$1"
267
  local dir="$2"
268
 
269
  if [[ -z "$tag" || -z "$dir" ]]; then
270
+ echo "Usage: rejiggle_tags <tag> <directory>"
271
  return 1
272
  fi
273
 
274
+ find "$dir" -type f -name "*.tags" | while read -r file; do
275
  if grep -q "$tag" "$file"; then
276
  sed -i "s/$tag//g" "$file"
277
  sed -i "1s/^/$tag, /" "$file"
 
385
 
386
  # Function: sample_prompts
387
  # Description:
388
+ # This function takes a sample of the tags in a target training directory.
389
  # It reads and displays the contents of all .txt files in the specified directory,
390
+ # providing a quick overview of the tags used for training.
391
  #
392
  # Usage: sample_prompts <target_directory>
393
  #
394
  # Parameters:
395
+ # - target_directory: The directory containing the .txt files with tags.
396
  #
397
  # Output:
398
  # Prints the contents of each .txt file in the target directory, separated by newlines.
 
406
  }
407
 
408
  # replace_comma_with_keep_tags
409
+ # Description: This function replaces the specified occurrence of a comma with " |||" in all *.tags files
410
  # in all subdirectories of a target directory or the current directory when no path is passed.
411
  # Usage: replace_comma_with_keep_tags <occurrence_number> [target_directory]
412
  # Parameters:
413
  # - occurrence_number: The occurrence number of the comma to be replaced (e.g., 1 for the first occurrence).
414
+ # - target_directory (optional): The target directory to search for *.tags files. If not provided, the current directory is used.
415
  # Example:
416
  # replace_comma_with_keep_tags 2 /path/to/directory
417
  # replace_comma_with_keep_tags 1
 
424
  return 1
425
  fi
426
 
427
+ find "$target_directory" -type f -name "*.tags" | while read -r file; do
428
  awk -v occurrence="$occurrence_number" '{
429
  count = 0
430
  for (i = 1; i <= NF; i++) {
 
670
 
671
  # This script adds a specified prefix to the beginning of each text file in a given directory.
672
  # If the prefix already exists in the text file, it moves the prefix to the front of the text file without leaving extra commas or spaces.
673
+ # Usage: inject_to_tags <directory> <prefix>
674
  # Arguments:
675
  # <directory> - The directory containing the text files to be modified.
676
  # <prefix> - The prefix to be added to the beginning of each text file.
677
  # The script checks if the specified directory exists and iterates over each text file in the directory.
678
  # For each text file, it creates a temporary file with the modified content and then replaces the original file with the temporary file.
679
  # If the directory does not exist, it prints an error message.
680
+ inject_to_tags() {
681
  local dir="$1"
682
  local prefix="$2"
683
  if [[ -d "$dir" ]]; then
684
+ for file in "$dir"/*.tags; do
685
  if [[ -f "$file" ]]; then
686
  if grep -q "$prefix" "$file"; then
687
  # Move the existing prefix to the front of the text file without leaving extra commas or spaces
concat_captions ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ This script walks through a directory, identifies image files, and checks for the existence of corresponding
5
+ .caption and .tags files. It then concatenates the contents of .caption and .tags files into the .txt files.
6
+
7
+ Usage:
8
+ - Place the script in the directory containing the image files.
9
+ - Run the script to concatenate .caption and .tags files into .txt files.
10
+ - Use the dry_run flag to preview the changes without writing to the .txt files.
11
+
12
+ Functions:
13
+ get_files(path): Walks through the directory and yields image files along with their .caption and .tags files.
14
+ concat(caption_path, tags_path, txt_path, dry_run=False): Concatenates the contents of .caption and .tags files into the .txt file.
15
+ """
16
+
17
+ from pathlib import Path
18
+ import os
19
+
20
# Image suffixes (lower-case, including the dot) that are treated as images.
FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl"}


def get_files(path):
    """
    Recursively find images under ``path`` that have both sidecar files.

    Args:
        path: Directory to search (``str`` or ``Path``).

    Yields:
        ``(image, caption, tags, txt)`` tuples of ``Path`` objects, where
        ``caption``/``tags``/``txt`` are the image path with its suffix
        replaced by ``.caption``/``.tags``/``.txt``. Images whose
        ``.caption`` or ``.tags`` file is missing are reported on stdout and
        skipped, so every yielded pair of inputs exists at the time of the check.
    """
    path = Path(path)
    # Walk the directory, looking for image files
    for root, _dirs, files in os.walk(path):
        # os.walk() roots already start with `path`; joining them onto `path`
        # again would duplicate a relative prefix (e.g. "sub/sub/...").
        root = Path(root)
        for name in files:
            file = root / name
            # Compare case-insensitively so "IMG.JPG" is recognised as well.
            if file.suffix.lower() not in FILE_EXTS:
                continue
            caption = file.with_suffix(".caption")
            tags = file.with_suffix(".tags")
            txt = file.with_suffix(".txt")
            complete = True
            if not caption.exists():
                print(f"{caption} does not exist")
                complete = False
            if not tags.exists():
                print(f"{tags} does not exist")
                complete = False
            if not complete:
                # Both inputs are required to build the .txt; skip this image
                # instead of handing out paths that cannot be opened.
                continue
            yield file, caption, tags, txt
40
+
41
+
42
def concat(caption_path, tags_path, txt_path, dry_run=False):
    """
    Merge a ``.tags`` file and a ``.caption`` file into a single ``.txt`` file.

    The result is ``"<tags>, <caption>"``. Surrounding whitespace is stripped
    from the caption; surrounding commas, spaces and newlines are stripped
    from the tags. If either part is empty after stripping, it is omitted
    together with its separator, so the output never starts or ends with a
    dangling ``", "``.

    Args:
        caption_path: Path of the caption file to read.
        tags_path: Path of the tags file to read.
        txt_path: Path of the text file to (over)write.
        dry_run: When true, print the result instead of writing ``txt_path``.

    Raises:
        OSError: If an input cannot be read or the output cannot be written.
    """
    # Read and write as UTF-8 explicitly so results do not depend on the
    # platform's locale encoding.
    with open(caption_path, "r", encoding="utf-8") as f:
        caption = f.read().strip()

    with open(tags_path, "r", encoding="utf-8") as f:
        tags = f.read().strip(", \n")

    # Tags first, then the caption; drop empty parts.
    txt = ", ".join(part for part in (tags, caption) if part)

    if dry_run:
        print(f"{txt_path}:")
        print(txt)
        print()
    else:
        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(txt)
        print(f"wrote {txt_path}")
59
+
60
if __name__ == "__main__":
    # Command-line entry point. With no arguments this behaves as before:
    # process the current directory and write the .txt files.
    import argparse

    parser = argparse.ArgumentParser(
        description="Concatenate .tags and .caption files into .txt files."
    )
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="directory to search for images (default: current directory)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="print the would-be .txt contents instead of writing them",
    )
    args = parser.parse_args()

    for f in get_files(args.path):
        # f is (image, caption, tags, txt); concat() takes the last three.
        concat(*f[1:], dry_run=args.dry_run)
64
+