k4d3 committed on
Commit
06e788b
1 Parent(s): c7dbbcf

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

Files changed (5) hide show
  1. .zshrc +85 -0
  2. a2mp4 +142 -0
  3. debug_emoji +8 -0
  4. txt2emoji +76 -0
  5. write_dataset_cfg_combined.py +90 -0
.zshrc CHANGED
@@ -1,3 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  export GIN_MODE=release
2
 
3
  export NODE_ENV=production
@@ -191,6 +275,7 @@ alias gcs='git clone --recurse-submodules'
191
  # Alias for running the Grabber-cli command
192
  alias grabber="Grabber-cli"
193
 
 
194
  # 'pie' is a shortcut for installing a Python package in editable mode
195
  # using the pip command with the --use-pep517 option.
196
  alias pie='pip install -e . --use-pep517'
 
1
+ alias wordfreq='find ${1:-.} -type f -name "*.txt" -exec cat {} + | tr "," " " | tr " " "\n" | sort | uniq -c | sort -nr | less'
2
+ alias wq='wordfreq'
3
+ alias 🐺="ollama serve & conda activate openwebui && open-webui serve --port 6969"
4
+ alias gsa='git submodule add'
5
+
6
deleteorphantags() {
  # Delete every .tags file that has no sibling image with the same basename.
  for tag_file in **/*.tags; do
    base_name="${tag_file%.tags}"
    # Keep the sidecar if any supported image format exists alongside it.
    if [[ -f "${base_name}.png" || -f "${base_name}.jpg" || -f "${base_name}.jpeg" || -f "${base_name}.jxl" || -f "${base_name}.webp" ]]; then
      continue
    fi
    rm -v "$tag_file"
  done
}
17
+
18
deleteorphantxt() {
  # Delete every .txt file that has no sibling image with the same basename.
  #
  # Bug fix: the suffix strip previously used "%.tags" instead of "%.txt",
  # so base_name kept the full "foo.txt" name, the image checks tested
  # "foo.txt.png" etc. (which never exist), and EVERY .txt file was deleted.
  for txt_file in **/*.txt; do
    # Strip the .txt extension to get the shared basename.
    base_name="${txt_file%.txt}"
    if [[ ! -f "${base_name}.png" && ! -f "${base_name}.jpg" && ! -f "${base_name}.jpeg" && ! -f "${base_name}.jxl" && ! -f "${base_name}.webp" ]]; then
      # No image file exists for this basename: the .txt is an orphan.
      rm -v "$txt_file"
    fi
  done
}
29
+
30
look4orphantags() {
  # Print (without deleting) every .tags file that lacks a sibling image.
  for tag_file in **/*.tags; do
    base_name="${tag_file%.tags}"
    # Skip sidecars that are backed by at least one image format.
    if [[ -f "${base_name}.png" || -f "${base_name}.jpg" || -f "${base_name}.jpeg" || -f "${base_name}.jxl" || -f "${base_name}.webp" ]]; then
      continue
    fi
    echo "$tag_file"
  done
}
41
+
42
look4orphantxt() {
  # Print (without deleting) every .txt file that lacks a sibling image.
  #
  # Bug fix: the suffix strip previously used "%.tags" instead of "%.txt",
  # so base_name kept the full "foo.txt" name and the image checks tested
  # "foo.txt.png" etc. — every .txt file was reported as an orphan.
  for txt_file in **/*.txt; do
    # Strip the .txt extension to get the shared basename.
    base_name="${txt_file%.txt}"
    if [[ ! -f "${base_name}.png" && ! -f "${base_name}.jpg" && ! -f "${base_name}.jpeg" && ! -f "${base_name}.jxl" && ! -f "${base_name}.webp" ]]; then
      # No image file exists for this basename: the .txt is an orphan.
      echo "$txt_file"
    fi
  done
}
53
+
54
check4sig() {
  # Scan a directory's .caption files for "signature"/"watermark" mentions
  # and open every match in nvim for manual cleanup.
  target_dir="$1"

  # Guard clauses: require an argument that names an existing directory.
  if [[ -z "$target_dir" ]]; then
    echo "Please provide a target directory."
    return 1
  fi
  if [[ ! -d "$target_dir" ]]; then
    echo "The provided target directory does not exist."
    return 1
  fi

  found_files=()
  for file in "$target_dir"/*.caption; do
    # The glob itself is returned literally when nothing matches; skip non-files.
    [[ -f "$file" ]] || continue
    if grep -q -e "signature" -e "watermark" "$file"; then
      found_files+=("$file")
    fi
  done

  if (( ${#found_files[@]} == 0 )); then
    echo "No 'signature' or 'watermark' found in any .caption files."
  else
    echo "Opening files in nvim: ${found_files[@]}"
    nvim "${found_files[@]}"
  fi
}
82
+
83
+ alias fuckoff="conda deactivate && rconda"
84
+
85
  export GIN_MODE=release
86
 
87
  export NODE_ENV=production
 
275
  # Alias for running the Grabber-cli command
276
  alias grabber="Grabber-cli"
277
 
278
+ alias pi='pip install'
279
  # 'pie' is a shortcut for installing a Python package in editable mode
280
  # using the pip command with the --use-pep517 option.
281
  alias pie='pip install -e . --use-pep517'
a2mp4 ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import argparse
5
+ import os
6
+ import shutil
7
+ import subprocess
8
+ import glob
9
+ import re
10
+ from pathlib import Path
11
+
12
def create_video(input_dir, sample, temp_dir, step_multiplier, repeat, max_images):
    """Render the PNG sequence for one sample prefix into an MP4 via ffmpeg.

    Each source frame is duplicated `repeat` times into `temp_dir`, encoded
    once with optional step-counter overlay and frame decimation, then
    re-encoded with an 8-second freeze on the last frame.

    Args:
        input_dir: Directory containing "<sample>_*.png" source frames.
        sample: Sample-name prefix used to select frames and name the output.
        temp_dir: Scratch directory for duplicated frames and the intermediate MP4.
        step_multiplier: Steps-per-frame for the drawtext overlay; falsy disables it.
        repeat: How many copies of each frame to emit (slows playback).
        max_images: If truthy, keep only every `max_images`-th frame via a
            select filter.  # NOTE(review): "max" name suggests a cap, but the
            # filter actually decimates — confirm intent with the author.

    Returns:
        True on success, False if any ffmpeg step fails.
    """
    # Output lands in the CWD, named after the current directory + sample.
    output_filename = f"{os.path.basename(os.getcwd())}_sample{sample}.mp4"
    print(f"Processing sample {sample}. Output filename: {output_filename}")

    # Create repeated images: N suffixed copies of each frame in temp_dir.
    print(f"Creating repeated images for sample {sample}...")
    for img in glob.glob(f"{input_dir}/{sample}_*.png"):
        for i in range(repeat):
            base = os.path.splitext(os.path.basename(img))[0]
            shutil.copy(img, f"{temp_dir}/{base}_{i+1}.png")

    # Prepare ffmpeg -vf filter chain.
    vf_options = "scale=1024x1024"
    if step_multiplier:
        # Overlay a live "Steps: N" counter derived from the frame number.
        vf_options += f",drawtext=fontfile=/usr/share/fonts/TTF/Inconsolata-Light.ttf:text='Steps\\: %{{expr\\:trunc(n*{step_multiplier}/{repeat})}}':x=10:y=h-th-10:fontsize=24:fontcolor=white"

    if max_images:
        # select must run before scale/drawtext, hence prepended.
        vf_options = f"select='not(mod(n\\,{max_images}))',{vf_options}"

    # Run first ffmpeg command: encode duplicated frames to an intermediate MP4.
    temp_output = f"{temp_dir}/temp_{sample}.mp4"
    ffmpeg_cmd = [
        "ffmpeg", "-framerate", "60",
        "-pattern_type", "glob", "-i", f"{temp_dir}/{sample}_*.png",
        "-vf", vf_options,
        "-crf", "18", "-c:v", "libx264", "-b:v", "12M",
        "-pix_fmt", "yuv420p", "-y", temp_output
    ]

    try:
        subprocess.run(ffmpeg_cmd, check=True)
    except subprocess.CalledProcessError:
        print(f"Error: ffmpeg command failed for sample {sample}.")
        return False

    # Get duration and process final video.
    try:
        # ffmpeg with only -i exits nonzero but prints stream info on stderr;
        # the Duration line is parsed as a sanity check that the file is valid.
        duration_cmd = ["ffmpeg", "-i", temp_output]
        result = subprocess.run(duration_cmd, capture_output=True, text=True)
        duration_match = re.search(r'Duration: (\d{2}):(\d{2}):(\d{2})', result.stderr)
        if duration_match:
            hours, minutes, seconds = map(float, duration_match.groups())
            # NOTE(review): `duration` is computed but never used below —
            # only the presence of the Duration line gates the final encode.
            duration = hours * 3600 + minutes * 60 + seconds

            # Final pass: clone the last frame for 8 extra seconds.
            final_cmd = [
                "ffmpeg", "-i", temp_output,
                "-vf", "tpad=stop_mode=clone:stop_duration=8",
                "-c:v", "libx264", "-b:v", "12M", "-crf", "18",
                "-pix_fmt", "yuv420p", "-y", output_filename
            ]
            subprocess.run(final_cmd, check=True)
        else:
            print("Error: Could not determine video duration.")
            return False

    except subprocess.CalledProcessError:
        print(f"Error: Final ffmpeg processing failed for sample {sample}.")
        return False

    # Clean up temporary files for this sample.
    for f in glob.glob(f"{temp_dir}/{sample}_*.png"):
        os.remove(f)
    os.remove(temp_output)

    return True
77
+
78
def get_step_size_from_filenames(sample):
    """Infer the step interval between consecutive sample frames.

    Looks at the first two "<sample>_*.png" files (sorted) in the current
    directory and returns the difference of their 5-digit step counters,
    or None when fewer than two files exist or the counters can't be parsed.
    """
    frame_files = sorted(glob.glob(f"{sample}_*.png"))
    if len(frame_files) < 2:
        return None

    # The step counter is the 5-digit group between underscores.
    step_pattern = re.compile(r'_(\d{5})_')
    matches = [step_pattern.search(name) for name in frame_files[:2]]
    if not all(matches):
        return None

    first_step, second_step = (int(m.group(1)) for m in matches)
    return second_step - first_step
93
+
94
def main():
    """CLI entry point: convert per-sample PNG sequences in the CWD to MP4s.

    Returns 0 on success, 1 when no PNGs are found or a conversion fails.
    """
    parser = argparse.ArgumentParser(description='Convert PNG sequence to MP4')
    parser.add_argument('--max', type=int, help='Maximum number of images')
    parser.add_argument('--step', type=int, help='Step multiplier')
    parser.add_argument('--repeat', type=int, default=1, help='Repeat count')
    parser.add_argument('--steps-from-filename', action='store_true', help='Calculate steps from filename')
    args = parser.parse_args()

    # Scratch space for duplicated frames and intermediate encodes.
    temp_dir = os.path.expanduser("~/.local/tmp")
    os.makedirs(temp_dir, exist_ok=True)
    print("Created temporary directory...")

    png_files = glob.glob("*.png")
    if not png_files:
        print("Error: No PNG files found in the current directory.")
        return 1

    # A sample name is the alphabetic prefix before the 5-digit step counter.
    samples = sorted(set(re.findall(r'([a-zA-Z]+)_\d{5}_', ' '.join(png_files))))

    for sample in samples:
        step_multiplier = args.step
        if args.steps_from_filename:
            step_multiplier = get_step_size_from_filenames(sample)
            if step_multiplier:
                print(f"Detected step size: {step_multiplier}")
            else:
                print("Error: Could not determine step size from filenames")
                continue

        if not create_video(".", sample, temp_dir, step_multiplier, args.repeat, args.max):
            # Abort on the first failed sample and clean up the scratch dir.
            shutil.rmtree(temp_dir)
            return 1

    # Clean up.
    print("Cleaning up temporary directory...")
    shutil.rmtree(temp_dir)
    print("All samples processed successfully.")
    return 0
139
+
140
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    raise SystemExit(main())
142
+
debug_emoji ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
import emoji


def print_all_emojis():
    """Print every emoji known to the emoji package, one per line."""
    for character in emoji.EMOJI_DATA:
        print(character)


print_all_emojis()
txt2emoji ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.tokenize import word_tokenize
3
+ from emoji import EMOJI_DATA
4
+
5
+ # Download required NLTK data (only needed once)
6
+ nltk.download('punkt', quiet=True)
7
+
8
def get_emoji_mapping():
    """Build a word -> list-of-emojis index from English emoji descriptions.

    Each word of every emoji's English name (lowercased, underscores treated
    as spaces) maps to all emojis whose name contains that word.
    """
    mapping = {}
    for symbol, info in EMOJI_DATA.items():
        if 'en' not in info:
            # Skip entries without an English description.
            continue
        for word in info['en'].lower().replace('_', ' ').split():
            mapping.setdefault(word, []).append(symbol)
    return mapping
19
+
20
def text_to_emojis(text):
    """Map the words of *text* to emojis, returning them space-separated.

    Hand-picked overrides for common words take precedence; otherwise the
    first emoji from the generated word->emoji index is used. Words without
    a match contribute nothing; an empty string is returned when nothing
    matches.
    """
    emoji_map = get_emoji_mapping()

    # Manual overrides for common words, checked before the generated map.
    custom_mappings = {
        'love': '❤️',
        'cat': '😺',
        'cats': '😺',
        'dog': '🐶',
        'dogs': '🐶',
        'sun': '☀️',
        'moon': '🌙',
        'star': '⭐',
        'happy': '😊',
        'sad': '😢',
        'angry': '😠',
        'food': '🍔',
        'heart': '❤️',
        'fire': '🔥',
        'hot': '🔥',
        'cold': '❄️',
        'snow': '❄️',
        'rain': '🌧️',
        'smile': '😊',
        'laugh': '😂',
        'cry': '😢',
    }

    found_emojis = []
    for token in word_tokenize(text.lower()):
        if token in custom_mappings:
            found_emojis.append(custom_mappings[token])
        elif token in emoji_map:
            # Fall back to the first emoji whose description contains the word.
            found_emojis.append(emoji_map[token][0])

    return ' '.join(found_emojis) if found_emojis else ''
69
+
70
# Example usage
if __name__ == "__main__":
    # Quick smoke test of the converter on a mixed sentence.
    sample_text = "I love cats and dogs. The sun is shining!"
    emojis = text_to_emojis(sample_text)
    print(f"Input text: {sample_text}")
    print(f"Emojis: {emojis}")
write_dataset_cfg_combined.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/env python
2
+
3
+ import os
4
+ import toml
5
+ import sys
6
+ from pathlib import Path
7
+ from collections import defaultdict, Counter
8
+ from pprint import pprint
9
+
10
def update_config(root_dir):
    """Rebuild the subset list in <root_dir>/config.toml and gather file stats.

    Walks root_dir/<dataset>/<subset> directories (skipping dot-prefixed
    names), records each subset as {"image_dir", "num_repeats"} — where
    num_repeats is parsed from a leading "<N>_" in the subset name, defaulting
    to 1 — and classifies each file stem as captioned, caption-less image, or
    orphaned caption.

    Environment flags:
        DEBUG: print the resulting config instead of writing it.
        DELETE_ORPHANS: delete orphaned caption files (still guarded by a
            NotImplementedError — this path is deliberately unfinished).

    Returns:
        defaultdict mapping each subset Path to a Counter of per-class counts.
    """
    root_dir = Path(root_dir).resolve()
    config_path = root_dir / "config.toml"
    config = toml.load(config_path)
    stats = defaultdict(Counter)

    new_subsets = []
    for dataset_path in root_dir.iterdir():
        if not dataset_path.is_dir() or dataset_path.name[0] == '.':
            continue
        for subset_path in dataset_path.iterdir():
            subset_name = subset_path.name
            subset_path = dataset_path / subset_path
            if not subset_path.is_dir() or subset_name[0] == '.':
                continue

            # Collect the dataset information for the config.toml.
            try:
                num_repeats = int(subset_name.partition('_')[0])
            except ValueError:
                num_repeats = 1
            new_subsets.append({
                "image_dir": str(subset_path),
                "num_repeats": num_repeats
            })

            # Accumulate statistics for each subset.
            # First collect the extensions of the files in the subset.
            # (Fixed: the filter set previously listed '.txt' twice.)
            data_files = defaultdict(set)
            for file in subset_path.iterdir():
                ext = file.suffix
                if ext not in {'.txt', '.tags', '.caption', '.jxl', '.jpg', '.jpeg', '.png', '.json'}:
                    continue
                stem = file.stem.partition('.')[0]
                if stem == 'sample-prompts':
                    continue
                data_files[stem].add(ext)

            # Classify the files in the subset.
            # (Fixed: caption set contained a dead dot-less 'caption' entry —
            # suffixes always carry the dot, so it could never match.)
            subset_stats = stats[subset_path]
            for stem, exts in data_files.items():
                has_caption = bool({'.txt', '.caption', '.tags'} & exts)
                has_image = bool({'.jpg', '.jpeg', '.png', '.jxl'} & exts)

                if has_caption and has_image:
                    subset_stats["captioned"] += 1
                elif has_image:
                    subset_stats["no_caption"] += 1
                elif has_caption:
                    subset_stats["orphans"] += 1
                    if 'DELETE_ORPHANS' in os.environ:
                        # Fixed: the log line interpolated a stale `ext` left
                        # over from the extension-scan loop, printing a wrong
                        # filename; report the stem with its actual extensions.
                        print(f"Deleting orphan {subset_path / stem} ({', '.join(sorted(exts))})")
                        if 'DEBUG' not in os.environ:
                            for ext in exts:
                                (subset_path / f"{stem}{ext}").unlink()
                            raise NotImplementedError("UNFINISHED DO NOT USE")
                else:
                    if '.toml' not in exts:
                        for ext in exts:
                            subset_stats[ext] += 1

    # Edit the config.toml.
    config["datasets"][0]["subsets"] = new_subsets

    if "DEBUG" in os.environ:
        print(toml.dumps(config))
    else:
        with open(config_path, "w") as f:
            toml.dump(config, f)

    return stats
80
+
81
if __name__ == "__main__":
    # Require the dataset root directory as the sole positional argument.
    if len(sys.argv) < 2:
        print("Usage: [DEBUG=1] [DELETE_ORPHANS=1] python script.py <ROOT_DIR>")
        sys.exit(1)

    stats = update_config(sys.argv[1])
    # Report per-subset statistics, ordered by subset path.
    for subset, counts in sorted(stats.items(), key=lambda item: item[0]):
        print(subset, dict(counts))