k4d3 committed on
Commit
7dd51e7
1 Parent(s): 2740489

add training script

Browse files

Signed-off-by: Balazs Horvath <acsipont@gmail.com>

Files changed (1) hide show
  1. training_scripts/ti-furious +135 -0
training_scripts/ti-furious ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env zsh
#
# Training launcher: activates the conda environment and runs an SDXL
# LoKr (LyCORIS) training session via kohya-ss sd-scripts.
#
# >>> conda initialize >>>
# !! Contents within this block are managed by 'conda init' !!
__conda_setup="$('/home/kade/miniconda3/bin/conda' 'shell.zsh' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
    eval "$__conda_setup"
else
    if [ -f "/home/kade/miniconda3/etc/profile.d/conda.sh" ]; then
        . "/home/kade/miniconda3/etc/profile.d/conda.sh"
    else
        export PATH="/home/kade/miniconda3/bin:$PATH"
    fi
fi
unset __conda_setup
# <<< conda initialize <<<

conda activate sdscripts
# Run identity and I/O locations. The trailing digits of NAME encode the
# intended number of training steps (e.g. "...s2000" -> 2000 steps).
NAME="furious-v3s2000"
TRAINING_DIR="/home/kade/datasets/furious"
OUTPUT_DIR="/home/kade/output_dir"

# Extract the trailing step count from NAME.
# Quote "$NAME" and use printf (echo's escape handling is shell-specific).
STEPS=$(printf '%s' "$NAME" | grep -oE '[0-9]+$')

# If no number is found at the end of NAME, fall back to a default.
# Pass the value as a printf argument, never as the format string.
if [ -z "$STEPS" ]; then
  STEPS=4096
  printf 'No step count found in NAME. Using default value of \e[35m%s\e[0m\n' "$STEPS"
else
  printf 'Extracted \e[35m%s\e[0m steps from NAME\n' "$STEPS"
fi
# Full argument list for sdxl_train_network.py, grouped by concern.
# alpha=1 @ dim=16 is the same lr than alpha=4 @ dim=256
# --min_snr_gamma=1
args=(
  # ⚠️ TODO: Benchmark...
  --debiased_estimation_loss
  # ⚠️ TODO: What does this do? Does it even work?
  --max_token_length=225
  # Keep Tokens
  --keep_tokens=1
  --keep_tokens_separator="|||"
  # Model
  --pretrained_model_name_or_path=/home/kade/ComfyUI/models/checkpoints/illustriousXLV01.safetensors
  # Output, logging
  --output_dir="$OUTPUT_DIR/$NAME"
  --output_name="$NAME"
  --log_prefix="$NAME-"
  --log_with=tensorboard
  --logging_dir="$OUTPUT_DIR/logs"
  --seed=1728871242
  # Dataset
  --train_data_dir="$TRAINING_DIR"
  --dataset_repeats=1
  --resolution="1024,1024"
  --enable_bucket
  --bucket_reso_steps=64
  --min_bucket_reso=256
  --max_bucket_reso=2048
  --flip_aug
  --shuffle_caption
  --cache_latents
  --cache_latents_to_disk
  --max_data_loader_n_workers=4
  --persistent_data_loader_workers
  # Network config
  # NOTE(review): dim=100000 looks like the LyCORIS "use full dimension"
  # convention for LoKr rather than a literal rank — confirm before changing.
  --network_dim=100000
  # ⚠️ TODO: Plot
  --network_alpha=64
  --network_module="lycoris.kohya"
  #--network_train_text_encoder_only
  --network_args
  "preset=full-lin"
  "conv_dim=0"
  "decompose_both=False"
  "conv_alpha=64"
  "rank_dropout=0"
  "module_dropout=0"
  "use_tucker=True"
  "use_scalar=False"
  "rank_dropout_scale=False"
  "algo=lokr"
  "bypass_mode=False"
  "factor=16"
  "dora_wd=True"
  "train_norm=False"
  --network_dropout=0
  # Optimizer config
  --optimizer_type=ClybW
  --train_batch_size=14
  #--gradient_accumulation_steps=2
  --max_grad_norm=1
  --gradient_checkpointing
  #--scale_weight_norms=1
  # LR Scheduling
  --max_train_steps=$STEPS
  --lr_warmup_steps=0
  --learning_rate=0.0001
  --unet_lr=0.0002
  --text_encoder_lr=0.0001
  --lr_scheduler="cosine"
  --lr_scheduler_args="num_cycles=0.375"
  # Noise
  --multires_noise_iterations=12
  --multires_noise_discount=0.4
  #--min_snr_gamma=1
  # Optimization, details
  --no_half_vae
  --sdpa
  --mixed_precision="bf16"
  # Saving
  --save_model_as="safetensors"
  --save_precision="fp16"
  --save_every_n_steps=50
  # Saving States
  #--save_state
  # Either resume from a saved state
  #--resume="$OUTPUT_DIR/wolflink-vfucks400" # Resume from saved state
  #--skip_until_initial_step
  # Or from a checkpoint
  #--network_weights="$OUTPUT_DIR/wolflink-vfucks400/wolflink-vfucks400-step00000120.safetensors" # Resume from checkpoint (not needed with state, i think)
  #--initial_step=120
  # Sampling
  --sample_every_n_steps=100
  --sample_prompts="$TRAINING_DIR/sample-prompts.txt"
  --sample_sampler="euler_a"
  --sample_at_first
  --caption_extension=".txt"
)
# Launch training from the sd-scripts checkout. Guard the cd: if the repo
# is missing we must abort rather than run python from the wrong directory.
cd ~/source/repos/sd-scripts-sd3 || exit 1
#accelerate launch --num_cpu_threads_per_process=2
python "./sdxl_train_network.py" "${args[@]}" "$@"
cd ~