Saving train state of step 5000
Browse files- checkpoint-5000-epoch-0/model.safetensors +3 -0
- checkpoint-5000-epoch-0/model_1.safetensors +3 -0
- checkpoint-5000-epoch-0/optimizer.bin +3 -0
- checkpoint-5000-epoch-0/random_states_0.pkl +3 -0
- checkpoint-5000-epoch-0/scheduler.bin +3 -0
- distil-whisper/events.out.tfevents.1715198685.server02.2050598.0 +2 -2
- distil-whisper/events.out.tfevents.1715202403.server02.2067342.0 +3 -0
- distil-whisper/events.out.tfevents.1715222264.server02.2131186.0 +3 -0
- run_distillation.py +1 -1
checkpoint-5000-epoch-0/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67c8afe47501f6ac5b2f112cbfc5b57bce18e4930d09140d818a8e1397b02110
|
3 |
+
size 3025686376
|
checkpoint-5000-epoch-0/model_1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b395c8a7e2bda655c415580106288d0387c227efd641bf4e11c1cd735fdb37a
|
3 |
+
size 4361070048
|
checkpoint-5000-epoch-0/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa1457704c96775105d1787f67bb4a4cb2c899b948d39aae7605519a119a841d
|
3 |
+
size 955539578
|
checkpoint-5000-epoch-0/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:182d0310c20462ddb30b52d3b75bc973bf951ee7a1044515e35ba0fa6bf75189
|
3 |
+
size 14344
|
checkpoint-5000-epoch-0/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e59aef00050f673889222b3d653de69a99bb1a8c64cad08748089305697e88a
|
3 |
+
size 1064
|
distil-whisper/events.out.tfevents.1715198685.server02.2050598.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acd480b8f76bebae6dd2b6412b25e2b1b996840cd1b32bf89eb83fd3199a3008
|
3 |
+
size 1680
|
distil-whisper/events.out.tfevents.1715202403.server02.2067342.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:155cd2eec4daa073571f0a64cf8d9e267994346662108932f0971dba6abde89d
|
3 |
+
size 88
|
distil-whisper/events.out.tfevents.1715222264.server02.2131186.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:32a06e3a1863146ccc1d6e9d2f7348ca1e03111936f4b49886b06d25442480bd
|
3 |
+
size 62058
|
run_distillation.py
CHANGED
@@ -1756,5 +1756,5 @@ def main():
|
|
1756 |
if __name__ == "__main__":
|
1757 |
main()
|
1758 |
'''
|
1759 |
-
accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps
|
1760 |
'''
|
|
|
1756 |
if __name__ == "__main__":
|
1757 |
main()
|
1758 |
'''
|
1759 |
+
accelerate launch --mixed_precision=bf16 run_distillation.py --model_name_or_path "./distil-large-v3-init" --teacher_model_name_or_path "openai/whisper-large-v3" --train_dataset_name "mozilla-foundation/common_voice_15_0" --train_dataset_config_name "de" --train_split_name "train" --text_column_name "sentence" --eval_dataset_name "mozilla-foundation/common_voice_15_0" --eval_dataset_config_name "de" --eval_split_name "validation" --eval_text_column_name "sentence" --eval_steps 5000 --save_steps 5000 --warmup_steps 500 --learning_rate 1e-4 --lr_scheduler_type "linear" --logging_steps 25 --save_total_limit 1 --max_steps 50000 --per_device_train_batch_size 4 --per_device_eval_batch_size 4 --dataloader_num_workers 2 --preprocessing_num_workers 2 --ddp_timeout 7200 --dtype "bfloat16" --output_dir "./" --use_pseudo_labels "false" --condition_on_prev_probability "0.0" --do_train --do_eval --gradient_checkpointing --overwrite_output_dir --predict_with_generate --freeze_encoder --streaming --push_to_hub --language de --max_eval_samples 50
|
1760 |
'''
|