Model save
Browse files- README.md +5 -5
- all_results.json +8 -0
- breeze-listen-w2v2-kn-GF.log +6 -6
- config.json +1 -1
- model.safetensors +1 -1
- train-ctc-model.sh +3 -2
- train_results.json +8 -0
- trainer_state.json +30 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
license: cc-by-nc-4.0
|
3 |
-
base_model: facebook/mms-1b-
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
datasets:
|
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
15 |
|
16 |
# breeze-listen-w2v2-kn-GF
|
17 |
|
18 |
-
This model is a fine-tuned version of [facebook/mms-1b-
|
19 |
|
20 |
## Model description
|
21 |
|
@@ -35,12 +35,12 @@ More information needed
|
|
35 |
|
36 |
The following hyperparameters were used during training:
|
37 |
- learning_rate: 0.001
|
38 |
-
- train_batch_size:
|
39 |
- eval_batch_size: 8
|
40 |
- seed: 42
|
41 |
- distributed_type: multi-GPU
|
42 |
-
- gradient_accumulation_steps:
|
43 |
-
- total_train_batch_size:
|
44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
45 |
- lr_scheduler_type: linear
|
46 |
- lr_scheduler_warmup_steps: 100
|
|
|
1 |
---
|
2 |
license: cc-by-nc-4.0
|
3 |
+
base_model: facebook/mms-1b-fl102
|
4 |
tags:
|
5 |
- generated_from_trainer
|
6 |
datasets:
|
|
|
15 |
|
16 |
# breeze-listen-w2v2-kn-GF
|
17 |
|
18 |
+
This model is a fine-tuned version of [facebook/mms-1b-fl102](https://huggingface.co/facebook/mms-1b-fl102) on the fleurs dataset.
|
19 |
|
20 |
## Model description
|
21 |
|
|
|
35 |
|
36 |
The following hyperparameters were used during training:
|
37 |
- learning_rate: 0.001
|
38 |
+
- train_batch_size: 4
|
39 |
- eval_batch_size: 8
|
40 |
- seed: 42
|
41 |
- distributed_type: multi-GPU
|
42 |
+
- gradient_accumulation_steps: 16
|
43 |
+
- total_train_batch_size: 64
|
44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
45 |
- lr_scheduler_type: linear
|
46 |
- lr_scheduler_warmup_steps: 100
|
all_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 3.99,
|
3 |
+
"train_loss": 3.364711216517857,
|
4 |
+
"train_runtime": 13678.922,
|
5 |
+
"train_samples": 2471,
|
6 |
+
"train_samples_per_second": 0.723,
|
7 |
+
"train_steps_per_second": 0.023
|
8 |
+
}
|
breeze-listen-w2v2-kn-GF.log
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
02/04/2024
|
2 |
-
02/04/2024
|
3 |
_n_gpu=1,
|
4 |
adafactor=False,
|
5 |
adam_beta1=0.9,
|
@@ -39,7 +39,7 @@ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
|
|
39 |
fsdp_min_num_params=0,
|
40 |
fsdp_transformer_layer_cls_to_wrap=None,
|
41 |
full_determinism=False,
|
42 |
-
gradient_accumulation_steps=
|
43 |
gradient_checkpointing=True,
|
44 |
gradient_checkpointing_kwargs=None,
|
45 |
greater_is_better=None,
|
@@ -64,7 +64,7 @@ local_rank=0,
|
|
64 |
log_level=passive,
|
65 |
log_level_replica=warning,
|
66 |
log_on_each_node=True,
|
67 |
-
logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF/runs/
|
68 |
logging_first_step=False,
|
69 |
logging_nan_inf_filter=True,
|
70 |
logging_steps=500,
|
@@ -84,7 +84,7 @@ output_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w
|
|
84 |
overwrite_output_dir=True,
|
85 |
past_index=-1,
|
86 |
per_device_eval_batch_size=8,
|
87 |
-
per_device_train_batch_size=
|
88 |
prediction_loss_only=False,
|
89 |
push_to_hub=True,
|
90 |
push_to_hub_model_id=None,
|
@@ -119,4 +119,4 @@ warmup_ratio=0.0,
|
|
119 |
warmup_steps=100,
|
120 |
weight_decay=0.0,
|
121 |
)
|
122 |
-
{'train_runtime':
|
|
|
1 |
+
02/04/2024 18:56:48 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
|
2 |
+
02/04/2024 18:56:48 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
|
3 |
_n_gpu=1,
|
4 |
adafactor=False,
|
5 |
adam_beta1=0.9,
|
|
|
39 |
fsdp_min_num_params=0,
|
40 |
fsdp_transformer_layer_cls_to_wrap=None,
|
41 |
full_determinism=False,
|
42 |
+
gradient_accumulation_steps=16,
|
43 |
gradient_checkpointing=True,
|
44 |
gradient_checkpointing_kwargs=None,
|
45 |
greater_is_better=None,
|
|
|
64 |
log_level=passive,
|
65 |
log_level_replica=warning,
|
66 |
log_on_each_node=True,
|
67 |
+
logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF/runs/Feb04_18-56-48_knight,
|
68 |
logging_first_step=False,
|
69 |
logging_nan_inf_filter=True,
|
70 |
logging_steps=500,
|
|
|
84 |
overwrite_output_dir=True,
|
85 |
past_index=-1,
|
86 |
per_device_eval_batch_size=8,
|
87 |
+
per_device_train_batch_size=4,
|
88 |
prediction_loss_only=False,
|
89 |
push_to_hub=True,
|
90 |
push_to_hub_model_id=None,
|
|
|
119 |
warmup_steps=100,
|
120 |
weight_decay=0.0,
|
121 |
)
|
122 |
+
{'train_runtime': 12199.309, 'train_samples_per_second': 0.81, 'train_steps_per_second': 0.012, 'train_loss': 3.1379870364540503, 'epoch': 3.94}
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "facebook/mms-1b-
|
3 |
"activation_dropout": 0.05,
|
4 |
"adapter_attn_dim": 16,
|
5 |
"adapter_kernel_size": 3,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "facebook/mms-1b-fl102",
|
3 |
"activation_dropout": 0.05,
|
4 |
"adapter_attn_dim": 16,
|
5 |
"adapter_kernel_size": 3,
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3859264976
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5019ad31159ea5df95db8779bd8072d6f2dfcaa73fce699f4d1ef7fdd5b3043
|
3 |
size 3859264976
|
train-ctc-model.sh
CHANGED
@@ -55,8 +55,8 @@ echo "OUTDIR: ${OUTDIR}"
|
|
55 |
# Training parameters you can tweak. Feel free to directly change any of the parameters below.
|
56 |
|
57 |
MAX_EPOCHS=4
|
58 |
-
TRAIN_BATCH_SIZE=
|
59 |
-
EVAL_BATCH_SIZE=
|
60 |
LEARNING_RATE="1e-3"
|
61 |
|
62 |
EVAL_STEPS="1000"
|
@@ -80,6 +80,7 @@ python ${SCRIPT_DIR}/run_speech_recognition_ctc_adapter.py \
|
|
80 |
--output_dir="${OUTDIR}" \
|
81 |
--num_train_epochs="${MAX_EPOCHS}" \
|
82 |
--per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
|
|
|
83 |
--learning_rate="${LEARNING_RATE}" \
|
84 |
--warmup_steps="100" \
|
85 |
--evaluation_strategy="steps" \
|
|
|
55 |
# Training parameters you can tweak. Feel free to directly change any of the parameters below.
|
56 |
|
57 |
MAX_EPOCHS=4
|
58 |
+
TRAIN_BATCH_SIZE=1
|
59 |
+
EVAL_BATCH_SIZE=1
|
60 |
LEARNING_RATE="1e-3"
|
61 |
|
62 |
EVAL_STEPS="1000"
|
|
|
80 |
--output_dir="${OUTDIR}" \
|
81 |
--num_train_epochs="${MAX_EPOCHS}" \
|
82 |
--per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
|
83 |
+
--gradient_accumulation_steps="32" \
|
84 |
--learning_rate="${LEARNING_RATE}" \
|
85 |
--warmup_steps="100" \
|
86 |
--evaluation_strategy="steps" \
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 3.99,
|
3 |
+
"train_loss": 3.364711216517857,
|
4 |
+
"train_runtime": 13678.922,
|
5 |
+
"train_samples": 2471,
|
6 |
+
"train_samples_per_second": 0.723,
|
7 |
+
"train_steps_per_second": 0.023
|
8 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.9886685552407934,
|
5 |
+
"eval_steps": 1000,
|
6 |
+
"global_step": 308,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 3.99,
|
13 |
+
"step": 308,
|
14 |
+
"total_flos": 1.1188723782736937e+19,
|
15 |
+
"train_loss": 3.364711216517857,
|
16 |
+
"train_runtime": 13678.922,
|
17 |
+
"train_samples_per_second": 0.723,
|
18 |
+
"train_steps_per_second": 0.023
|
19 |
+
}
|
20 |
+
],
|
21 |
+
"logging_steps": 500,
|
22 |
+
"max_steps": 308,
|
23 |
+
"num_input_tokens_seen": 0,
|
24 |
+
"num_train_epochs": 4,
|
25 |
+
"save_steps": 1000,
|
26 |
+
"total_flos": 1.1188723782736937e+19,
|
27 |
+
"train_batch_size": 1,
|
28 |
+
"trial_name": null,
|
29 |
+
"trial_params": null
|
30 |
+
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4aaa9085f71e66c7740edfe87fb1bf5ecbe420672688ea0bc427245f20a7e66a
|
3 |
size 4856
|