hanasim committed
Commit 2e75bee
1 Parent(s): 9b479cc

Model save

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: cc-by-nc-4.0
-base_model: facebook/mms-1b-all
+base_model: facebook/mms-1b-fl102
 tags:
 - generated_from_trainer
 datasets:
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # breeze-listen-w2v2-kn-GF
 
-This model is a fine-tuned version of [facebook/mms-1b-all](https://huggingface.co/facebook/mms-1b-all) on the fleurs dataset.
+This model is a fine-tuned version of [facebook/mms-1b-fl102](https://huggingface.co/facebook/mms-1b-fl102) on the fleurs dataset.
 
 ## Model description
 
@@ -35,12 +35,12 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.001
-- train_batch_size: 1
+- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
-- gradient_accumulation_steps: 32
-- total_train_batch_size: 32
+- gradient_accumulation_steps: 16
+- total_train_batch_size: 64
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 100
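Since the card documents only training, a minimal inference sketch may help readers of this diff. Everything here is an assumption rather than part of the commit: the repo id is inferred from the `output_dir`/`logging_dir` paths in the log below, and the zero waveform stands in for real 16 kHz Kannada audio.

```python
# Minimal inference sketch, assuming the standard transformers CTC API.
# The repo id is inferred from the training-log paths; not part of this commit.
import torch
from transformers import AutoProcessor, Wav2Vec2ForCTC

model_id = "simpragma/breeze-listen-w2v2-kn-GF"  # assumption: Hub repo id
processor = AutoProcessor.from_pretrained(model_id)
model = Wav2Vec2ForCTC.from_pretrained(model_id)

waveform = torch.zeros(16_000)  # placeholder: 1 s of silence at 16 kHz
inputs = processor(waveform.numpy(), sampling_rate=16_000, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
pred_ids = torch.argmax(logits, dim=-1)
print(processor.batch_decode(pred_ids))
```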
all_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.99,
+    "train_loss": 3.364711216517857,
+    "train_runtime": 13678.922,
+    "train_samples": 2471,
+    "train_samples_per_second": 0.723,
+    "train_steps_per_second": 0.023
+}
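As a sanity check, the reported throughput appears to follow from the other fields: the Trainer's speed metrics divide `train_samples × num_train_epochs` (4 per the README) and `max_steps` (308 per trainer_state.json below) by `train_runtime`. A quick verification, assuming that formula:

```python
# Sanity check of the reported throughput (assumes the HF Trainer formula:
# samples/sec = train_samples * num_train_epochs / train_runtime).
train_samples, num_epochs, runtime, max_steps = 2471, 4, 13678.922, 308

print(round(train_samples * num_epochs / runtime, 3))  # 0.723, matches
print(round(max_steps / runtime, 3))                   # 0.023, matches
```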
breeze-listen-w2v2-kn-GF.log CHANGED
@@ -1,5 +1,5 @@
-02/04/2024 13:54:35 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
-02/04/2024 13:54:35 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
+02/04/2024 18:56:48 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: True, 16-bits training: True
+02/04/2024 18:56:48 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
 _n_gpu=1,
 adafactor=False,
 adam_beta1=0.9,
@@ -39,7 +39,7 @@ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
 fsdp_min_num_params=0,
 fsdp_transformer_layer_cls_to_wrap=None,
 full_determinism=False,
-gradient_accumulation_steps=32,
+gradient_accumulation_steps=16,
 gradient_checkpointing=True,
 gradient_checkpointing_kwargs=None,
 greater_is_better=None,
@@ -64,7 +64,7 @@ local_rank=0,
 log_level=passive,
 log_level_replica=warning,
 log_on_each_node=True,
-logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF/runs/Feb04_13-54-35_knight,
+logging_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF/runs/Feb04_18-56-48_knight,
 logging_first_step=False,
 logging_nan_inf_filter=True,
 logging_steps=500,
@@ -84,7 +84,7 @@ output_dir=/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w
 overwrite_output_dir=True,
 past_index=-1,
 per_device_eval_batch_size=8,
-per_device_train_batch_size=1,
+per_device_train_batch_size=4,
 prediction_loss_only=False,
 push_to_hub=True,
 push_to_hub_model_id=None,
@@ -119,4 +119,4 @@ warmup_ratio=0.0,
 warmup_steps=100,
 weight_decay=0.0,
 )
-{'train_runtime': 13678.922, 'train_samples_per_second': 0.723, 'train_steps_per_second': 0.023, 'train_loss': 3.364711216517857, 'epoch': 3.99}
+{'train_runtime': 12199.309, 'train_samples_per_second': 0.81, 'train_steps_per_second': 0.012, 'train_loss': 3.1379870364540503, 'epoch': 3.94}
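For readers who want the logged configuration in code rather than via the shell script, here is a minimal sketch assembling `TrainingArguments` from the values visible above. Only logged values are set (everything else keeps transformers defaults), and the `output_dir` is inferred from `logging_dir`, since the log truncates it.

```python
# Sketch of the logged run configuration, assuming transformers'
# TrainingArguments API; values copied from the log above.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="/cosmos/home/sp-operator/ai/training/models/simpragma/breeze-listen-w2v2-kn-GF",  # inferred from logging_dir
    num_train_epochs=4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=16,
    gradient_checkpointing=True,
    learning_rate=1e-3,
    lr_scheduler_type="linear",
    warmup_steps=100,
    logging_steps=500,
    fp16=True,  # "16-bits training: True" in the log header
    seed=42,
    push_to_hub=True,
)
```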
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "facebook/mms-1b-all",
+  "_name_or_path": "facebook/mms-1b-fl102",
   "activation_dropout": 0.05,
   "adapter_attn_dim": 16,
   "adapter_kernel_size": 3,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84fe74380883a43f5a315e535ef7668f1661b11fa85249c1f5ea0efd5b201db3
+oid sha256:c5019ad31159ea5df95db8779bd8072d6f2dfcaa73fce699f4d1ef7fdd5b3043
 size 3859264976
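Only the Git LFS pointer changes here: the new weights have the same byte size, so the sha256 digest is what distinguishes the two runs. A sketch for verifying a downloaded copy against the pointer (the local path is hypothetical):

```python
# Verify a downloaded model.safetensors against the LFS pointer's sha256.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

expected = "c5019ad31159ea5df95db8779bd8072d6f2dfcaa73fce699f4d1ef7fdd5b3043"
assert sha256_of("model.safetensors") == expected  # hypothetical local file
```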
train-ctc-model.sh CHANGED
@@ -55,8 +55,8 @@ echo "OUTDIR: ${OUTDIR}"
 # Training parameters you can tweak. Feel free to directly change any of the parameters below.
 
 MAX_EPOCHS=4
-TRAIN_BATCH_SIZE=2
-EVAL_BATCH_SIZE=2
+TRAIN_BATCH_SIZE=1
+EVAL_BATCH_SIZE=1
 LEARNING_RATE="1e-3"
 
 EVAL_STEPS="1000"
@@ -80,6 +80,7 @@ python ${SCRIPT_DIR}/run_speech_recognition_ctc_adapter.py \
 --output_dir="${OUTDIR}" \
 --num_train_epochs="${MAX_EPOCHS}" \
 --per_device_train_batch_size="${TRAIN_BATCH_SIZE}" \
+--gradient_accumulation_steps="32" \
 --learning_rate="${LEARNING_RATE}" \
 --warmup_steps="100" \
 --evaluation_strategy="steps" \
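Note that the effective batch size is the per-device size times the accumulation steps times the GPU count, so the script's new defaults (1 × 32 × 1 = 32) differ from the run recorded in the log above (4 × 16 × 1 = 64). A one-line check of that arithmetic:

```python
# Effective batch size = per_device_batch * grad_accum_steps * n_gpus.
def effective_batch(per_device: int, accum: int, n_gpus: int = 1) -> int:
    return per_device * accum * n_gpus

print(effective_batch(1, 32))  # 32 - script defaults in this commit
print(effective_batch(4, 16))  # 64 - the logged run's total_train_batch_size
```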
train_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 3.99,
+    "train_loss": 3.364711216517857,
+    "train_runtime": 13678.922,
+    "train_samples": 2471,
+    "train_samples_per_second": 0.723,
+    "train_steps_per_second": 0.023
+}
trainer_state.json ADDED
@@ -0,0 +1,30 @@
+{
+    "best_metric": null,
+    "best_model_checkpoint": null,
+    "epoch": 3.9886685552407934,
+    "eval_steps": 1000,
+    "global_step": 308,
+    "is_hyper_param_search": false,
+    "is_local_process_zero": true,
+    "is_world_process_zero": true,
+    "log_history": [
+        {
+            "epoch": 3.99,
+            "step": 308,
+            "total_flos": 1.1188723782736937e+19,
+            "train_loss": 3.364711216517857,
+            "train_runtime": 13678.922,
+            "train_samples_per_second": 0.723,
+            "train_steps_per_second": 0.023
+        }
+    ],
+    "logging_steps": 500,
+    "max_steps": 308,
+    "num_input_tokens_seen": 0,
+    "num_train_epochs": 4,
+    "save_steps": 1000,
+    "total_flos": 1.1188723782736937e+19,
+    "train_batch_size": 1,
+    "trial_name": null,
+    "trial_params": null
+}
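The saved state is internally consistent with the pre-commit settings (`train_batch_size: 1` with 32 accumulation steps, i.e. 32 samples per optimizer step): 308 steps over 2471 samples lands at exactly the recorded epoch. A sketch of that arithmetic, assuming this accounting:

```python
# Check that global_step and epoch agree, assuming 32 samples per
# optimizer step (per-device batch 1 * 32 gradient accumulation steps).
train_samples, samples_per_step, global_step = 2471, 32, 308

epoch = global_step * samples_per_step / train_samples
print(epoch)  # ~3.9886685552407934, the epoch recorded above
```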
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:889214cde0e59492b6da27c312fdf4c9aa1ba5439a639a7b5f6e3a9dc0f91c73
+oid sha256:4aaa9085f71e66c7740edfe87fb1bf5ecbe420672688ea0bc427245f20a7e66a
 size 4856