marinone94 committed on
Commit
8421a48
1 Parent(s): 4621ac3

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 3.82973051071167,
4
- "eval_runtime": 132.808,
5
  "eval_samples": 4620,
6
- "eval_samples_per_second": 34.787,
7
- "eval_steps_per_second": 4.352,
8
  "eval_wer": 1.0,
9
- "train_loss": 7.050228030182595,
10
- "train_runtime": 437.537,
11
  "train_samples": 11030,
12
- "train_samples_per_second": 25.209,
13
- "train_steps_per_second": 0.786
14
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "eval_loss": 2.91414213180542,
4
+ "eval_runtime": 133.9783,
5
  "eval_samples": 4620,
6
+ "eval_samples_per_second": 34.483,
7
+ "eval_steps_per_second": 4.314,
8
  "eval_wer": 1.0,
9
+ "train_loss": 3.289040254992108,
10
+ "train_runtime": 1292.4856,
11
  "train_samples": 11030,
12
+ "train_samples_per_second": 17.068,
13
+ "train_steps_per_second": 0.532
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 3.82973051071167,
4
- "eval_runtime": 132.808,
5
  "eval_samples": 4620,
6
- "eval_samples_per_second": 34.787,
7
- "eval_steps_per_second": 4.352,
8
  "eval_wer": 1.0
9
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "eval_loss": 2.91414213180542,
4
+ "eval_runtime": 133.9783,
5
  "eval_samples": 4620,
6
+ "eval_samples_per_second": 34.483,
7
+ "eval_steps_per_second": 4.314,
8
  "eval_wer": 1.0
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed159635bd947770842e25d915d8157fd198d2f06d5db2476c6663627e3beee7
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c01bc756f9d54db8c90d48e01252abf1115ffe23b3c7297aa9b48389b4e9132
3
  size 1262075377
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 7.050228030182595,
4
- "train_runtime": 437.537,
5
  "train_samples": 11030,
6
- "train_samples_per_second": 25.209,
7
- "train_steps_per_second": 0.786
8
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "train_loss": 3.289040254992108,
4
+ "train_runtime": 1292.4856,
5
  "train_samples": 11030,
6
+ "train_samples_per_second": 17.068,
7
+ "train_steps_per_second": 0.532
8
  }
trainer_state.json CHANGED
@@ -1,43 +1,70 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9978245105148659,
5
- "global_step": 344,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.29,
12
- "learning_rate": 3.7125e-06,
13
- "loss": 12.2666,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.58,
18
- "learning_rate": 7.4625e-06,
19
- "loss": 6.1937,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.87,
24
- "learning_rate": 1.1212499999999998e-05,
25
- "loss": 4.1366,
26
  "step": 300
27
  },
28
  {
29
- "epoch": 1.0,
30
- "step": 344,
31
- "total_flos": 1.0141067319340954e+18,
32
- "train_loss": 7.050228030182595,
33
- "train_runtime": 437.537,
34
- "train_samples_per_second": 25.209,
35
- "train_steps_per_second": 0.786
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
- "max_steps": 344,
39
- "num_train_epochs": 1,
40
- "total_flos": 1.0141067319340954e+18,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.997824510514866,
5
+ "global_step": 688,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.29,
12
+ "learning_rate": 6.613023952095809e-05,
13
+ "loss": 5.1206,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 0.58,
18
+ "learning_rate": 5.490269461077844e-05,
19
+ "loss": 3.0901,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 0.87,
24
+ "learning_rate": 4.36751497005988e-05,
25
+ "loss": 3.0224,
26
  "step": 300
27
  },
28
  {
29
+ "epoch": 1.16,
30
+ "learning_rate": 3.244760479041916e-05,
31
+ "loss": 2.9922,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 1.45,
36
+ "learning_rate": 2.1220059880239517e-05,
37
+ "loss": 2.9357,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 1.45,
42
+ "eval_loss": 2.9458744525909424,
43
+ "eval_runtime": 138.8724,
44
+ "eval_samples_per_second": 33.268,
45
+ "eval_steps_per_second": 4.162,
46
+ "eval_wer": 1.0,
47
+ "step": 500
48
+ },
49
+ {
50
+ "epoch": 1.74,
51
+ "learning_rate": 9.99251497005988e-06,
52
+ "loss": 2.9153,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 2.0,
57
+ "step": 688,
58
+ "total_flos": 2.0290377210557414e+18,
59
+ "train_loss": 3.289040254992108,
60
+ "train_runtime": 1292.4856,
61
+ "train_samples_per_second": 17.068,
62
+ "train_steps_per_second": 0.532
63
  }
64
  ],
65
+ "max_steps": 688,
66
+ "num_train_epochs": 2,
67
+ "total_flos": 2.0290377210557414e+18,
68
  "trial_name": null,
69
  "trial_params": null
70
  }