marinone94 committed
Commit 9f100c5
1 Parent(s): 695a47b

Training in progress, step 1500

checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1a2927b1e7a6af7703304a15b14ed79cddd8a561ab70c3f1630c008c3debc0ac
+ oid sha256:2cd9c62ff94cc2f259c8cbeff5c28cee1c8ef11d0981156e0eae5ca0fddb63a1
  size 2490362385
checkpoint-1500/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c146bd2ad9f313033c049f506c561ba8fa40b130d16254cb640b46a8117068a2
+ oid sha256:acd89b40b183844d2d1cf17d26aeb8e928fe510a53c8c087c715468330a4fb98
  size 1262075377
checkpoint-1500/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:75859b6f10fc5cb61ebf19c5b9211484a9a37deea8e552cb0ba6fc98b090d7ff
+ oid sha256:0b1d791ba720c03cca9f2004a2681a7daf7371671890a4f96b90878506ee97e6
  size 559
checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d4efdd67f0b22b26bc682e2f487b713df4629fa5b02c99d86373db7c1edd7d2a
+ oid sha256:033501fd55355c135dbf9f384ee2274d0c8246e49cc5023499a0046a5993e207
  size 623
checkpoint-1500/trainer_state.json CHANGED
@@ -9,124 +9,124 @@
  "log_history": [
    {
      "epoch": 0.29,
-     "learning_rate": 7.151470588235293e-05,
-     "loss": 5.1135,
+     "learning_rate": 3.7125e-06,
+     "loss": 12.2713,
      "step": 100
    },
    {
      "epoch": 0.58,
-     "learning_rate": 6.710294117647058e-05,
-     "loss": 3.0957,
+     "learning_rate": 7.4625e-06,
+     "loss": 6.2026,
      "step": 200
    },
    {
      "epoch": 0.87,
-     "learning_rate": 6.269117647058824e-05,
-     "loss": 3.0078,
+     "learning_rate": 1.1212499999999998e-05,
+     "loss": 4.1374,
      "step": 300
    },
    {
      "epoch": 1.16,
-     "learning_rate": 5.827941176470588e-05,
-     "loss": 2.9785,
+     "learning_rate": 1.49625e-05,
+     "loss": 3.6755,
      "step": 400
    },
    {
      "epoch": 1.45,
-     "learning_rate": 5.3867647058823525e-05,
-     "loss": 2.9069,
+     "learning_rate": 1.8712499999999997e-05,
+     "loss": 3.3332,
      "step": 500
    },
    {
      "epoch": 1.45,
-     "eval_loss": 2.9046826362609863,
-     "eval_runtime": 128.7688,
-     "eval_samples_per_second": 35.878,
-     "eval_steps_per_second": 4.489,
+     "eval_loss": 3.292029857635498,
+     "eval_runtime": 137.2974,
+     "eval_samples_per_second": 33.65,
+     "eval_steps_per_second": 4.21,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 1.74,
-     "learning_rate": 4.945588235294117e-05,
-     "loss": 2.8537,
+     "learning_rate": 2.2462499999999997e-05,
+     "loss": 3.1418,
      "step": 600
    },
    {
      "epoch": 2.03,
-     "learning_rate": 4.504411764705882e-05,
-     "loss": 2.7928,
+     "learning_rate": 2.6212499999999997e-05,
+     "loss": 3.0879,
      "step": 700
    },
    {
      "epoch": 2.32,
-     "learning_rate": 4.063235294117647e-05,
-     "loss": 2.6988,
+     "learning_rate": 2.99625e-05,
+     "loss": 3.0216,
      "step": 800
    },
    {
      "epoch": 2.61,
-     "learning_rate": 3.6220588235294115e-05,
-     "loss": 2.6342,
+     "learning_rate": 3.37125e-05,
+     "loss": 2.9595,
      "step": 900
    },
    {
      "epoch": 2.91,
-     "learning_rate": 3.180882352941176e-05,
-     "loss": 2.5875,
+     "learning_rate": 3.7462499999999996e-05,
+     "loss": 2.9269,
      "step": 1000
    },
    {
      "epoch": 2.91,
-     "eval_loss": 2.415876865386963,
-     "eval_runtime": 164.5912,
-     "eval_samples_per_second": 28.07,
-     "eval_steps_per_second": 3.512,
-     "eval_wer": 1.199687939790736,
+     "eval_loss": 2.941540241241455,
+     "eval_runtime": 178.4388,
+     "eval_samples_per_second": 25.891,
+     "eval_steps_per_second": 3.239,
+     "eval_wer": 0.9966346448020559,
      "step": 1000
    },
    {
      "epoch": 3.2,
-     "learning_rate": 2.739705882352941e-05,
-     "loss": 2.5214,
+     "learning_rate": 4.12125e-05,
+     "loss": 2.914,
      "step": 1100
    },
    {
      "epoch": 3.49,
-     "learning_rate": 2.2985294117647057e-05,
-     "loss": 2.4174,
+     "learning_rate": 4.4962499999999995e-05,
+     "loss": 2.8432,
      "step": 1200
    },
    {
      "epoch": 3.78,
-     "learning_rate": 1.8573529411764705e-05,
-     "loss": 2.3271,
+     "learning_rate": 4.871249999999999e-05,
+     "loss": 2.6828,
      "step": 1300
    },
    {
      "epoch": 4.07,
-     "learning_rate": 1.416176470588235e-05,
-     "loss": 2.2784,
+     "learning_rate": 5.2462499999999994e-05,
+     "loss": 2.355,
      "step": 1400
    },
    {
      "epoch": 4.36,
-     "learning_rate": 9.75e-06,
-     "loss": 2.2043,
+     "learning_rate": 5.62125e-05,
+     "loss": 2.0719,
      "step": 1500
    },
    {
      "epoch": 4.36,
-     "eval_loss": 1.6191935539245605,
-     "eval_runtime": 127.3244,
-     "eval_samples_per_second": 36.285,
-     "eval_steps_per_second": 4.54,
-     "eval_wer": 0.9597993024536499,
+     "eval_loss": 1.164096713066101,
+     "eval_runtime": 134.2981,
+     "eval_samples_per_second": 34.401,
+     "eval_steps_per_second": 4.304,
+     "eval_wer": 0.8507923881784251,
      "step": 1500
    }
  ],
- "max_steps": 1720,
- "num_train_epochs": 5,
+ "max_steps": 17200,
+ "num_train_epochs": 50,
  "total_flos": 4.445118706030802e+18,
  "trial_name": null,
  "trial_params": null
checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1208f22bb7e06e1e9a51692db0520fbddfc3640941d51dfe45ba3188ada2ecbf
+ oid sha256:5a168f126d02648193e16ba893c3e1ef9f2c2de91803928caf5e25532aff1325
  size 2991
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5413d438578028bae2ac5a1c47311442081ff0b4a1db9b89c2080d69df74655b
+ oid sha256:acd89b40b183844d2d1cf17d26aeb8e928fe510a53c8c087c715468330a4fb98
  size 1262075377