File size: 11,204 Bytes
d44aced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
{
  "best_metric": 0.7928669410150893,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-masakhaner-yor/checkpoint-4000",
  "epoch": 73.52941176470588,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.94,
      "eval_accuracy_score": 0.9617709065354884,
      "eval_f1": 0.7467362924281985,
      "eval_loss": 0.16836212575435638,
      "eval_precision": 0.7044334975369458,
      "eval_recall": 0.7944444444444444,
      "eval_runtime": 4.2439,
      "eval_samples_per_second": 71.868,
      "eval_steps_per_second": 9.19,
      "step": 200
    },
    {
      "epoch": 5.88,
      "eval_accuracy_score": 0.9564300773014758,
      "eval_f1": 0.7172774869109947,
      "eval_loss": 0.20965266227722168,
      "eval_precision": 0.6782178217821783,
      "eval_recall": 0.7611111111111111,
      "eval_runtime": 4.2334,
      "eval_samples_per_second": 72.046,
      "eval_steps_per_second": 9.212,
      "step": 400
    },
    {
      "epoch": 7.35,
      "learning_rate": 4.865771812080537e-05,
      "loss": 0.1207,
      "step": 500
    },
    {
      "epoch": 8.82,
      "eval_accuracy_score": 0.9651440618411806,
      "eval_f1": 0.7470198675496688,
      "eval_loss": 0.19184653460979462,
      "eval_precision": 0.7139240506329114,
      "eval_recall": 0.7833333333333333,
      "eval_runtime": 4.2345,
      "eval_samples_per_second": 72.028,
      "eval_steps_per_second": 9.21,
      "step": 600
    },
    {
      "epoch": 11.76,
      "eval_accuracy_score": 0.9675333801827126,
      "eval_f1": 0.7758389261744967,
      "eval_loss": 0.20845761895179749,
      "eval_precision": 0.7506493506493507,
      "eval_recall": 0.8027777777777778,
      "eval_runtime": 4.2327,
      "eval_samples_per_second": 72.058,
      "eval_steps_per_second": 9.214,
      "step": 800
    },
    {
      "epoch": 14.71,
      "learning_rate": 4.697986577181208e-05,
      "loss": 0.0064,
      "step": 1000
    },
    {
      "epoch": 14.71,
      "eval_accuracy_score": 0.9676739283204497,
      "eval_f1": 0.768,
      "eval_loss": 0.20797914266586304,
      "eval_precision": 0.7384615384615385,
      "eval_recall": 0.8,
      "eval_runtime": 4.225,
      "eval_samples_per_second": 72.189,
      "eval_steps_per_second": 9.231,
      "step": 1000
    },
    {
      "epoch": 17.65,
      "eval_accuracy_score": 0.9612087139845397,
      "eval_f1": 0.7708609271523178,
      "eval_loss": 0.2564171552658081,
      "eval_precision": 0.7367088607594937,
      "eval_recall": 0.8083333333333333,
      "eval_runtime": 4.2295,
      "eval_samples_per_second": 72.112,
      "eval_steps_per_second": 9.221,
      "step": 1200
    },
    {
      "epoch": 20.59,
      "eval_accuracy_score": 0.967111735769501,
      "eval_f1": 0.7891156462585034,
      "eval_loss": 0.2191239595413208,
      "eval_precision": 0.7733333333333333,
      "eval_recall": 0.8055555555555556,
      "eval_runtime": 4.2308,
      "eval_samples_per_second": 72.09,
      "eval_steps_per_second": 9.218,
      "step": 1400
    },
    {
      "epoch": 22.06,
      "learning_rate": 4.530201342281879e-05,
      "loss": 0.0037,
      "step": 1500
    },
    {
      "epoch": 23.53,
      "eval_accuracy_score": 0.9680955727336613,
      "eval_f1": 0.7924016282225237,
      "eval_loss": 0.213937446475029,
      "eval_precision": 0.7745358090185677,
      "eval_recall": 0.8111111111111111,
      "eval_runtime": 4.2289,
      "eval_samples_per_second": 72.122,
      "eval_steps_per_second": 9.222,
      "step": 1600
    },
    {
      "epoch": 26.47,
      "eval_accuracy_score": 0.9631763879128602,
      "eval_f1": 0.7652645861601086,
      "eval_loss": 0.2679533362388611,
      "eval_precision": 0.7480106100795756,
      "eval_recall": 0.7833333333333333,
      "eval_runtime": 4.2299,
      "eval_samples_per_second": 72.106,
      "eval_steps_per_second": 9.22,
      "step": 1800
    },
    {
      "epoch": 29.41,
      "learning_rate": 4.36241610738255e-05,
      "loss": 0.0033,
      "step": 2000
    },
    {
      "epoch": 29.41,
      "eval_accuracy_score": 0.9633169360505973,
      "eval_f1": 0.7709190672153635,
      "eval_loss": 0.2611113488674164,
      "eval_precision": 0.7615176151761518,
      "eval_recall": 0.7805555555555556,
      "eval_runtime": 4.2112,
      "eval_samples_per_second": 72.426,
      "eval_steps_per_second": 9.261,
      "step": 2000
    },
    {
      "epoch": 32.35,
      "eval_accuracy_score": 0.9657062543921293,
      "eval_f1": 0.7735099337748345,
      "eval_loss": 0.2416420429944992,
      "eval_precision": 0.739240506329114,
      "eval_recall": 0.8111111111111111,
      "eval_runtime": 4.2178,
      "eval_samples_per_second": 72.312,
      "eval_steps_per_second": 9.246,
      "step": 2200
    },
    {
      "epoch": 35.29,
      "eval_accuracy_score": 0.9575544624033732,
      "eval_f1": 0.7470198675496688,
      "eval_loss": 0.26062682271003723,
      "eval_precision": 0.7139240506329114,
      "eval_recall": 0.7833333333333333,
      "eval_runtime": 4.2283,
      "eval_samples_per_second": 72.134,
      "eval_steps_per_second": 9.224,
      "step": 2400
    },
    {
      "epoch": 36.76,
      "learning_rate": 4.194630872483222e-05,
      "loss": 0.0033,
      "step": 2500
    },
    {
      "epoch": 38.24,
      "eval_accuracy_score": 0.9621925509486999,
      "eval_f1": 0.7560321715817695,
      "eval_loss": 0.2517440915107727,
      "eval_precision": 0.7305699481865285,
      "eval_recall": 0.7833333333333333,
      "eval_runtime": 4.221,
      "eval_samples_per_second": 72.257,
      "eval_steps_per_second": 9.239,
      "step": 2600
    },
    {
      "epoch": 41.18,
      "eval_accuracy_score": 0.9619114546732256,
      "eval_f1": 0.7779273216689098,
      "eval_loss": 0.25837406516075134,
      "eval_precision": 0.7545691906005222,
      "eval_recall": 0.8027777777777778,
      "eval_runtime": 4.2231,
      "eval_samples_per_second": 72.221,
      "eval_steps_per_second": 9.235,
      "step": 2800
    },
    {
      "epoch": 44.12,
      "learning_rate": 4.026845637583892e-05,
      "loss": 0.0021,
      "step": 3000
    },
    {
      "epoch": 44.12,
      "eval_accuracy_score": 0.9619114546732256,
      "eval_f1": 0.7852257181942545,
      "eval_loss": 0.2560689449310303,
      "eval_precision": 0.7735849056603774,
      "eval_recall": 0.7972222222222223,
      "eval_runtime": 4.2232,
      "eval_samples_per_second": 72.22,
      "eval_steps_per_second": 9.235,
      "step": 3000
    },
    {
      "epoch": 47.06,
      "eval_accuracy_score": 0.9631763879128602,
      "eval_f1": 0.757123473541384,
      "eval_loss": 0.24956972897052765,
      "eval_precision": 0.7400530503978779,
      "eval_recall": 0.775,
      "eval_runtime": 4.2237,
      "eval_samples_per_second": 72.212,
      "eval_steps_per_second": 9.234,
      "step": 3200
    },
    {
      "epoch": 50.0,
      "eval_accuracy_score": 0.9655657062543921,
      "eval_f1": 0.7722222222222223,
      "eval_loss": 0.28158125281333923,
      "eval_precision": 0.7722222222222223,
      "eval_recall": 0.7722222222222223,
      "eval_runtime": 4.2211,
      "eval_samples_per_second": 72.256,
      "eval_steps_per_second": 9.239,
      "step": 3400
    },
    {
      "epoch": 51.47,
      "learning_rate": 3.859060402684564e-05,
      "loss": 0.002,
      "step": 3500
    },
    {
      "epoch": 52.94,
      "eval_accuracy_score": 0.9645818692902319,
      "eval_f1": 0.772117962466488,
      "eval_loss": 0.24671129882335663,
      "eval_precision": 0.7461139896373057,
      "eval_recall": 0.8,
      "eval_runtime": 4.2219,
      "eval_samples_per_second": 72.243,
      "eval_steps_per_second": 9.238,
      "step": 3600
    },
    {
      "epoch": 55.88,
      "eval_accuracy_score": 0.9659873506676037,
      "eval_f1": 0.7923497267759563,
      "eval_loss": 0.2602536678314209,
      "eval_precision": 0.7795698924731183,
      "eval_recall": 0.8055555555555556,
      "eval_runtime": 4.2241,
      "eval_samples_per_second": 72.205,
      "eval_steps_per_second": 9.233,
      "step": 3800
    },
    {
      "epoch": 58.82,
      "learning_rate": 3.6912751677852356e-05,
      "loss": 0.0013,
      "step": 4000
    },
    {
      "epoch": 58.82,
      "eval_accuracy_score": 0.9665495432185524,
      "eval_f1": 0.7928669410150893,
      "eval_loss": 0.291377454996109,
      "eval_precision": 0.7831978319783198,
      "eval_recall": 0.8027777777777778,
      "eval_runtime": 4.2136,
      "eval_samples_per_second": 72.384,
      "eval_steps_per_second": 9.256,
      "step": 4000
    },
    {
      "epoch": 61.76,
      "eval_accuracy_score": 0.9658468025298664,
      "eval_f1": 0.7842605156037992,
      "eval_loss": 0.280934602022171,
      "eval_precision": 0.76657824933687,
      "eval_recall": 0.8027777777777778,
      "eval_runtime": 4.2253,
      "eval_samples_per_second": 72.183,
      "eval_steps_per_second": 9.23,
      "step": 4200
    },
    {
      "epoch": 64.71,
      "eval_accuracy_score": 0.9621925509486999,
      "eval_f1": 0.7779273216689098,
      "eval_loss": 0.3064756691455841,
      "eval_precision": 0.7545691906005222,
      "eval_recall": 0.8027777777777778,
      "eval_runtime": 4.2222,
      "eval_samples_per_second": 72.237,
      "eval_steps_per_second": 9.237,
      "step": 4400
    },
    {
      "epoch": 66.18,
      "learning_rate": 3.523489932885906e-05,
      "loss": 0.0021,
      "step": 4500
    },
    {
      "epoch": 67.65,
      "eval_accuracy_score": 0.9673928320449754,
      "eval_f1": 0.7667121418826739,
      "eval_loss": 0.2502681016921997,
      "eval_precision": 0.7533512064343163,
      "eval_recall": 0.7805555555555556,
      "eval_runtime": 4.2225,
      "eval_samples_per_second": 72.232,
      "eval_steps_per_second": 9.236,
      "step": 4600
    },
    {
      "epoch": 70.59,
      "eval_accuracy_score": 0.9641602248770204,
      "eval_f1": 0.7788331071913162,
      "eval_loss": 0.2737264335155487,
      "eval_precision": 0.7612732095490716,
      "eval_recall": 0.7972222222222223,
      "eval_runtime": 4.2229,
      "eval_samples_per_second": 72.225,
      "eval_steps_per_second": 9.235,
      "step": 4800
    },
    {
      "epoch": 73.53,
      "learning_rate": 3.3557046979865775e-05,
      "loss": 0.0013,
      "step": 5000
    },
    {
      "epoch": 73.53,
      "eval_accuracy_score": 0.963879128601546,
      "eval_f1": 0.7750677506775068,
      "eval_loss": 0.2737545967102051,
      "eval_precision": 0.7566137566137566,
      "eval_recall": 0.7944444444444444,
      "eval_runtime": 4.2199,
      "eval_samples_per_second": 72.277,
      "eval_steps_per_second": 9.242,
      "step": 5000
    },
    {
      "epoch": 73.53,
      "step": 5000,
      "total_flos": 2.085737378893056e+16,
      "train_loss": 0.014607224118709565,
      "train_runtime": 5667.0091,
      "train_samples_per_second": 84.701,
      "train_steps_per_second": 2.647
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 221,
  "total_flos": 2.085737378893056e+16,
  "trial_name": null,
  "trial_params": null
}