MHGanainy commited on
Commit
0104c8b
1 Parent(s): 544222d

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -42
  2. train_results.json +6 -6
  3. trainer_state.json +151 -189
all_results.json CHANGED
@@ -1,45 +1,9 @@
1
  {
2
- "epoch": 14.0,
3
- "eval_accuracy": 0.17645015630427233,
4
- "eval_classification_report": " precision recall f1-score support\n\n AC 0.0000 0.0000 0.0000 65\n ATIO 0.0000 0.0000 0.0000 26\n LC 0.0000 0.0000 0.0000 33\n NALYSIS 0.0000 0.0000 0.0000 92\n ONE 0.0000 0.0000 0.0000 60\n PC 0.0000 0.0000 0.0000 31\n REAMBLE 0.0000 0.0000 0.0000 30\nRE_NOT_RELIED 0.0000 0.0000 0.0000 5\n RE_RELIED 0.0000 0.0000 0.0000 29\nRG_PETITIONER 0.0000 0.0000 0.0000 19\nRG_RESPONDENT 0.0000 0.0000 0.0000 13\n SSUE 0.0000 0.0000 0.0000 23\n TA 0.0000 0.0000 0.0000 28\n\n micro avg 0.0000 0.0000 0.0000 454\n macro avg 0.0000 0.0000 0.0000 454\n weighted avg 0.0000 0.0000 0.0000 454\n",
5
- "eval_f1": 0.0,
6
- "eval_loss": NaN,
7
- "eval_macro-f1": 0.023074651949762666,
8
- "eval_micro-f1": 0.17645015630427233,
9
- "eval_micro_f1": 0.0,
10
- "eval_precision": 0.0,
11
- "eval_precision-macro": 0.013573088946482487,
12
- "eval_precision-micro": 0.17645015630427233,
13
- "eval_recall": 0.0,
14
- "eval_recall-macro": 0.07692307692307693,
15
- "eval_recall-micro": 0.17645015630427233,
16
- "eval_runtime": 1.8343,
17
- "eval_samples": 30,
18
- "eval_samples_per_second": 16.355,
19
- "eval_steps_per_second": 4.361,
20
- "predict_accuracy": 0.1738816738816739,
21
- "predict_eval_accuracy": 0.1738816738816739,
22
- "predict_eval_classification_report": " precision recall f1-score support\n\n AC 0.0000 0.0000 0.0000 102\n ATIO 0.0000 0.0000 0.0000 51\n LC 0.0000 0.0000 0.0000 42\n NALYSIS 0.0000 0.0000 0.0000 148\n ONE 0.0000 0.0000 0.0000 98\n PC 0.0000 0.0000 0.0000 63\n REAMBLE 0.0000 0.0000 0.0000 54\nRE_NOT_RELIED 0.0000 0.0000 0.0000 1\n RE_RELIED 0.0000 0.0000 0.0000 70\nRG_PETITIONER 0.0000 0.0000 0.0000 49\nRG_RESPONDENT 0.0000 0.0000 0.0000 27\n SSUE 0.0000 0.0000 0.0000 22\n TA 0.0000 0.0000 0.0000 61\n\n micro avg 0.0000 0.0000 0.0000 788\n macro avg 0.0000 0.0000 0.0000 788\n weighted avg 0.0000 0.0000 0.0000 788\n",
23
- "predict_eval_f1": 0.0,
24
- "predict_eval_micro-f1": 0.0,
25
- "predict_eval_micro_f1": 0.0,
26
- "predict_eval_precision": 0.0,
27
- "predict_eval_recall": 0.0,
28
- "predict_loss": NaN,
29
- "predict_macro-f1": 0.022788520637322115,
30
- "predict_micro-f1": 0.1738816738816739,
31
- "predict_precision-macro": 0.013375513375513376,
32
- "predict_precision-micro": 0.1738816738816739,
33
- "predict_recall-macro": 0.07692307692307693,
34
- "predict_recall-micro": 0.1738816738816739,
35
- "predict_runtime": 2.1377,
36
- "predict_samples": 50,
37
- "predict_samples_per_second": 23.39,
38
- "predict_steps_per_second": 6.081,
39
- "total_flos": 6.747257278287053e+16,
40
- "train_loss": 0.6624447870913739,
41
- "train_runtime": 488.5119,
42
  "train_samples": 247,
43
- "train_samples_per_second": 10.112,
44
- "train_steps_per_second": 2.538
45
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "total_flos": 5.301416432939827e+16,
4
+ "train_loss": 0.6726446291568342,
5
+ "train_runtime": 895.6751,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  "train_samples": 247,
7
+ "train_samples_per_second": 5.515,
8
+ "train_steps_per_second": 2.769
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 14.0,
3
- "total_flos": 6.747257278287053e+16,
4
- "train_loss": 0.6624447870913739,
5
- "train_runtime": 488.5119,
6
  "train_samples": 247,
7
- "train_samples_per_second": 10.112,
8
- "train_steps_per_second": 2.538
9
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "total_flos": 5.301416432939827e+16,
4
+ "train_loss": 0.6726446291568342,
5
+ "train_runtime": 895.6751,
6
  "train_samples": 247,
7
+ "train_samples_per_second": 5.515,
8
+ "train_steps_per_second": 2.769
9
  }
trainer_state.json CHANGED
@@ -1,242 +1,204 @@
1
  {
2
- "best_metric": 0.7829107328933658,
3
- "best_model_checkpoint": "logs/indian_build_rr/roberta-base/seed_1/checkpoint-682",
4
- "epoch": 14.0,
5
  "eval_steps": 500,
6
- "global_step": 868,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6509204584925321,
14
- "eval_loss": 1.1796680688858032,
15
- "eval_macro-f1": 0.2406374552281572,
16
- "eval_micro-f1": 0.6509204584925321,
17
- "eval_precision-macro": 0.36507985738726567,
18
- "eval_precision-micro": 0.6509204584925321,
19
- "eval_recall-macro": 0.24249037703341708,
20
- "eval_recall-micro": 0.6509204584925321,
21
- "eval_runtime": 1.5286,
22
- "eval_samples_per_second": 19.625,
23
- "eval_steps_per_second": 5.233,
24
- "step": 62
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.7349774227162209,
29
- "eval_loss": 0.8353763222694397,
30
- "eval_macro-f1": 0.5254710956125371,
31
- "eval_micro-f1": 0.7349774227162209,
32
- "eval_precision-macro": 0.5349847227650002,
33
- "eval_precision-micro": 0.7349774227162209,
34
- "eval_recall-macro": 0.5291052629047126,
35
- "eval_recall-micro": 0.7349774227162209,
36
- "eval_runtime": 1.6436,
37
- "eval_samples_per_second": 18.252,
38
- "eval_steps_per_second": 4.867,
39
- "step": 124
40
  },
41
  {
42
  "epoch": 3.0,
43
- "eval_accuracy": 0.7342827370614797,
44
- "eval_loss": 0.8058456182479858,
45
- "eval_macro-f1": 0.536617185045311,
46
- "eval_micro-f1": 0.7342827370614797,
47
- "eval_precision-macro": 0.5558680331273026,
48
- "eval_precision-micro": 0.7342827370614797,
49
- "eval_recall-macro": 0.5381948279596481,
50
- "eval_recall-micro": 0.7342827370614797,
51
- "eval_runtime": 1.5882,
52
- "eval_samples_per_second": 18.889,
53
- "eval_steps_per_second": 5.037,
54
- "step": 186
55
  },
56
  {
57
  "epoch": 4.0,
58
- "eval_accuracy": 0.7502605071205279,
59
- "eval_loss": 0.7717716097831726,
60
- "eval_macro-f1": 0.530010703703156,
61
- "eval_micro-f1": 0.7502605071205279,
62
- "eval_precision-macro": 0.6245985387634561,
63
- "eval_precision-micro": 0.7502605071205279,
64
- "eval_recall-macro": 0.5200955397553431,
65
- "eval_recall-micro": 0.7502605071205279,
66
- "eval_runtime": 1.601,
67
- "eval_samples_per_second": 18.738,
68
- "eval_steps_per_second": 4.997,
69
- "step": 248
 
 
 
 
 
 
 
70
  },
71
  {
72
  "epoch": 5.0,
73
  "eval_accuracy": 0.7641542202153525,
74
- "eval_loss": 0.7306948900222778,
75
- "eval_macro-f1": 0.5578515609115684,
76
  "eval_micro-f1": 0.7641542202153525,
77
- "eval_precision-macro": 0.5889876526435496,
78
  "eval_precision-micro": 0.7641542202153525,
79
- "eval_recall-macro": 0.5462553107739512,
80
  "eval_recall-micro": 0.7641542202153525,
81
- "eval_runtime": 2.1379,
82
- "eval_samples_per_second": 14.032,
83
- "eval_steps_per_second": 3.742,
84
- "step": 310
85
  },
86
  {
87
  "epoch": 6.0,
88
- "eval_accuracy": 0.774574505036471,
89
- "eval_loss": 0.7098783254623413,
90
- "eval_macro-f1": 0.5481274858908393,
91
- "eval_micro-f1": 0.774574505036471,
92
- "eval_precision-macro": 0.6076438792386994,
93
- "eval_precision-micro": 0.774574505036471,
94
- "eval_recall-macro": 0.5431283891127849,
95
- "eval_recall-micro": 0.774574505036471,
96
- "eval_runtime": 2.1932,
97
- "eval_samples_per_second": 13.679,
98
- "eval_steps_per_second": 3.648,
99
- "step": 372
100
- },
101
- {
102
- "epoch": 7.0,
103
  "eval_accuracy": 0.7811740187565127,
104
- "eval_loss": 0.7071970701217651,
105
- "eval_macro-f1": 0.5261426411307122,
106
  "eval_micro-f1": 0.7811740187565127,
107
- "eval_precision-macro": 0.6089513985670642,
108
  "eval_precision-micro": 0.7811740187565127,
109
- "eval_recall-macro": 0.5125569147899907,
110
  "eval_recall-micro": 0.7811740187565127,
111
- "eval_runtime": 1.6371,
112
- "eval_samples_per_second": 18.326,
113
- "eval_steps_per_second": 4.887,
114
- "step": 434
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  },
116
  {
117
  "epoch": 8.0,
118
- "eval_accuracy": 0.7825633900659952,
119
- "eval_loss": 0.6919089555740356,
120
- "eval_macro-f1": 0.5675875408188613,
121
- "eval_micro-f1": 0.7825633900659952,
122
- "eval_precision-macro": 0.6321251715307294,
123
- "eval_precision-micro": 0.7825633900659952,
124
- "eval_recall-macro": 0.5470775441802167,
125
- "eval_recall-micro": 0.7825633900659952,
126
- "eval_runtime": 1.6786,
127
- "eval_samples_per_second": 17.872,
128
- "eval_steps_per_second": 4.766,
129
- "step": 496
130
  },
131
  {
132
  "epoch": 8.064516129032258,
133
- "grad_norm": 7.422909736633301,
134
  "learning_rate": 1.7951612903225806e-05,
135
- "loss": 0.8758,
136
- "step": 500
137
  },
138
  {
139
  "epoch": 9.0,
140
- "eval_accuracy": 0.7735324765543592,
141
- "eval_loss": 0.7503196597099304,
142
- "eval_macro-f1": 0.569622502930968,
143
- "eval_micro-f1": 0.7735324765543592,
144
- "eval_precision-macro": 0.5665598749803964,
145
- "eval_precision-micro": 0.7735324765543592,
146
- "eval_recall-macro": 0.5818475586367124,
147
- "eval_recall-micro": 0.7735324765543592,
148
- "eval_runtime": 1.5468,
149
- "eval_samples_per_second": 19.395,
150
- "eval_steps_per_second": 5.172,
151
- "step": 558
152
  },
153
  {
154
  "epoch": 10.0,
155
- "eval_accuracy": 0.7783952761375478,
156
- "eval_loss": 0.7511970400810242,
157
- "eval_macro-f1": 0.5755339015228546,
158
- "eval_micro-f1": 0.7783952761375478,
159
- "eval_precision-macro": 0.6053985952851118,
160
- "eval_precision-micro": 0.7783952761375478,
161
- "eval_recall-macro": 0.5655629578179421,
162
- "eval_recall-micro": 0.7783952761375478,
163
- "eval_runtime": 2.2548,
164
- "eval_samples_per_second": 13.305,
165
- "eval_steps_per_second": 3.548,
166
- "step": 620
167
  },
168
  {
169
  "epoch": 11.0,
170
- "eval_accuracy": 0.7829107328933658,
171
- "eval_loss": 0.7655877470970154,
172
- "eval_macro-f1": 0.591328685794211,
173
- "eval_micro-f1": 0.7829107328933658,
174
- "eval_precision-macro": 0.6085589543807931,
175
- "eval_precision-micro": 0.7829107328933658,
176
- "eval_recall-macro": 0.5834711775606751,
177
- "eval_recall-micro": 0.7829107328933658,
178
- "eval_runtime": 2.2566,
179
- "eval_samples_per_second": 13.295,
180
- "eval_steps_per_second": 3.545,
181
- "step": 682
182
  },
183
  {
184
- "epoch": 12.0,
185
- "eval_accuracy": 0.7738798193817298,
186
- "eval_loss": 0.786118745803833,
187
- "eval_macro-f1": 0.5843444583481733,
188
- "eval_micro-f1": 0.7738798193817298,
189
- "eval_precision-macro": 0.5971774078353586,
190
- "eval_precision-micro": 0.7738798193817298,
191
- "eval_recall-macro": 0.5885123710730829,
192
- "eval_recall-micro": 0.7738798193817298,
193
- "eval_runtime": 1.5545,
194
- "eval_samples_per_second": 19.299,
195
- "eval_steps_per_second": 5.146,
196
- "step": 744
197
- },
198
- {
199
- "epoch": 13.0,
200
- "eval_accuracy": 0.7780479333101772,
201
- "eval_loss": 0.8238919377326965,
202
- "eval_macro-f1": 0.5701476938599402,
203
- "eval_micro-f1": 0.7780479333101772,
204
- "eval_precision-macro": 0.5975031172688886,
205
- "eval_precision-micro": 0.7780479333101772,
206
- "eval_recall-macro": 0.5748658781079373,
207
- "eval_recall-micro": 0.7780479333101772,
208
- "eval_runtime": 1.5795,
209
- "eval_samples_per_second": 18.993,
210
- "eval_steps_per_second": 5.065,
211
- "step": 806
212
- },
213
- {
214
- "epoch": 14.0,
215
- "eval_accuracy": 0.7797846474470302,
216
- "eval_loss": 0.8271887302398682,
217
- "eval_macro-f1": 0.592619371017184,
218
- "eval_micro-f1": 0.7797846474470302,
219
- "eval_precision-macro": 0.6088825353073621,
220
- "eval_precision-micro": 0.7797846474470302,
221
- "eval_recall-macro": 0.5868004304340952,
222
- "eval_recall-micro": 0.7797846474470302,
223
- "eval_runtime": 2.326,
224
- "eval_samples_per_second": 12.898,
225
- "eval_steps_per_second": 3.439,
226
- "step": 868
227
- },
228
- {
229
- "epoch": 14.0,
230
- "step": 868,
231
- "total_flos": 6.747257278287053e+16,
232
- "train_loss": 0.6624447870913739,
233
- "train_runtime": 488.5119,
234
- "train_samples_per_second": 10.112,
235
- "train_steps_per_second": 2.538
236
  }
237
  ],
238
  "logging_steps": 500,
239
- "max_steps": 1240,
240
  "num_input_tokens_seen": 0,
241
  "num_train_epochs": 20,
242
  "save_steps": 500,
@@ -261,8 +223,8 @@
261
  "attributes": {}
262
  }
263
  },
264
- "total_flos": 6.747257278287053e+16,
265
- "train_batch_size": 4,
266
  "trial_name": null,
267
  "trial_params": null
268
  }
 
1
  {
2
+ "best_metric": 0.7929836748871136,
3
+ "best_model_checkpoint": "logs/indian_build_rr/roberta-base/seed_1/checkpoint-992",
4
+ "epoch": 11.0,
5
  "eval_steps": 500,
6
+ "global_step": 1364,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.7155262243834665,
14
+ "eval_loss": 0.9702894687652588,
15
+ "eval_macro-f1": 0.3565778389617827,
16
+ "eval_micro-f1": 0.7155262243834665,
17
+ "eval_precision-macro": 0.5485369419220343,
18
+ "eval_precision-micro": 0.7155262243834665,
19
+ "eval_recall-macro": 0.34472385514524134,
20
+ "eval_recall-micro": 0.7155262243834665,
21
+ "eval_runtime": 3.534,
22
+ "eval_samples_per_second": 8.489,
23
+ "eval_steps_per_second": 4.244,
24
+ "step": 124
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "eval_accuracy": 0.7353247655435915,
29
+ "eval_loss": 0.8005266189575195,
30
+ "eval_macro-f1": 0.5080224547196512,
31
+ "eval_micro-f1": 0.7353247655435915,
32
+ "eval_precision-macro": 0.5180988726198199,
33
+ "eval_precision-micro": 0.7353247655435915,
34
+ "eval_recall-macro": 0.5222058583864981,
35
+ "eval_recall-micro": 0.7353247655435915,
36
+ "eval_runtime": 3.5582,
37
+ "eval_samples_per_second": 8.431,
38
+ "eval_steps_per_second": 4.216,
39
+ "step": 248
40
  },
41
  {
42
  "epoch": 3.0,
43
+ "eval_accuracy": 0.7453977075373394,
44
+ "eval_loss": 0.815595269203186,
45
+ "eval_macro-f1": 0.5288442820595285,
46
+ "eval_micro-f1": 0.7453977075373394,
47
+ "eval_precision-macro": 0.5625997012224085,
48
+ "eval_precision-micro": 0.7453977075373394,
49
+ "eval_recall-macro": 0.532191762651922,
50
+ "eval_recall-micro": 0.7453977075373394,
51
+ "eval_runtime": 3.3982,
52
+ "eval_samples_per_second": 8.828,
53
+ "eval_steps_per_second": 4.414,
54
+ "step": 372
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "eval_accuracy": 0.7704063911080237,
59
+ "eval_loss": 0.7056049108505249,
60
+ "eval_macro-f1": 0.5179788731015686,
61
+ "eval_micro-f1": 0.7704063911080237,
62
+ "eval_precision-macro": 0.5880911415103544,
63
+ "eval_precision-micro": 0.7704063911080237,
64
+ "eval_recall-macro": 0.5197063822818007,
65
+ "eval_recall-micro": 0.7704063911080237,
66
+ "eval_runtime": 3.325,
67
+ "eval_samples_per_second": 9.023,
68
+ "eval_steps_per_second": 4.511,
69
+ "step": 496
70
+ },
71
+ {
72
+ "epoch": 4.032258064516129,
73
+ "grad_norm": 7.048013210296631,
74
+ "learning_rate": 2.398790322580645e-05,
75
+ "loss": 1.0549,
76
+ "step": 500
77
  },
78
  {
79
  "epoch": 5.0,
80
  "eval_accuracy": 0.7641542202153525,
81
+ "eval_loss": 0.7525667548179626,
82
+ "eval_macro-f1": 0.5774815867474451,
83
  "eval_micro-f1": 0.7641542202153525,
84
+ "eval_precision-macro": 0.5877505487951785,
85
  "eval_precision-micro": 0.7641542202153525,
86
+ "eval_recall-macro": 0.5905806919233985,
87
  "eval_recall-micro": 0.7641542202153525,
88
+ "eval_runtime": 3.7943,
89
+ "eval_samples_per_second": 7.907,
90
+ "eval_steps_per_second": 3.953,
91
+ "step": 620
92
  },
93
  {
94
  "epoch": 6.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  "eval_accuracy": 0.7811740187565127,
96
+ "eval_loss": 0.7093824148178101,
97
+ "eval_macro-f1": 0.564925492252011,
98
  "eval_micro-f1": 0.7811740187565127,
99
+ "eval_precision-macro": 0.6335954751289583,
100
  "eval_precision-micro": 0.7811740187565127,
101
+ "eval_recall-macro": 0.5394598039562246,
102
  "eval_recall-micro": 0.7811740187565127,
103
+ "eval_runtime": 3.6662,
104
+ "eval_samples_per_second": 8.183,
105
+ "eval_steps_per_second": 4.091,
106
+ "step": 744
107
+ },
108
+ {
109
+ "epoch": 7.0,
110
+ "eval_accuracy": 0.780826675929142,
111
+ "eval_loss": 0.7391286492347717,
112
+ "eval_macro-f1": 0.5535439959165813,
113
+ "eval_micro-f1": 0.780826675929142,
114
+ "eval_precision-macro": 0.6475047138793736,
115
+ "eval_precision-micro": 0.780826675929142,
116
+ "eval_recall-macro": 0.5338983669485645,
117
+ "eval_recall-micro": 0.780826675929142,
118
+ "eval_runtime": 3.3578,
119
+ "eval_samples_per_second": 8.934,
120
+ "eval_steps_per_second": 4.467,
121
+ "step": 868
122
  },
123
  {
124
  "epoch": 8.0,
125
+ "eval_accuracy": 0.7929836748871136,
126
+ "eval_loss": 0.7354127168655396,
127
+ "eval_macro-f1": 0.5881256788610278,
128
+ "eval_micro-f1": 0.7929836748871136,
129
+ "eval_precision-macro": 0.616862114635611,
130
+ "eval_precision-micro": 0.7929836748871136,
131
+ "eval_recall-macro": 0.5756480546409108,
132
+ "eval_recall-micro": 0.7929836748871136,
133
+ "eval_runtime": 3.5028,
134
+ "eval_samples_per_second": 8.565,
135
+ "eval_steps_per_second": 4.282,
136
+ "step": 992
137
  },
138
  {
139
  "epoch": 8.064516129032258,
140
+ "grad_norm": 4.143438339233398,
141
  "learning_rate": 1.7951612903225806e-05,
142
+ "loss": 0.545,
143
+ "step": 1000
144
  },
145
  {
146
  "epoch": 9.0,
147
+ "eval_accuracy": 0.7804793331017714,
148
+ "eval_loss": 0.8143337965011597,
149
+ "eval_macro-f1": 0.5927580984411855,
150
+ "eval_micro-f1": 0.7804793331017714,
151
+ "eval_precision-macro": 0.5950513529543718,
152
+ "eval_precision-micro": 0.7804793331017714,
153
+ "eval_recall-macro": 0.5963301506624595,
154
+ "eval_recall-micro": 0.7804793331017714,
155
+ "eval_runtime": 3.3219,
156
+ "eval_samples_per_second": 9.031,
157
+ "eval_steps_per_second": 4.515,
158
+ "step": 1116
159
  },
160
  {
161
  "epoch": 10.0,
162
+ "eval_accuracy": 0.7794373046196597,
163
+ "eval_loss": 0.8351579904556274,
164
+ "eval_macro-f1": 0.5917556551043053,
165
+ "eval_micro-f1": 0.7794373046196597,
166
+ "eval_precision-macro": 0.602885463862158,
167
+ "eval_precision-micro": 0.7794373046196597,
168
+ "eval_recall-macro": 0.5915247045666512,
169
+ "eval_recall-micro": 0.7794373046196597,
170
+ "eval_runtime": 3.5978,
171
+ "eval_samples_per_second": 8.339,
172
+ "eval_steps_per_second": 4.169,
173
+ "step": 1240
174
  },
175
  {
176
  "epoch": 11.0,
177
+ "eval_accuracy": 0.7870788468218132,
178
+ "eval_loss": 0.8609802722930908,
179
+ "eval_macro-f1": 0.5742187338887501,
180
+ "eval_micro-f1": 0.7870788468218132,
181
+ "eval_precision-macro": 0.60147377967397,
182
+ "eval_precision-micro": 0.7870788468218132,
183
+ "eval_recall-macro": 0.5642213023272796,
184
+ "eval_recall-micro": 0.7870788468218132,
185
+ "eval_runtime": 24.3263,
186
+ "eval_samples_per_second": 1.233,
187
+ "eval_steps_per_second": 0.617,
188
+ "step": 1364
189
  },
190
  {
191
+ "epoch": 11.0,
192
+ "step": 1364,
193
+ "total_flos": 5.301416432939827e+16,
194
+ "train_loss": 0.6726446291568342,
195
+ "train_runtime": 895.6751,
196
+ "train_samples_per_second": 5.515,
197
+ "train_steps_per_second": 2.769
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  }
199
  ],
200
  "logging_steps": 500,
201
+ "max_steps": 2480,
202
  "num_input_tokens_seen": 0,
203
  "num_train_epochs": 20,
204
  "save_steps": 500,
 
223
  "attributes": {}
224
  }
225
  },
226
+ "total_flos": 5.301416432939827e+16,
227
+ "train_batch_size": 2,
228
  "trial_name": null,
229
  "trial_params": null
230
  }