k4black commited on
Commit
b66148a
1 Parent(s): 2dc0150

Training in progress, step 5200

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb078f66955011ea6afcb149ea16849afed6783f2a0810d9c8a62cd9a59686c5
3
  size 2843230968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:765ded4f85122106d3c7ca70ecc3286734cc83607e82caa135f0d00745a434b4
3
  size 2843230968
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:202a9040c68109cbb48b8fe2017897cfb1a7a3f10ba5d835b9cae6758ab0fd4f
3
  size 1421591285
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b58d358224de084112dd9ddc81a1e58fe3a83a78d808b5c279b4b2401d5f2b1f
3
  size 1421591285
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:778f07573f600fa48cbe5b11ab076696f67b51b6a5db2be2df0befcfb3b87a3b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06744a746681eb14d92a4d87a2c4a3f33f8f040e9ff0c2cc0f5d538801039dce
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2beb2962f0b6cdb6d771bb320fa65eb0c0fa5a9c4fbb9b329b61e29cdbef62fe
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1785d5e671bc65a68d5c3dfbd2d502885b77ad2217c8d158cb3c228d55f0b090
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.983181371856266,
3
- "best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-2800",
4
- "epoch": 0.081547064305685,
5
- "global_step": 2800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -118,11 +118,107 @@
118
  "eval_samples_per_second": 766.977,
119
  "eval_steps_per_second": 48.004,
120
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  }
122
  ],
123
  "max_steps": 103008,
124
  "num_train_epochs": 3,
125
- "total_flos": 5492559907900896.0,
126
  "trial_name": null,
127
  "trial_params": null
128
  }
 
1
  {
2
+ "best_metric": 0.9855364167633462,
3
+ "best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-4800",
4
+ "epoch": 0.15144454799627213,
5
+ "global_step": 5200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
118
  "eval_samples_per_second": 766.977,
119
  "eval_steps_per_second": 48.004,
120
  "step": 2800
121
+ },
122
+ {
123
+ "epoch": 0.09,
124
+ "learning_rate": 6.212385944476801e-06,
125
+ "loss": 0.1229,
126
+ "step": 3200
127
+ },
128
+ {
129
+ "epoch": 0.09,
130
+ "eval_accuracy": 0.9820158504369031,
131
+ "eval_f1": 0.9819483437268689,
132
+ "eval_loss": 0.10309657454490662,
133
+ "eval_runtime": 13.2533,
134
+ "eval_samples_per_second": 742.608,
135
+ "eval_steps_per_second": 46.479,
136
+ "step": 3200
137
+ },
138
+ {
139
+ "epoch": 0.1,
140
+ "learning_rate": 6.988934187536401e-06,
141
+ "loss": 0.1291,
142
+ "step": 3600
143
+ },
144
+ {
145
+ "epoch": 0.1,
146
+ "eval_accuracy": 0.9842511684616948,
147
+ "eval_f1": 0.9842093647131546,
148
+ "eval_loss": 0.08313809335231781,
149
+ "eval_runtime": 13.0051,
150
+ "eval_samples_per_second": 756.778,
151
+ "eval_steps_per_second": 47.366,
152
+ "step": 3600
153
+ },
154
+ {
155
+ "epoch": 0.12,
156
+ "learning_rate": 7.765482430596002e-06,
157
+ "loss": 0.1049,
158
+ "step": 4000
159
+ },
160
+ {
161
+ "epoch": 0.12,
162
+ "eval_accuracy": 0.9824222718959561,
163
+ "eval_f1": 0.9823847600112674,
164
+ "eval_loss": 0.0907953679561615,
165
+ "eval_runtime": 12.984,
166
+ "eval_samples_per_second": 758.01,
167
+ "eval_steps_per_second": 47.443,
168
+ "step": 4000
169
+ },
170
+ {
171
+ "epoch": 0.13,
172
+ "learning_rate": 8.542030673655602e-06,
173
+ "loss": 0.1243,
174
+ "step": 4400
175
+ },
176
+ {
177
+ "epoch": 0.13,
178
+ "eval_accuracy": 0.9823206665311929,
179
+ "eval_f1": 0.9822362137371193,
180
+ "eval_loss": 0.0925893783569336,
181
+ "eval_runtime": 12.9285,
182
+ "eval_samples_per_second": 761.262,
183
+ "eval_steps_per_second": 47.647,
184
+ "step": 4400
185
+ },
186
+ {
187
+ "epoch": 0.14,
188
+ "learning_rate": 9.318578916715203e-06,
189
+ "loss": 0.1291,
190
+ "step": 4800
191
+ },
192
+ {
193
+ "epoch": 0.14,
194
+ "eval_accuracy": 0.9855720382036172,
195
+ "eval_f1": 0.9855364167633462,
196
+ "eval_loss": 0.08309133350849152,
197
+ "eval_runtime": 12.8482,
198
+ "eval_samples_per_second": 766.023,
199
+ "eval_steps_per_second": 47.945,
200
+ "step": 4800
201
+ },
202
+ {
203
+ "epoch": 0.15,
204
+ "learning_rate": 9.994992693419992e-06,
205
+ "loss": 0.1148,
206
+ "step": 5200
207
+ },
208
+ {
209
+ "epoch": 0.15,
210
+ "eval_accuracy": 0.983946352367405,
211
+ "eval_f1": 0.9839146445196306,
212
+ "eval_loss": 0.08415436744689941,
213
+ "eval_runtime": 12.8411,
214
+ "eval_samples_per_second": 766.443,
215
+ "eval_steps_per_second": 47.971,
216
+ "step": 5200
217
  }
218
  ],
219
  "max_steps": 103008,
220
  "num_train_epochs": 3,
221
+ "total_flos": 1.020890573605968e+16,
222
  "trial_name": null,
223
  "trial_params": null
224
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:202a9040c68109cbb48b8fe2017897cfb1a7a3f10ba5d835b9cae6758ab0fd4f
3
  size 1421591285
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b58d358224de084112dd9ddc81a1e58fe3a83a78d808b5c279b4b2401d5f2b1f
3
  size 1421591285