Edison committed on
Commit
908c75e
·
1 Parent(s): d8974df

Model save

Browse files
Files changed (3) hide show
  1. pytorch_model.bin +1 -1
  2. trainer_state.json +262 -52
  3. training_args.bin +1 -1
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dca950ade2c9b68106a456900c953cfde7fde39144bab7d02b46f0ce47c36b50
3
  size 267978033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8409188c6a0dfcb31868f51aacf3e6357c3bb0cba81c0b082b9a213ac4847595
3
  size 267978033
trainer_state.json CHANGED
@@ -1,95 +1,305 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
- "global_step": 1200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 1.605e-05,
13
- "loss": 2.5445,
14
- "step": 240
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_loss": 2.3416366577148438,
19
- "eval_runtime": 15.709,
20
- "eval_samples_per_second": 975.173,
21
- "eval_steps_per_second": 15.278,
22
- "step": 240
23
  },
24
  {
25
  "epoch": 2.0,
26
- "learning_rate": 1.2050000000000002e-05,
27
- "loss": 2.4313,
28
- "step": 480
29
  },
30
  {
31
  "epoch": 2.0,
32
- "eval_loss": 2.306915521621704,
33
- "eval_runtime": 15.7874,
34
- "eval_samples_per_second": 970.331,
35
- "eval_steps_per_second": 15.202,
36
- "step": 480
37
  },
38
  {
39
  "epoch": 3.0,
40
- "learning_rate": 8.050000000000001e-06,
41
- "loss": 2.3947,
42
- "step": 720
43
  },
44
  {
45
  "epoch": 3.0,
46
- "eval_loss": 2.283115863800049,
47
- "eval_runtime": 15.8101,
48
- "eval_samples_per_second": 968.938,
49
- "eval_steps_per_second": 15.18,
50
- "step": 720
51
  },
52
  {
53
  "epoch": 4.0,
54
- "learning_rate": 4.05e-06,
55
- "loss": 2.365,
56
- "step": 960
57
  },
58
  {
59
  "epoch": 4.0,
60
- "eval_loss": 2.270317792892456,
61
- "eval_runtime": 15.8204,
62
- "eval_samples_per_second": 968.307,
63
- "eval_steps_per_second": 15.17,
64
- "step": 960
65
  },
66
  {
67
  "epoch": 5.0,
68
- "learning_rate": 5.0000000000000004e-08,
69
- "loss": 2.3579,
70
- "step": 1200
71
  },
72
  {
73
  "epoch": 5.0,
74
- "eval_loss": 2.2615389823913574,
75
- "eval_runtime": 15.8209,
76
- "eval_samples_per_second": 968.274,
77
- "eval_steps_per_second": 15.17,
78
- "step": 1200
79
  },
80
  {
81
- "epoch": 5.0,
82
- "step": 1200,
83
- "total_flos": 5076760875586560.0,
84
- "train_loss": 2.4186907958984376,
85
- "train_runtime": 374.7542,
86
- "train_samples_per_second": 204.387,
87
- "train_steps_per_second": 3.202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  }
89
  ],
90
- "max_steps": 1200,
91
- "num_train_epochs": 5,
92
- "total_flos": 5076760875586560.0,
93
  "trial_name": null,
94
  "trial_params": null
95
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
+ "global_step": 7660,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 1.9005221932114882e-05,
13
+ "loss": 2.5049,
14
+ "step": 383
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_loss": 2.3055639266967773,
19
+ "eval_runtime": 6.5178,
20
+ "eval_samples_per_second": 940.197,
21
+ "eval_steps_per_second": 14.729,
22
+ "step": 383
23
  },
24
  {
25
  "epoch": 2.0,
26
+ "learning_rate": 1.8005221932114885e-05,
27
+ "loss": 2.3896,
28
+ "step": 766
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "eval_loss": 2.246039390563965,
33
+ "eval_runtime": 6.5445,
34
+ "eval_samples_per_second": 936.352,
35
+ "eval_steps_per_second": 14.669,
36
+ "step": 766
37
  },
38
  {
39
  "epoch": 3.0,
40
+ "learning_rate": 1.7005221932114885e-05,
41
+ "loss": 2.3458,
42
+ "step": 1149
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "eval_loss": 2.2351295948028564,
47
+ "eval_runtime": 6.5331,
48
+ "eval_samples_per_second": 937.992,
49
+ "eval_steps_per_second": 14.694,
50
+ "step": 1149
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "learning_rate": 1.6005221932114884e-05,
55
+ "loss": 2.3097,
56
+ "step": 1532
57
  },
58
  {
59
  "epoch": 4.0,
60
+ "eval_loss": 2.1917026042938232,
61
+ "eval_runtime": 6.5235,
62
+ "eval_samples_per_second": 939.379,
63
+ "eval_steps_per_second": 14.716,
64
+ "step": 1532
65
  },
66
  {
67
  "epoch": 5.0,
68
+ "learning_rate": 1.5005221932114883e-05,
69
+ "loss": 2.2839,
70
+ "step": 1915
71
  },
72
  {
73
  "epoch": 5.0,
74
+ "eval_loss": 2.193546772003174,
75
+ "eval_runtime": 6.5261,
76
+ "eval_samples_per_second": 938.992,
77
+ "eval_steps_per_second": 14.71,
78
+ "step": 1915
79
  },
80
  {
81
+ "epoch": 6.0,
82
+ "learning_rate": 1.4005221932114883e-05,
83
+ "loss": 2.2611,
84
+ "step": 2298
85
+ },
86
+ {
87
+ "epoch": 6.0,
88
+ "eval_loss": 2.174062490463257,
89
+ "eval_runtime": 6.5308,
90
+ "eval_samples_per_second": 938.316,
91
+ "eval_steps_per_second": 14.699,
92
+ "step": 2298
93
+ },
94
+ {
95
+ "epoch": 7.0,
96
+ "learning_rate": 1.3005221932114884e-05,
97
+ "loss": 2.2397,
98
+ "step": 2681
99
+ },
100
+ {
101
+ "epoch": 7.0,
102
+ "eval_loss": 2.151566743850708,
103
+ "eval_runtime": 6.525,
104
+ "eval_samples_per_second": 939.154,
105
+ "eval_steps_per_second": 14.713,
106
+ "step": 2681
107
+ },
108
+ {
109
+ "epoch": 8.0,
110
+ "learning_rate": 1.2005221932114883e-05,
111
+ "loss": 2.2234,
112
+ "step": 3064
113
+ },
114
+ {
115
+ "epoch": 8.0,
116
+ "eval_loss": 2.14640474319458,
117
+ "eval_runtime": 6.531,
118
+ "eval_samples_per_second": 938.292,
119
+ "eval_steps_per_second": 14.699,
120
+ "step": 3064
121
+ },
122
+ {
123
+ "epoch": 9.0,
124
+ "learning_rate": 1.1005221932114883e-05,
125
+ "loss": 2.2121,
126
+ "step": 3447
127
+ },
128
+ {
129
+ "epoch": 9.0,
130
+ "eval_loss": 2.124241590499878,
131
+ "eval_runtime": 6.5412,
132
+ "eval_samples_per_second": 936.826,
133
+ "eval_steps_per_second": 14.676,
134
+ "step": 3447
135
+ },
136
+ {
137
+ "epoch": 10.0,
138
+ "learning_rate": 1.0005221932114884e-05,
139
+ "loss": 2.2041,
140
+ "step": 3830
141
+ },
142
+ {
143
+ "epoch": 10.0,
144
+ "eval_loss": 2.1360511779785156,
145
+ "eval_runtime": 6.5352,
146
+ "eval_samples_per_second": 937.687,
147
+ "eval_steps_per_second": 14.69,
148
+ "step": 3830
149
+ },
150
+ {
151
+ "epoch": 11.0,
152
+ "learning_rate": 9.005221932114883e-06,
153
+ "loss": 2.1883,
154
+ "step": 4213
155
+ },
156
+ {
157
+ "epoch": 11.0,
158
+ "eval_loss": 2.1251063346862793,
159
+ "eval_runtime": 6.5334,
160
+ "eval_samples_per_second": 937.956,
161
+ "eval_steps_per_second": 14.694,
162
+ "step": 4213
163
+ },
164
+ {
165
+ "epoch": 12.0,
166
+ "learning_rate": 8.005221932114883e-06,
167
+ "loss": 2.185,
168
+ "step": 4596
169
+ },
170
+ {
171
+ "epoch": 12.0,
172
+ "eval_loss": 2.1296956539154053,
173
+ "eval_runtime": 6.5234,
174
+ "eval_samples_per_second": 939.386,
175
+ "eval_steps_per_second": 14.716,
176
+ "step": 4596
177
+ },
178
+ {
179
+ "epoch": 13.0,
180
+ "learning_rate": 7.005221932114883e-06,
181
+ "loss": 2.1712,
182
+ "step": 4979
183
+ },
184
+ {
185
+ "epoch": 13.0,
186
+ "eval_loss": 2.1061811447143555,
187
+ "eval_runtime": 6.5182,
188
+ "eval_samples_per_second": 940.138,
189
+ "eval_steps_per_second": 14.728,
190
+ "step": 4979
191
+ },
192
+ {
193
+ "epoch": 14.0,
194
+ "learning_rate": 6.005221932114883e-06,
195
+ "loss": 2.1648,
196
+ "step": 5362
197
+ },
198
+ {
199
+ "epoch": 14.0,
200
+ "eval_loss": 2.1048877239227295,
201
+ "eval_runtime": 6.5157,
202
+ "eval_samples_per_second": 940.496,
203
+ "eval_steps_per_second": 14.734,
204
+ "step": 5362
205
+ },
206
+ {
207
+ "epoch": 15.0,
208
+ "learning_rate": 5.005221932114883e-06,
209
+ "loss": 2.1587,
210
+ "step": 5745
211
+ },
212
+ {
213
+ "epoch": 15.0,
214
+ "eval_loss": 2.106553792953491,
215
+ "eval_runtime": 6.52,
216
+ "eval_samples_per_second": 939.88,
217
+ "eval_steps_per_second": 14.724,
218
+ "step": 5745
219
+ },
220
+ {
221
+ "epoch": 16.0,
222
+ "learning_rate": 4.005221932114883e-06,
223
+ "loss": 2.1532,
224
+ "step": 6128
225
+ },
226
+ {
227
+ "epoch": 16.0,
228
+ "eval_loss": 2.0981085300445557,
229
+ "eval_runtime": 6.5377,
230
+ "eval_samples_per_second": 937.338,
231
+ "eval_steps_per_second": 14.684,
232
+ "step": 6128
233
+ },
234
+ {
235
+ "epoch": 17.0,
236
+ "learning_rate": 3.005221932114883e-06,
237
+ "loss": 2.1472,
238
+ "step": 6511
239
+ },
240
+ {
241
+ "epoch": 17.0,
242
+ "eval_loss": 2.0925848484039307,
243
+ "eval_runtime": 6.5057,
244
+ "eval_samples_per_second": 941.95,
245
+ "eval_steps_per_second": 14.756,
246
+ "step": 6511
247
+ },
248
+ {
249
+ "epoch": 18.0,
250
+ "learning_rate": 2.005221932114883e-06,
251
+ "loss": 2.1462,
252
+ "step": 6894
253
+ },
254
+ {
255
+ "epoch": 18.0,
256
+ "eval_loss": 2.083235025405884,
257
+ "eval_runtime": 6.5322,
258
+ "eval_samples_per_second": 938.118,
259
+ "eval_steps_per_second": 14.696,
260
+ "step": 6894
261
+ },
262
+ {
263
+ "epoch": 19.0,
264
+ "learning_rate": 1.0052219321148825e-06,
265
+ "loss": 2.1437,
266
+ "step": 7277
267
+ },
268
+ {
269
+ "epoch": 19.0,
270
+ "eval_loss": 2.093729257583618,
271
+ "eval_runtime": 6.5339,
272
+ "eval_samples_per_second": 937.883,
273
+ "eval_steps_per_second": 14.693,
274
+ "step": 7277
275
+ },
276
+ {
277
+ "epoch": 20.0,
278
+ "learning_rate": 5.2219321148825064e-09,
279
+ "loss": 2.1386,
280
+ "step": 7660
281
+ },
282
+ {
283
+ "epoch": 20.0,
284
+ "eval_loss": 2.0927391052246094,
285
+ "eval_runtime": 6.5139,
286
+ "eval_samples_per_second": 940.762,
287
+ "eval_steps_per_second": 14.738,
288
+ "step": 7660
289
+ },
290
+ {
291
+ "epoch": 20.0,
292
+ "step": 7660,
293
+ "total_flos": 3.24907393591296e+16,
294
+ "train_loss": 2.228582644151334,
295
+ "train_runtime": 1800.6594,
296
+ "train_samples_per_second": 272.234,
297
+ "train_steps_per_second": 4.254
298
  }
299
  ],
300
+ "max_steps": 7660,
301
+ "num_train_epochs": 20,
302
+ "total_flos": 3.24907393591296e+16,
303
  "trial_name": null,
304
  "trial_params": null
305
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:209ad11520d3ca630da20a305bc405621a2e543e197e02fb3076d702b5d285c7
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b75bdebedc9876d7e86cf6128582b4594c481973d82cb1a9b2304d2f02c28ada
3
  size 3963