File size: 8,528 Bytes
f8a5226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
{
  "best_metric": 0.61792588,
  "best_model_checkpoint": "/data/project/ys/swift/output/DZJ6B_base/v2-20240821-171924/checkpoint-100",
  "epoch": 2.983240223463687,
  "eval_steps": 100,
  "global_step": 267,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "acc": 0.81204844,
      "epoch": 0.0111731843575419,
      "grad_norm": 7.0027059641490315,
      "learning_rate": 0.0,
      "loss": 0.80392802,
      "memory(GiB)": 65.61,
      "step": 1,
      "train_speed(iter/s)": 0.016003
    },
    {
      "acc": 0.80695226,
      "epoch": 0.11173184357541899,
      "grad_norm": 0.891187038971179,
      "learning_rate": 0.0001,
      "loss": 0.72480848,
      "memory(GiB)": 77.56,
      "step": 10,
      "train_speed(iter/s)": 0.019435
    },
    {
      "acc": 0.82287941,
      "epoch": 0.22346368715083798,
      "grad_norm": 0.4166817107409061,
      "learning_rate": 9.612403100775195e-05,
      "loss": 0.65245237,
      "memory(GiB)": 71.22,
      "step": 20,
      "train_speed(iter/s)": 0.019764
    },
    {
      "acc": 0.82848577,
      "epoch": 0.33519553072625696,
      "grad_norm": 0.3476093112050191,
      "learning_rate": 9.224806201550387e-05,
      "loss": 0.62454967,
      "memory(GiB)": 55.52,
      "step": 30,
      "train_speed(iter/s)": 0.019918
    },
    {
      "acc": 0.83262348,
      "epoch": 0.44692737430167595,
      "grad_norm": 0.3040300336242288,
      "learning_rate": 8.837209302325582e-05,
      "loss": 0.60746822,
      "memory(GiB)": 55.52,
      "step": 40,
      "train_speed(iter/s)": 0.020099
    },
    {
      "acc": 0.8341733,
      "epoch": 0.5586592178770949,
      "grad_norm": 0.2927188576333117,
      "learning_rate": 8.449612403100775e-05,
      "loss": 0.59925413,
      "memory(GiB)": 71.5,
      "step": 50,
      "train_speed(iter/s)": 0.02013
    },
    {
      "acc": 0.83600845,
      "epoch": 0.6703910614525139,
      "grad_norm": 0.38995740991597366,
      "learning_rate": 8.062015503875969e-05,
      "loss": 0.59135432,
      "memory(GiB)": 55.54,
      "step": 60,
      "train_speed(iter/s)": 0.02016
    },
    {
      "acc": 0.83653011,
      "epoch": 0.7821229050279329,
      "grad_norm": 0.3224959350008302,
      "learning_rate": 7.674418604651163e-05,
      "loss": 0.58722138,
      "memory(GiB)": 55.54,
      "step": 70,
      "train_speed(iter/s)": 0.020203
    },
    {
      "acc": 0.83982677,
      "epoch": 0.8938547486033519,
      "grad_norm": 0.2717501594592104,
      "learning_rate": 7.286821705426357e-05,
      "loss": 0.57504473,
      "memory(GiB)": 55.54,
      "step": 80,
      "train_speed(iter/s)": 0.020198
    },
    {
      "acc": 0.84336185,
      "epoch": 1.005586592178771,
      "grad_norm": 0.38777497021142354,
      "learning_rate": 6.89922480620155e-05,
      "loss": 0.56000357,
      "memory(GiB)": 55.54,
      "step": 90,
      "train_speed(iter/s)": 0.020243
    },
    {
      "acc": 0.88669682,
      "epoch": 1.1173184357541899,
      "grad_norm": 0.2800447265610427,
      "learning_rate": 6.511627906976745e-05,
      "loss": 0.39480281,
      "memory(GiB)": 55.54,
      "step": 100,
      "train_speed(iter/s)": 0.020243
    },
    {
      "epoch": 1.1173184357541899,
      "eval_acc": 0.8374920610261495,
      "eval_loss": 0.6179258823394775,
      "eval_runtime": 14.3303,
      "eval_samples_per_second": 31.89,
      "eval_steps_per_second": 0.279,
      "step": 100
    },
    {
      "acc": 0.88833666,
      "epoch": 1.229050279329609,
      "grad_norm": 0.2666926991713443,
      "learning_rate": 6.124031007751938e-05,
      "loss": 0.38764906,
      "memory(GiB)": 56.39,
      "step": 110,
      "train_speed(iter/s)": 0.020132
    },
    {
      "acc": 0.88940392,
      "epoch": 1.3407821229050279,
      "grad_norm": 0.2666536892955014,
      "learning_rate": 5.736434108527132e-05,
      "loss": 0.38203318,
      "memory(GiB)": 72.39,
      "step": 120,
      "train_speed(iter/s)": 0.020153
    },
    {
      "acc": 0.88888655,
      "epoch": 1.452513966480447,
      "grad_norm": 0.2511228236819602,
      "learning_rate": 5.348837209302326e-05,
      "loss": 0.38319407,
      "memory(GiB)": 55.62,
      "step": 130,
      "train_speed(iter/s)": 0.020142
    },
    {
      "acc": 0.88929482,
      "epoch": 1.564245810055866,
      "grad_norm": 0.250375456577922,
      "learning_rate": 4.96124031007752e-05,
      "loss": 0.38362105,
      "memory(GiB)": 55.62,
      "step": 140,
      "train_speed(iter/s)": 0.020157
    },
    {
      "acc": 0.89008141,
      "epoch": 1.675977653631285,
      "grad_norm": 0.2664008940638054,
      "learning_rate": 4.573643410852713e-05,
      "loss": 0.37961533,
      "memory(GiB)": 63.63,
      "step": 150,
      "train_speed(iter/s)": 0.02017
    },
    {
      "acc": 0.88946552,
      "epoch": 1.7877094972067038,
      "grad_norm": 0.2568604002281673,
      "learning_rate": 4.186046511627907e-05,
      "loss": 0.38261704,
      "memory(GiB)": 63.63,
      "step": 160,
      "train_speed(iter/s)": 0.02016
    },
    {
      "acc": 0.89207363,
      "epoch": 1.899441340782123,
      "grad_norm": 0.25110691370775395,
      "learning_rate": 3.798449612403101e-05,
      "loss": 0.37335744,
      "memory(GiB)": 63.63,
      "step": 170,
      "train_speed(iter/s)": 0.020162
    },
    {
      "acc": 0.89511375,
      "epoch": 2.011173184357542,
      "grad_norm": 0.4578108117725898,
      "learning_rate": 3.4108527131782945e-05,
      "loss": 0.36452789,
      "memory(GiB)": 63.63,
      "step": 180,
      "train_speed(iter/s)": 0.02019
    },
    {
      "acc": 0.93461123,
      "epoch": 2.122905027932961,
      "grad_norm": 0.29379733174286393,
      "learning_rate": 3.0232558139534883e-05,
      "loss": 0.22719576,
      "memory(GiB)": 63.63,
      "step": 190,
      "train_speed(iter/s)": 0.020204
    },
    {
      "acc": 0.93616581,
      "epoch": 2.2346368715083798,
      "grad_norm": 0.2816020644085949,
      "learning_rate": 2.6356589147286826e-05,
      "loss": 0.22034373,
      "memory(GiB)": 63.63,
      "step": 200,
      "train_speed(iter/s)": 0.020189
    },
    {
      "epoch": 2.2346368715083798,
      "eval_acc": 0.8397126891074994,
      "eval_loss": 0.6819891929626465,
      "eval_runtime": 14.3111,
      "eval_samples_per_second": 31.933,
      "eval_steps_per_second": 0.28,
      "step": 200
    },
    {
      "acc": 0.93667021,
      "epoch": 2.346368715083799,
      "grad_norm": 0.27143644801177985,
      "learning_rate": 2.2480620155038764e-05,
      "loss": 0.21893153,
      "memory(GiB)": 63.63,
      "step": 210,
      "train_speed(iter/s)": 0.020144
    },
    {
      "acc": 0.9375206,
      "epoch": 2.458100558659218,
      "grad_norm": 0.24768232763167009,
      "learning_rate": 1.8604651162790697e-05,
      "loss": 0.21632226,
      "memory(GiB)": 63.63,
      "step": 220,
      "train_speed(iter/s)": 0.020144
    },
    {
      "acc": 0.93809719,
      "epoch": 2.5698324022346366,
      "grad_norm": 0.24790064179599028,
      "learning_rate": 1.4728682170542638e-05,
      "loss": 0.21427879,
      "memory(GiB)": 63.63,
      "step": 230,
      "train_speed(iter/s)": 0.02015
    },
    {
      "acc": 0.93803339,
      "epoch": 2.6815642458100557,
      "grad_norm": 0.24686113799484746,
      "learning_rate": 1.0852713178294575e-05,
      "loss": 0.21534572,
      "memory(GiB)": 63.63,
      "step": 240,
      "train_speed(iter/s)": 0.020161
    },
    {
      "acc": 0.93708591,
      "epoch": 2.793296089385475,
      "grad_norm": 0.2496609236672877,
      "learning_rate": 6.976744186046512e-06,
      "loss": 0.21791611,
      "memory(GiB)": 63.63,
      "step": 250,
      "train_speed(iter/s)": 0.020172
    },
    {
      "acc": 0.93700886,
      "epoch": 2.905027932960894,
      "grad_norm": 0.24396510705233476,
      "learning_rate": 3.10077519379845e-06,
      "loss": 0.21976945,
      "memory(GiB)": 63.63,
      "step": 260,
      "train_speed(iter/s)": 0.020173
    },
    {
      "epoch": 2.983240223463687,
      "eval_acc": 0.8414855362177129,
      "eval_loss": 0.6801542639732361,
      "eval_runtime": 14.3187,
      "eval_samples_per_second": 31.916,
      "eval_steps_per_second": 0.279,
      "step": 267
    }
  ],
  "logging_steps": 10,
  "max_steps": 267,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 267385770803200.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}