|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4814814814814817e-06, |
|
"loss": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.185185185185185e-05, |
|
"loss": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.925925925925926e-05, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.407407407407407e-05, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.148148148148148e-05, |
|
"loss": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.62962962962963e-05, |
|
"loss": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001037037037037037, |
|
"loss": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011111111111111112, |
|
"loss": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011851851851851852, |
|
"loss": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012592592592592592, |
|
"loss": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014074074074074076, |
|
"loss": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014814814814814815, |
|
"loss": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00016296296296296295, |
|
"loss": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017037037037037037, |
|
"loss": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017777777777777779, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001851851851851852, |
|
"loss": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001925925925925926, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0002, |
|
"loss": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019999164298554375, |
|
"loss": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019996657333896877, |
|
"loss": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019992479525042303, |
|
"loss": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019986631570270832, |
|
"loss": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019979114447011323, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001996992941167792, |
|
"loss": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019959077999460095, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019946562024066014, |
|
"loss": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019932383577419432, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019916545029310012, |
|
"loss": 0.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019899049026997272, |
|
"loss": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019879898494768093, |
|
"loss": 0.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019859096633447965, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001983664691986601, |
|
"loss": 0.0, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019812553106273847, |
|
"loss": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019786819219718443, |
|
"loss": 0.0, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019759449561369038, |
|
"loss": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019730448705798239, |
|
"loss": 0.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019699821500217434, |
|
"loss": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001966757306366662, |
|
"loss": 0.0, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019633708786158806, |
|
"loss": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019598234327779118, |
|
"loss": 0.0, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019561155617738797, |
|
"loss": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019522478853384155, |
|
"loss": 0.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019482210499160765, |
|
"loss": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019440357285533, |
|
"loss": 0.0, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019396926207859084, |
|
"loss": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.000193519245252219, |
|
"loss": 0.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019305359759215685, |
|
"loss": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019257239692688907, |
|
"loss": 0.0, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019207572368443385, |
|
"loss": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001915636608789006, |
|
"loss": 0.0, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001910362940966147, |
|
"loss": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019049371148181253, |
|
"loss": 0.0, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018993600372190932, |
|
"loss": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018936326403234125, |
|
"loss": 0.0, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018877558814098561, |
|
"loss": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001881730742721608, |
|
"loss": 0.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001875558231302091, |
|
"loss": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018692393788266479, |
|
"loss": 0.0, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018627752414301086, |
|
"loss": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018561668995302667, |
|
"loss": 0.0, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018494154576472976, |
|
"loss": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018425220442191495, |
|
"loss": 0.0, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018354878114129367, |
|
"loss": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018283139349323634, |
|
"loss": 0.0, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018210016138212187, |
|
"loss": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018135520702629675, |
|
"loss": 0.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018059665493764743, |
|
"loss": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001798246319007893, |
|
"loss": 0.0, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017903926695187595, |
|
"loss": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017824069135703198, |
|
"loss": 0.0, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017742903859041325, |
|
"loss": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001766044443118978, |
|
"loss": 0.0, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001757670463444118, |
|
"loss": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017491698465089362, |
|
"loss": 0.0, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017405440131090048, |
|
"loss": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017317944049686124, |
|
"loss": 0.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017229224844997928, |
|
"loss": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017139297345578994, |
|
"loss": 0.0, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017048176581937563, |
|
"loss": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001695587778402442, |
|
"loss": 0.0, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001686241637868734, |
|
"loss": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00016767807987092621, |
|
"loss": 0.0, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00016672068422114196, |
|
"loss": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001657521368569064, |
|
"loss": 0.0, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00016477259966150588, |
|
"loss": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001637822363550706, |
|
"loss": 0.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00016278121246720987, |
|
"loss": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00016176969530934572, |
|
"loss": 0.0, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00016074785394674837, |
|
"loss": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015971585917027862, |
|
"loss": 0.0, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001586738834678418, |
|
"loss": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015762210099555803, |
|
"loss": 0.0, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015656068754865387, |
|
"loss": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001554898205320797, |
|
"loss": 0.0, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015440967893085828, |
|
"loss": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015332044328016914, |
|
"loss": 0.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015222229563517385, |
|
"loss": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015111541954058734, |
|
"loss": 0.0, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014887622344495643, |
|
"loss": 0.0, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001477442777037949, |
|
"loss": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001466043519702539, |
|
"loss": 0.0, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014545663677185006, |
|
"loss": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014430132393803352, |
|
"loss": 0.0, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014313860656812536, |
|
"loss": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001419686789990429, |
|
"loss": 0.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014079173677281837, |
|
"loss": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001396079766039157, |
|
"loss": 0.0, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013841759634635178, |
|
"loss": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013722079496062702, |
|
"loss": 0.0, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013601777248047105, |
|
"loss": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013480872997940905, |
|
"loss": 0.0, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013359386953715421, |
|
"loss": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013237339420583212, |
|
"loss": 0.0, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013114750797604247, |
|
"loss": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012991641574276418, |
|
"loss": 0.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012868032327110904, |
|
"loss": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012743943716193016, |
|
"loss": 0.0, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001261939648172906, |
|
"loss": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012494411440579814, |
|
"loss": 0.0, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012369009482781192, |
|
"loss": 0.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00012243211568052677, |
|
"loss": 0.0, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001211703872229411, |
|
"loss": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011990512034071406, |
|
"loss": 0.0, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011863652651091823, |
|
"loss": 0.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011736481776669306, |
|
"loss": 0.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011609020666180575, |
|
"loss": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001148129062351249, |
|
"loss": 0.0, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011353312997501313, |
|
"loss": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011225109178364455, |
|
"loss": 0.0, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00011096700594125318, |
|
"loss": 0.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010968108707031792, |
|
"loss": 0.0, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010839355009969068, |
|
"loss": 0.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010710461022867302, |
|
"loss": 0.0, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010581448289104758, |
|
"loss": 0.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010452338371907064, |
|
"loss": 0.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010323152850743107, |
|
"loss": 0.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010193913317718244, |
|
"loss": 0.0, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010064641373965393, |
|
"loss": 0.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.935358626034606e-05, |
|
"loss": 0.0, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.806086682281758e-05, |
|
"loss": 0.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.676847149256895e-05, |
|
"loss": 0.0, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.547661628092937e-05, |
|
"loss": 0.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.418551710895243e-05, |
|
"loss": 0.0, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.289538977132703e-05, |
|
"loss": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.160644990030931e-05, |
|
"loss": 0.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.03189129296821e-05, |
|
"loss": 0.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.903299405874684e-05, |
|
"loss": 0.0, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.774890821635548e-05, |
|
"loss": 0.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.646687002498692e-05, |
|
"loss": 0.0, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.518709376487515e-05, |
|
"loss": 0.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.390979333819426e-05, |
|
"loss": 0.0, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.263518223330697e-05, |
|
"loss": 0.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.13634734890818e-05, |
|
"loss": 0.0, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.009487965928596e-05, |
|
"loss": 0.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.882961277705895e-05, |
|
"loss": 0.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.756788431947326e-05, |
|
"loss": 0.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.630990517218808e-05, |
|
"loss": 0.0, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.505588559420189e-05, |
|
"loss": 0.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.380603518270941e-05, |
|
"loss": 0.0, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.256056283806986e-05, |
|
"loss": 0.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.131967672889101e-05, |
|
"loss": 0.0, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.008358425723585e-05, |
|
"loss": 0.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.885249202395754e-05, |
|
"loss": 0.0, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.762660579416791e-05, |
|
"loss": 0.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.640613046284581e-05, |
|
"loss": 0.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.519127002059095e-05, |
|
"loss": 0.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.398222751952899e-05, |
|
"loss": 0.0, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.277920503937303e-05, |
|
"loss": 0.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.158240365364823e-05, |
|
"loss": 0.0, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.039202339608432e-05, |
|
"loss": 0.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.920826322718165e-05, |
|
"loss": 0.0, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.80313210009571e-05, |
|
"loss": 0.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.6861393431874675e-05, |
|
"loss": 0.0, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.5698676061966515e-05, |
|
"loss": 0.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.4543363228149946e-05, |
|
"loss": 0.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.339564802974615e-05, |
|
"loss": 0.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.22557222962051e-05, |
|
"loss": 0.0, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.112377655504359e-05, |
|
"loss": 0.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 0.0, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.888458045941269e-05, |
|
"loss": 0.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.777770436482617e-05, |
|
"loss": 0.0, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.66795567198309e-05, |
|
"loss": 0.0, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.559032106914173e-05, |
|
"loss": 0.0, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.451017946792032e-05, |
|
"loss": 0.0, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.343931245134616e-05, |
|
"loss": 0.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.2377899004441966e-05, |
|
"loss": 0.0, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.132611653215822e-05, |
|
"loss": 0.0, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.028414082972141e-05, |
|
"loss": 0.0, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.9252146053251636e-05, |
|
"loss": 0.0, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.8230304690654304e-05, |
|
"loss": 0.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.721878753279017e-05, |
|
"loss": 0.0, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.621776364492939e-05, |
|
"loss": 0.0, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.522740033849411e-05, |
|
"loss": 0.0, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.424786314309365e-05, |
|
"loss": 0.0, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.3279315778858036e-05, |
|
"loss": 0.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.2321920129073816e-05, |
|
"loss": 0.0, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.137583621312665e-05, |
|
"loss": 0.0, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.04412221597558e-05, |
|
"loss": 0.0, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9518234180624393e-05, |
|
"loss": 0.0, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8607026544210114e-05, |
|
"loss": 0.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.770775155002071e-05, |
|
"loss": 0.0, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6820559503138797e-05, |
|
"loss": 0.0, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.594559868909956e-05, |
|
"loss": 0.0, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.50830153491064e-05, |
|
"loss": 0.0, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.423295365558821e-05, |
|
"loss": 0.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.339555568810221e-05, |
|
"loss": 0.0, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2570961409586754e-05, |
|
"loss": 0.0, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1759308642968025e-05, |
|
"loss": 0.0, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0960733048124083e-05, |
|
"loss": 0.0, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.01753680992107e-05, |
|
"loss": 0.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9403345062352573e-05, |
|
"loss": 0.0, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.864479297370325e-05, |
|
"loss": 0.0, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7899838617878163e-05, |
|
"loss": 0.0, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7168606506763695e-05, |
|
"loss": 0.0, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6451218858706374e-05, |
|
"loss": 0.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5747795578085046e-05, |
|
"loss": 0.0, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.505845423527027e-05, |
|
"loss": 0.0, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4383310046973365e-05, |
|
"loss": 0.0, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3722475856989158e-05, |
|
"loss": 0.0, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.307606211733522e-05, |
|
"loss": 0.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2444176869790925e-05, |
|
"loss": 0.0, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.18269257278392e-05, |
|
"loss": 0.0, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1224411859014417e-05, |
|
"loss": 0.0, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0636735967658784e-05, |
|
"loss": 0.0, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0063996278090704e-05, |
|
"loss": 0.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.506288518187467e-06, |
|
"loss": 0.0, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.963705903385345e-06, |
|
"loss": 0.0, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.436339121099412e-06, |
|
"loss": 0.0, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.92427631556617e-06, |
|
"loss": 0.0, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.427603073110967e-06, |
|
"loss": 0.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.946402407843155e-06, |
|
"loss": 0.0, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.480754747781037e-06, |
|
"loss": 0.0, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.030737921409169e-06, |
|
"loss": 0.0, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.596427144670002e-06, |
|
"loss": 0.0, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.177895008392353e-06, |
|
"loss": 0.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.775211466158469e-06, |
|
"loss": 0.0, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.3884438226120424e-06, |
|
"loss": 0.0, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.017656722208807e-06, |
|
"loss": 0.0, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.662912138411967e-06, |
|
"loss": 0.0, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.3242693633337983e-06, |
|
"loss": 0.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.0017849978256516e-06, |
|
"loss": 0.0, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6955129420176196e-06, |
|
"loss": 0.0, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4055043863096428e-06, |
|
"loss": 0.0, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1318078028155888e-06, |
|
"loss": 0.0, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.874468937261531e-06, |
|
"loss": 0.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6335308013398886e-06, |
|
"loss": 0.0, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.409033665520354e-06, |
|
"loss": 0.0, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.201015052319099e-06, |
|
"loss": 0.0, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0095097300273026e-06, |
|
"loss": 0.0, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.345497068998897e-07, |
|
"loss": 0.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.761642258056978e-07, |
|
"loss": 0.0, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.343797593398536e-07, |
|
"loss": 0.0, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.092200053990691e-07, |
|
"loss": 0.0, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.007058832207976e-07, |
|
"loss": 0.0, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.088555298867978e-07, |
|
"loss": 0.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3368429729168076e-07, |
|
"loss": 0.0, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.520474957699586e-08, |
|
"loss": 0.0, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.3426661031255026e-08, |
|
"loss": 0.0, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.357014456272794e-09, |
|
"loss": 0.0, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": null,
|
"eval_runtime": 865.6078, |
|
"eval_samples_per_second": 2.669, |
|
"eval_steps_per_second": 0.334, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1350, |
|
"total_flos": 2763707490304000.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 23143.2556, |
|
"train_samples_per_second": 0.933, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 2763707490304000.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|