|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.999549121276575, |
|
"global_step": 204000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-05, |
|
"loss": 4.2154, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9950871056174036e-05, |
|
"loss": 3.5237, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.990174211234807e-05, |
|
"loss": 3.3401, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9852613168522107e-05, |
|
"loss": 3.2179, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.980348422469614e-05, |
|
"loss": 3.1655, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9754355280870174e-05, |
|
"loss": 3.1335, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9705226337044208e-05, |
|
"loss": 3.1039, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9656097393218242e-05, |
|
"loss": 3.0406, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9606968449392276e-05, |
|
"loss": 3.0212, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9557839505566313e-05, |
|
"loss": 3.0054, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9508710561740344e-05, |
|
"loss": 2.9778, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.945958161791438e-05, |
|
"loss": 2.9428, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9410452674088415e-05, |
|
"loss": 2.9546, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.936132373026245e-05, |
|
"loss": 2.9442, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9312194786436483e-05, |
|
"loss": 2.9142, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9263065842610517e-05, |
|
"loss": 2.8991, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.921393689878455e-05, |
|
"loss": 2.8399, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9164807954958588e-05, |
|
"loss": 2.8758, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9115679011132618e-05, |
|
"loss": 2.8876, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9066550067306656e-05, |
|
"loss": 2.8404, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.901742112348069e-05, |
|
"loss": 2.8612, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8968292179654723e-05, |
|
"loss": 2.8226, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8919163235828757e-05, |
|
"loss": 2.8144, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.887003429200279e-05, |
|
"loss": 2.8132, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.8820905348176825e-05, |
|
"loss": 2.8182, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.8771776404350862e-05, |
|
"loss": 2.7883, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.8722647460524893e-05, |
|
"loss": 2.7961, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.867351851669893e-05, |
|
"loss": 2.7605, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.8624389572872964e-05, |
|
"loss": 2.7576, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.8575260629046998e-05, |
|
"loss": 2.7763, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.852613168522103e-05, |
|
"loss": 2.7519, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8477002741395065e-05, |
|
"loss": 2.7704, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8427873797569103e-05, |
|
"loss": 2.6919, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8378744853743137e-05, |
|
"loss": 2.7287, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.832961590991717e-05, |
|
"loss": 2.7121, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8280486966091204e-05, |
|
"loss": 2.7137, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8231358022265238e-05, |
|
"loss": 2.7361, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8182229078439272e-05, |
|
"loss": 2.7176, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.813310013461331e-05, |
|
"loss": 2.7055, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8083971190787343e-05, |
|
"loss": 2.7325, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8034842246961377e-05, |
|
"loss": 2.7259, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.798571330313541e-05, |
|
"loss": 2.6834, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.7936584359309445e-05, |
|
"loss": 2.6681, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.788745541548348e-05, |
|
"loss": 2.6709, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.7838326471657516e-05, |
|
"loss": 2.673, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7789197527831547e-05, |
|
"loss": 2.6801, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7740068584005584e-05, |
|
"loss": 2.6635, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7690939640179618e-05, |
|
"loss": 2.6699, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.764181069635365e-05, |
|
"loss": 2.6431, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7592681752527685e-05, |
|
"loss": 2.6895, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.754355280870172e-05, |
|
"loss": 2.6452, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7494423864875753e-05, |
|
"loss": 2.6619, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.744529492104979e-05, |
|
"loss": 2.6232, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.739616597722382e-05, |
|
"loss": 2.663, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7347037033397858e-05, |
|
"loss": 2.6151, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7297908089571892e-05, |
|
"loss": 2.6166, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.7248779145745926e-05, |
|
"loss": 2.6394, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.719965020191996e-05, |
|
"loss": 2.6242, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.7150521258093994e-05, |
|
"loss": 2.6205, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.7101392314268028e-05, |
|
"loss": 2.6596, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.7052263370442065e-05, |
|
"loss": 2.6469, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.7003134426616095e-05, |
|
"loss": 2.6189, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6954005482790133e-05, |
|
"loss": 2.6035, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6904876538964167e-05, |
|
"loss": 2.5861, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.68557475951382e-05, |
|
"loss": 2.6504, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.6806618651312238e-05, |
|
"loss": 2.6006, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.6757489707486268e-05, |
|
"loss": 2.6398, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.6708360763660305e-05, |
|
"loss": 2.6024, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.665923181983434e-05, |
|
"loss": 2.6039, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.6610102876008373e-05, |
|
"loss": 2.6224, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.6560973932182407e-05, |
|
"loss": 2.5644, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.651184498835644e-05, |
|
"loss": 2.5826, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6462716044530475e-05, |
|
"loss": 2.585, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.6413587100704512e-05, |
|
"loss": 2.5589, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.6364458156878546e-05, |
|
"loss": 2.6029, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.631532921305258e-05, |
|
"loss": 2.554, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.6266200269226614e-05, |
|
"loss": 2.5599, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.6217071325400648e-05, |
|
"loss": 2.5285, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.616794238157468e-05, |
|
"loss": 2.5684, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.611881343774872e-05, |
|
"loss": 2.5816, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.606968449392275e-05, |
|
"loss": 2.566, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.6020555550096787e-05, |
|
"loss": 2.5244, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.597142660627082e-05, |
|
"loss": 2.5571, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.5922297662444854e-05, |
|
"loss": 2.5071, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.5873168718618888e-05, |
|
"loss": 2.5497, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.5824039774792922e-05, |
|
"loss": 2.5566, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.5774910830966956e-05, |
|
"loss": 2.5448, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.5725781887140993e-05, |
|
"loss": 2.5649, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.5676652943315024e-05, |
|
"loss": 2.5562, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.562752399948906e-05, |
|
"loss": 2.5756, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.5578395055663095e-05, |
|
"loss": 2.5357, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.552926611183713e-05, |
|
"loss": 2.5394, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.5480137168011163e-05, |
|
"loss": 2.5416, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.5431008224185196e-05, |
|
"loss": 2.523, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5381879280359234e-05, |
|
"loss": 2.4957, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5332750336533268e-05, |
|
"loss": 2.5392, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.52836213927073e-05, |
|
"loss": 2.5505, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5234492448881335e-05, |
|
"loss": 2.5251, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5185363505055371e-05, |
|
"loss": 2.4836, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5136234561229403e-05, |
|
"loss": 2.5077, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.5087105617403439e-05, |
|
"loss": 2.5103, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.5037976673577473e-05, |
|
"loss": 2.5157, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.4988847729751508e-05, |
|
"loss": 2.509, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.493971878592554e-05, |
|
"loss": 2.4982, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.4890589842099576e-05, |
|
"loss": 2.4741, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.484146089827361e-05, |
|
"loss": 2.5438, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.4792331954447645e-05, |
|
"loss": 2.4698, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4743203010621678e-05, |
|
"loss": 2.5175, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.4694074066795713e-05, |
|
"loss": 2.4857, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.4644945122969747e-05, |
|
"loss": 2.5006, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.4595816179143783e-05, |
|
"loss": 2.4875, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.4546687235317816e-05, |
|
"loss": 2.4964, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.449755829149185e-05, |
|
"loss": 2.5175, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.4448429347665884e-05, |
|
"loss": 2.4912, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.439930040383992e-05, |
|
"loss": 2.5074, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.4350171460013954e-05, |
|
"loss": 2.4655, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.430104251618799e-05, |
|
"loss": 2.4985, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4251913572362021e-05, |
|
"loss": 2.4791, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4202784628536057e-05, |
|
"loss": 2.4881, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4153655684710091e-05, |
|
"loss": 2.4805, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4104526740884126e-05, |
|
"loss": 2.4591, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4055397797058159e-05, |
|
"loss": 2.4958, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4006268853232194e-05, |
|
"loss": 2.4691, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3957139909406228e-05, |
|
"loss": 2.5049, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3908010965580264e-05, |
|
"loss": 2.4584, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.38588820217543e-05, |
|
"loss": 2.4761, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3809753077928331e-05, |
|
"loss": 2.4677, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.3760624134102367e-05, |
|
"loss": 2.4868, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.3711495190276401e-05, |
|
"loss": 2.4739, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.3662366246450436e-05, |
|
"loss": 2.4558, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.3613237302624469e-05, |
|
"loss": 2.4528, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.3564108358798504e-05, |
|
"loss": 2.4608, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.3514979414972538e-05, |
|
"loss": 2.4461, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3465850471146574e-05, |
|
"loss": 2.4382, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.3416721527320606e-05, |
|
"loss": 2.4554, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3367592583494641e-05, |
|
"loss": 2.4488, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.3318463639668675e-05, |
|
"loss": 2.4594, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3269334695842711e-05, |
|
"loss": 2.4583, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3220205752016743e-05, |
|
"loss": 2.4392, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3171076808190779e-05, |
|
"loss": 2.4366, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3121947864364813e-05, |
|
"loss": 2.4437, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.3072818920538848e-05, |
|
"loss": 2.4635, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.302368997671288e-05, |
|
"loss": 2.435, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.2974561032886916e-05, |
|
"loss": 2.4333, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.292543208906095e-05, |
|
"loss": 2.4678, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.2876303145234985e-05, |
|
"loss": 2.4538, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.282717420140902e-05, |
|
"loss": 2.4561, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2778045257583053e-05, |
|
"loss": 2.5159, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.2728916313757087e-05, |
|
"loss": 2.4392, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2679787369931123e-05, |
|
"loss": 2.4503, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2630658426105156e-05, |
|
"loss": 2.4428, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2581529482279192e-05, |
|
"loss": 2.426, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2532400538453224e-05, |
|
"loss": 2.4809, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.248327159462726e-05, |
|
"loss": 2.4687, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2434142650801294e-05, |
|
"loss": 2.4306, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.238501370697533e-05, |
|
"loss": 2.4455, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2335884763149363e-05, |
|
"loss": 2.3706, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2286755819323397e-05, |
|
"loss": 2.4224, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2237626875497433e-05, |
|
"loss": 2.4242, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2188497931671466e-05, |
|
"loss": 2.429, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2139368987845502e-05, |
|
"loss": 2.4247, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.2090240044019534e-05, |
|
"loss": 2.4195, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.204111110019357e-05, |
|
"loss": 2.4293, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1991982156367604e-05, |
|
"loss": 2.4245, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.194285321254164e-05, |
|
"loss": 2.4415, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.1893724268715671e-05, |
|
"loss": 2.4199, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.1844595324889707e-05, |
|
"loss": 2.4386, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.179546638106374e-05, |
|
"loss": 2.4063, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.1746337437237776e-05, |
|
"loss": 2.4376, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.1697208493411809e-05, |
|
"loss": 2.4177, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.1648079549585844e-05, |
|
"loss": 2.3778, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.1598950605759878e-05, |
|
"loss": 2.4355, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1549821661933914e-05, |
|
"loss": 2.4214, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1500692718107946e-05, |
|
"loss": 2.3941, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1451563774281981e-05, |
|
"loss": 2.4122, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1402434830456015e-05, |
|
"loss": 2.4021, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.135330588663005e-05, |
|
"loss": 2.4154, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.1304176942804083e-05, |
|
"loss": 2.4004, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1255047998978119e-05, |
|
"loss": 2.4091, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.1205919055152152e-05, |
|
"loss": 2.4348, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1156790111326188e-05, |
|
"loss": 2.3965, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1107661167500222e-05, |
|
"loss": 2.3904, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1058532223674256e-05, |
|
"loss": 2.3947, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.100940327984829e-05, |
|
"loss": 2.4075, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0960274336022325e-05, |
|
"loss": 2.3987, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.0911145392196359e-05, |
|
"loss": 2.4116, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0862016448370393e-05, |
|
"loss": 2.4416, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0812887504544429e-05, |
|
"loss": 2.3899, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.0763758560718462e-05, |
|
"loss": 2.4015, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.0714629616892498e-05, |
|
"loss": 2.3741, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0665500673066532e-05, |
|
"loss": 2.3951, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.0616371729240566e-05, |
|
"loss": 2.406, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.05672427854146e-05, |
|
"loss": 2.4102, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0518113841588635e-05, |
|
"loss": 2.4031, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0468984897762669e-05, |
|
"loss": 2.417, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0419855953936705e-05, |
|
"loss": 2.3978, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0370727010110737e-05, |
|
"loss": 2.4009, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0321598066284772e-05, |
|
"loss": 2.3966, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0272469122458806e-05, |
|
"loss": 2.3918, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0223340178632842e-05, |
|
"loss": 2.3853, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0174211234806874e-05, |
|
"loss": 2.3996, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.012508229098091e-05, |
|
"loss": 2.388, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0075953347154944e-05, |
|
"loss": 2.3845, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0026824403328979e-05, |
|
"loss": 2.385, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.977695459503013e-06, |
|
"loss": 2.3387, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.928566515677047e-06, |
|
"loss": 2.3401, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.87943757185108e-06, |
|
"loss": 2.3291, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.830308628025116e-06, |
|
"loss": 2.3783, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.78117968419915e-06, |
|
"loss": 2.3154, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.732050740373184e-06, |
|
"loss": 2.3229, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.68292179654722e-06, |
|
"loss": 2.3844, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.633792852721254e-06, |
|
"loss": 2.3945, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.584663908895287e-06, |
|
"loss": 2.3354, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.535534965069321e-06, |
|
"loss": 2.325, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.486406021243357e-06, |
|
"loss": 2.3722, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.43727707741739e-06, |
|
"loss": 2.37, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.388148133591425e-06, |
|
"loss": 2.3454, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.339019189765458e-06, |
|
"loss": 2.3118, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.289890245939494e-06, |
|
"loss": 2.34, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.240761302113528e-06, |
|
"loss": 2.3156, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.191632358287562e-06, |
|
"loss": 2.3686, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.142503414461596e-06, |
|
"loss": 2.3641, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.093374470635631e-06, |
|
"loss": 2.3399, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.044245526809665e-06, |
|
"loss": 2.3038, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.995116582983699e-06, |
|
"loss": 2.3394, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.945987639157735e-06, |
|
"loss": 2.3144, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.896858695331768e-06, |
|
"loss": 2.3542, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.847729751505802e-06, |
|
"loss": 2.3378, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.798600807679836e-06, |
|
"loss": 2.3562, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.749471863853872e-06, |
|
"loss": 2.3759, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.700342920027906e-06, |
|
"loss": 2.3459, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.65121397620194e-06, |
|
"loss": 2.3041, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.602085032375973e-06, |
|
"loss": 2.3076, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.552956088550009e-06, |
|
"loss": 2.3598, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.503827144724045e-06, |
|
"loss": 2.3463, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.454698200898078e-06, |
|
"loss": 2.3505, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.405569257072112e-06, |
|
"loss": 2.3106, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.356440313246146e-06, |
|
"loss": 2.3391, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.307311369420182e-06, |
|
"loss": 2.3316, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.258182425594216e-06, |
|
"loss": 2.3122, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.20905348176825e-06, |
|
"loss": 2.3244, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.159924537942283e-06, |
|
"loss": 2.3457, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.110795594116319e-06, |
|
"loss": 2.328, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.061666650290353e-06, |
|
"loss": 2.319, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.012537706464387e-06, |
|
"loss": 2.3168, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.963408762638422e-06, |
|
"loss": 2.3288, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.914279818812456e-06, |
|
"loss": 2.3169, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.86515087498649e-06, |
|
"loss": 2.3398, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.816021931160524e-06, |
|
"loss": 2.3182, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.76689298733456e-06, |
|
"loss": 2.3219, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 7.717764043508593e-06, |
|
"loss": 2.3407, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.668635099682627e-06, |
|
"loss": 2.2986, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.619506155856662e-06, |
|
"loss": 2.3192, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 7.570377212030696e-06, |
|
"loss": 2.3229, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.521248268204731e-06, |
|
"loss": 2.3073, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 7.4721193243787646e-06, |
|
"loss": 2.2936, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.422990380552799e-06, |
|
"loss": 2.3078, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 7.373861436726833e-06, |
|
"loss": 2.3507, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.324732492900868e-06, |
|
"loss": 2.3071, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 7.275603549074902e-06, |
|
"loss": 2.3076, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.2264746052489365e-06, |
|
"loss": 2.3321, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 7.177345661422971e-06, |
|
"loss": 2.3224, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.128216717597005e-06, |
|
"loss": 2.3083, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 7.07908777377104e-06, |
|
"loss": 2.3196, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 7.0299588299450745e-06, |
|
"loss": 2.3311, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.980829886119109e-06, |
|
"loss": 2.3505, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.931700942293144e-06, |
|
"loss": 2.3014, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.882571998467178e-06, |
|
"loss": 2.3001, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.833443054641213e-06, |
|
"loss": 2.3122, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.7843141108152465e-06, |
|
"loss": 2.3031, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.735185166989281e-06, |
|
"loss": 2.3426, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.686056223163315e-06, |
|
"loss": 2.3242, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.63692727933735e-06, |
|
"loss": 2.3312, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.587798335511384e-06, |
|
"loss": 2.3265, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.5386693916854184e-06, |
|
"loss": 2.3226, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.489540447859452e-06, |
|
"loss": 2.2682, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.440411504033487e-06, |
|
"loss": 2.2961, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 6.391282560207521e-06, |
|
"loss": 2.2935, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.342153616381556e-06, |
|
"loss": 2.324, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 6.2930246725555895e-06, |
|
"loss": 2.2955, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.243895728729624e-06, |
|
"loss": 2.292, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.194766784903659e-06, |
|
"loss": 2.3076, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.145637841077693e-06, |
|
"loss": 2.3151, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.0965088972517276e-06, |
|
"loss": 2.3051, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.0473799534257615e-06, |
|
"loss": 2.3247, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.998251009599796e-06, |
|
"loss": 2.2741, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.94912206577383e-06, |
|
"loss": 2.2937, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.899993121947865e-06, |
|
"loss": 2.2883, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.850864178121899e-06, |
|
"loss": 2.2985, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.801735234295933e-06, |
|
"loss": 2.3006, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.752606290469967e-06, |
|
"loss": 2.2663, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.703477346644002e-06, |
|
"loss": 2.3, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.654348402818036e-06, |
|
"loss": 2.2688, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.605219458992071e-06, |
|
"loss": 2.2915, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.5560905151661045e-06, |
|
"loss": 2.2897, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.50696157134014e-06, |
|
"loss": 2.3316, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.457832627514175e-06, |
|
"loss": 2.3386, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.408703683688209e-06, |
|
"loss": 2.3215, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.359574739862243e-06, |
|
"loss": 2.3012, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.310445796036277e-06, |
|
"loss": 2.2939, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.261316852210312e-06, |
|
"loss": 2.2933, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.212187908384346e-06, |
|
"loss": 2.3057, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.163058964558381e-06, |
|
"loss": 2.2883, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.113930020732415e-06, |
|
"loss": 2.3136, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.064801076906449e-06, |
|
"loss": 2.2841, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.015672133080484e-06, |
|
"loss": 2.2684, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.966543189254518e-06, |
|
"loss": 2.2535, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.9174142454285525e-06, |
|
"loss": 2.2899, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.868285301602586e-06, |
|
"loss": 2.308, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.819156357776621e-06, |
|
"loss": 2.297, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.770027413950655e-06, |
|
"loss": 2.3058, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.72089847012469e-06, |
|
"loss": 2.2709, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.671769526298724e-06, |
|
"loss": 2.2958, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.622640582472758e-06, |
|
"loss": 2.3003, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.573511638646792e-06, |
|
"loss": 2.2724, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.524382694820827e-06, |
|
"loss": 2.2806, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.475253750994861e-06, |
|
"loss": 2.2622, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.4261248071688956e-06, |
|
"loss": 2.2915, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.37699586334293e-06, |
|
"loss": 2.3153, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.327866919516965e-06, |
|
"loss": 2.2784, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.278737975690999e-06, |
|
"loss": 2.2704, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.229609031865034e-06, |
|
"loss": 2.2792, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.1804800880390675e-06, |
|
"loss": 2.2642, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.131351144213102e-06, |
|
"loss": 2.2858, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.082222200387136e-06, |
|
"loss": 2.2784, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.033093256561171e-06, |
|
"loss": 2.2851, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.983964312735205e-06, |
|
"loss": 2.2762, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.9348353689092394e-06, |
|
"loss": 2.3028, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.885706425083274e-06, |
|
"loss": 2.2735, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.836577481257308e-06, |
|
"loss": 2.2511, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.7874485374313424e-06, |
|
"loss": 2.2856, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.7383195936053767e-06, |
|
"loss": 2.2979, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.689190649779411e-06, |
|
"loss": 2.2537, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.6400617059534453e-06, |
|
"loss": 2.2746, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.5909327621274804e-06, |
|
"loss": 2.3058, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.5418038183015147e-06, |
|
"loss": 2.3015, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.492674874475549e-06, |
|
"loss": 2.2863, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.4435459306495833e-06, |
|
"loss": 2.2733, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.3944169868236176e-06, |
|
"loss": 2.2806, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.345288042997652e-06, |
|
"loss": 2.259, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.2961590991716862e-06, |
|
"loss": 2.3225, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.2470301553457205e-06, |
|
"loss": 2.2887, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.197901211519755e-06, |
|
"loss": 2.2957, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.148772267693789e-06, |
|
"loss": 2.294, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.0996433238678234e-06, |
|
"loss": 2.2976, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.050514380041858e-06, |
|
"loss": 2.3028, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.0013854362158925e-06, |
|
"loss": 2.2875, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.9522564923899268e-06, |
|
"loss": 2.3208, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.903127548563961e-06, |
|
"loss": 2.3068, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.853998604737996e-06, |
|
"loss": 2.28, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.80486966091203e-06, |
|
"loss": 2.301, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7557407170860644e-06, |
|
"loss": 2.2428, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.7066117732600987e-06, |
|
"loss": 2.2843, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.657482829434133e-06, |
|
"loss": 2.2756, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.6083538856081673e-06, |
|
"loss": 2.289, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.5592249417822016e-06, |
|
"loss": 2.2902, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.5100959979562363e-06, |
|
"loss": 2.298, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.4609670541302707e-06, |
|
"loss": 2.2764, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.411838110304305e-06, |
|
"loss": 2.2487, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3627091664783393e-06, |
|
"loss": 2.323, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.3135802226523736e-06, |
|
"loss": 2.3174, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2644512788264083e-06, |
|
"loss": 2.3205, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2153223350004426e-06, |
|
"loss": 2.313, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.166193391174477e-06, |
|
"loss": 2.3015, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.117064447348511e-06, |
|
"loss": 2.2836, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0679355035225455e-06, |
|
"loss": 2.2737, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.01880655969658e-06, |
|
"loss": 2.2824, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.969677615870614e-06, |
|
"loss": 2.2577, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.920548672044649e-06, |
|
"loss": 2.2563, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.871419728218683e-06, |
|
"loss": 2.3253, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.8222907843927174e-06, |
|
"loss": 2.2504, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7731618405667517e-06, |
|
"loss": 2.3124, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.724032896740786e-06, |
|
"loss": 2.2784, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.6749039529148203e-06, |
|
"loss": 2.2832, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.6257750090888546e-06, |
|
"loss": 2.2924, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.576646065262889e-06, |
|
"loss": 2.2843, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.5275171214369237e-06, |
|
"loss": 2.2724, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.478388177610958e-06, |
|
"loss": 2.3194, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.4292592337849923e-06, |
|
"loss": 2.2532, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.3801302899590266e-06, |
|
"loss": 2.2773, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.3310013461330609e-06, |
|
"loss": 2.2869, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.2818724023070952e-06, |
|
"loss": 2.2751, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.2327434584811297e-06, |
|
"loss": 2.3004, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.183614514655164e-06, |
|
"loss": 2.3015, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.1344855708291983e-06, |
|
"loss": 2.2566, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.0853566270032326e-06, |
|
"loss": 2.2978, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.0362276831772671e-06, |
|
"loss": 2.2898, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.870987393513014e-07, |
|
"loss": 2.2806, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.379697955253358e-07, |
|
"loss": 2.2777, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.888408516993701e-07, |
|
"loss": 2.261, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.397119078734047e-07, |
|
"loss": 2.252, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.90582964047439e-07, |
|
"loss": 2.2906, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.414540202214733e-07, |
|
"loss": 2.2968, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.923250763955077e-07, |
|
"loss": 2.2463, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 6.431961325695421e-07, |
|
"loss": 2.2681, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5.940671887435765e-07, |
|
"loss": 2.2965, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.449382449176108e-07, |
|
"loss": 2.2705, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.958093010916452e-07, |
|
"loss": 2.2669, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.4668035726567956e-07, |
|
"loss": 2.2478, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.975514134397139e-07, |
|
"loss": 2.3053, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.484224696137483e-07, |
|
"loss": 2.2731, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.992935257877826e-07, |
|
"loss": 2.2518, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.50164581961817e-07, |
|
"loss": 2.2657, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.0103563813585136e-07, |
|
"loss": 2.2967, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.5190669430988575e-07, |
|
"loss": 2.2874, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.0277775048392012e-07, |
|
"loss": 2.2764, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 5.364880665795447e-08, |
|
"loss": 2.2628, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.519862831988839e-09, |
|
"loss": 2.2711, |
|
"step": 204000 |
|
} |
|
], |
|
"max_steps": 204046, |
|
"num_train_epochs": 2, |
|
"total_flos": 6.966781556932454e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|