|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.613999632555576, |
|
"global_step": 72000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.999999632555577e-05, |
|
"loss": 1.5591, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.999999265111153e-05, |
|
"loss": 1.3368, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.999998897666729e-05, |
|
"loss": 1.2706, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.999998530222305e-05, |
|
"loss": 1.2055, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.999998162777881e-05, |
|
"loss": 1.1738, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.999997795333457e-05, |
|
"loss": 1.1357, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.999997427889031e-05, |
|
"loss": 1.1226, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.999997060444607e-05, |
|
"loss": 1.0901, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.999996693000185e-05, |
|
"loss": 1.0793, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.999996325555761e-05, |
|
"loss": 1.0587, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.999995958111336e-05, |
|
"loss": 1.0522, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.999995590666912e-05, |
|
"loss": 1.0277, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.999995223222488e-05, |
|
"loss": 1.0177, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.999994855778064e-05, |
|
"loss": 1.0164, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.999994488333641e-05, |
|
"loss": 0.9814, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.999994120889216e-05, |
|
"loss": 0.9802, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.999993753444792e-05, |
|
"loss": 0.9808, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.999993386000368e-05, |
|
"loss": 0.9686, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.999993018555944e-05, |
|
"loss": 0.9531, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.99999265111152e-05, |
|
"loss": 0.9332, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.999992283667096e-05, |
|
"loss": 0.9486, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.999991916222672e-05, |
|
"loss": 0.9145, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.999991548778248e-05, |
|
"loss": 0.9164, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.999991181333824e-05, |
|
"loss": 0.8984, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.999990813889399e-05, |
|
"loss": 0.9016, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.999990446444975e-05, |
|
"loss": 0.9019, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.999990079000552e-05, |
|
"loss": 0.8835, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.999989711556128e-05, |
|
"loss": 0.8829, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.999989344111703e-05, |
|
"loss": 0.8872, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.999988976667279e-05, |
|
"loss": 0.8895, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.999988609222855e-05, |
|
"loss": 0.8533, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.999988241778431e-05, |
|
"loss": 0.8515, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.999987874334007e-05, |
|
"loss": 0.8682, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.999987506889583e-05, |
|
"loss": 0.8345, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.99998713944516e-05, |
|
"loss": 0.8614, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.999986772000735e-05, |
|
"loss": 0.8481, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.999986404556311e-05, |
|
"loss": 0.8479, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.999986037111887e-05, |
|
"loss": 0.8467, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.999985669667464e-05, |
|
"loss": 0.8441, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.99998530222304e-05, |
|
"loss": 0.8087, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.999984934778616e-05, |
|
"loss": 0.8202, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.99998456733419e-05, |
|
"loss": 0.8231, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.999984199889766e-05, |
|
"loss": 0.8188, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.999983832445344e-05, |
|
"loss": 0.8103, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.99998346500092e-05, |
|
"loss": 0.8158, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.999983097556495e-05, |
|
"loss": 0.808, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.99998273011207e-05, |
|
"loss": 0.8146, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.999982362667647e-05, |
|
"loss": 0.797, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.999981995223223e-05, |
|
"loss": 0.7784, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.9999816277788e-05, |
|
"loss": 0.7864, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.999981260334375e-05, |
|
"loss": 0.7987, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.999980892889951e-05, |
|
"loss": 0.7757, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.999980525445527e-05, |
|
"loss": 0.7812, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.999980158001103e-05, |
|
"loss": 0.7801, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.6798496246337891, |
|
"eval_runtime": 1461.6898, |
|
"eval_samples_per_second": 119.156, |
|
"eval_steps_per_second": 7.448, |
|
"step": 10886 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.999979790556679e-05, |
|
"loss": 0.7013, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.999979423112255e-05, |
|
"loss": 0.6525, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.999979055667831e-05, |
|
"loss": 0.6677, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.999978688223407e-05, |
|
"loss": 0.6573, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.999978320778982e-05, |
|
"loss": 0.652, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.999977953334558e-05, |
|
"loss": 0.6554, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.999977585890135e-05, |
|
"loss": 0.6593, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.999977218445711e-05, |
|
"loss": 0.6634, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.999976851001287e-05, |
|
"loss": 0.653, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 9.999976483556862e-05, |
|
"loss": 0.6617, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 9.999976116112438e-05, |
|
"loss": 0.6567, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.999975748668014e-05, |
|
"loss": 0.6683, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 9.999975381223591e-05, |
|
"loss": 0.6681, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.999975013779166e-05, |
|
"loss": 0.6501, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.999974646334742e-05, |
|
"loss": 0.664, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 9.999974278890318e-05, |
|
"loss": 0.6702, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.999973911445894e-05, |
|
"loss": 0.6535, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 9.99997354400147e-05, |
|
"loss": 0.6614, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.999973176557046e-05, |
|
"loss": 0.6584, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 9.999972809112622e-05, |
|
"loss": 0.649, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 9.999972441668198e-05, |
|
"loss": 0.6489, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 9.999972074223775e-05, |
|
"loss": 0.6568, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.999971706779349e-05, |
|
"loss": 0.645, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.999971339334925e-05, |
|
"loss": 0.6554, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 9.999970971890503e-05, |
|
"loss": 0.6395, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 9.999970604446079e-05, |
|
"loss": 0.6497, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9.999970237001653e-05, |
|
"loss": 0.6485, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.99996986955723e-05, |
|
"loss": 0.6383, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.999969502112805e-05, |
|
"loss": 0.64, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.999969134668382e-05, |
|
"loss": 0.6318, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.999968767223959e-05, |
|
"loss": 0.6381, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.999968399779534e-05, |
|
"loss": 0.6484, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.99996803233511e-05, |
|
"loss": 0.6491, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.999967664890686e-05, |
|
"loss": 0.6428, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.999967297446262e-05, |
|
"loss": 0.6356, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.999966930001838e-05, |
|
"loss": 0.637, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.999966562557414e-05, |
|
"loss": 0.6364, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 9.99996619511299e-05, |
|
"loss": 0.6473, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.999965827668566e-05, |
|
"loss": 0.6342, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.99996546022414e-05, |
|
"loss": 0.6395, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.999965092779717e-05, |
|
"loss": 0.6441, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 9.999964725335294e-05, |
|
"loss": 0.6461, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 9.99996435789087e-05, |
|
"loss": 0.6157, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.999963990446446e-05, |
|
"loss": 0.6233, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 9.999963623002021e-05, |
|
"loss": 0.6305, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.999963255557597e-05, |
|
"loss": 0.6277, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 9.999962888113173e-05, |
|
"loss": 0.6339, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 9.99996252066875e-05, |
|
"loss": 0.6271, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9.999962153224325e-05, |
|
"loss": 0.6145, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 9.999961785779901e-05, |
|
"loss": 0.6299, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 9.999961418335477e-05, |
|
"loss": 0.6216, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.999961050891053e-05, |
|
"loss": 0.6167, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.999960683446629e-05, |
|
"loss": 0.6254, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 9.999960316002205e-05, |
|
"loss": 0.6376, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5107570290565491, |
|
"eval_runtime": 1433.9446, |
|
"eval_samples_per_second": 121.461, |
|
"eval_steps_per_second": 7.592, |
|
"step": 21772 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.999959948557781e-05, |
|
"loss": 0.6037, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.999959581113357e-05, |
|
"loss": 0.4903, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.999959213668933e-05, |
|
"loss": 0.4901, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.999958846224508e-05, |
|
"loss": 0.4826, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.999958478780085e-05, |
|
"loss": 0.4881, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.999958111335662e-05, |
|
"loss": 0.4921, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.999957743891238e-05, |
|
"loss": 0.502, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.999957376446812e-05, |
|
"loss": 0.4976, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.999957009002388e-05, |
|
"loss": 0.491, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.999956641557964e-05, |
|
"loss": 0.5033, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 9.99995627411354e-05, |
|
"loss": 0.4924, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.999955906669118e-05, |
|
"loss": 0.5026, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 9.999955539224692e-05, |
|
"loss": 0.4966, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.999955171780269e-05, |
|
"loss": 0.4963, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 9.999954804335845e-05, |
|
"loss": 0.5072, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 9.99995443689142e-05, |
|
"loss": 0.4907, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 9.999954069446997e-05, |
|
"loss": 0.4938, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 9.999953702002573e-05, |
|
"loss": 0.5035, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 9.999953334558149e-05, |
|
"loss": 0.5006, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 9.999952967113725e-05, |
|
"loss": 0.4992, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 9.9999525996693e-05, |
|
"loss": 0.5109, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 9.999952232224876e-05, |
|
"loss": 0.4994, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.999951864780453e-05, |
|
"loss": 0.4925, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.999951497336029e-05, |
|
"loss": 0.5073, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.999951129891605e-05, |
|
"loss": 0.5061, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.99995076244718e-05, |
|
"loss": 0.4954, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.999950395002756e-05, |
|
"loss": 0.5123, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.999950027558332e-05, |
|
"loss": 0.5049, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.999949660113909e-05, |
|
"loss": 0.4972, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 9.999949292669484e-05, |
|
"loss": 0.505, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.99994892522506e-05, |
|
"loss": 0.52, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 9.999948557780636e-05, |
|
"loss": 0.5077, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 9.999948190336212e-05, |
|
"loss": 0.5159, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.999947822891788e-05, |
|
"loss": 0.5054, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.999947455447364e-05, |
|
"loss": 0.4999, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 9.99994708800294e-05, |
|
"loss": 0.5035, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 9.999946720558516e-05, |
|
"loss": 0.5041, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 9.999946353114092e-05, |
|
"loss": 0.4998, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.999945985669667e-05, |
|
"loss": 0.5098, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.999945618225244e-05, |
|
"loss": 0.5102, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.99994525078082e-05, |
|
"loss": 0.5031, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 9.999944883336396e-05, |
|
"loss": 0.5015, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 9.999944515891971e-05, |
|
"loss": 0.505, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 9.999944148447547e-05, |
|
"loss": 0.5146, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 9.999943781003123e-05, |
|
"loss": 0.5101, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 9.9999434135587e-05, |
|
"loss": 0.5155, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.999943046114277e-05, |
|
"loss": 0.5076, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 9.999942678669851e-05, |
|
"loss": 0.5157, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 9.999942311225427e-05, |
|
"loss": 0.5046, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.999941943781003e-05, |
|
"loss": 0.5078, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 9.99994157633658e-05, |
|
"loss": 0.5096, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.999941208892156e-05, |
|
"loss": 0.5057, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 9.999940841447732e-05, |
|
"loss": 0.5171, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 9.999940474003308e-05, |
|
"loss": 0.5212, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.999940106558884e-05, |
|
"loss": 0.5132, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.39297839999198914, |
|
"eval_runtime": 1401.043, |
|
"eval_samples_per_second": 124.314, |
|
"eval_steps_per_second": 7.77, |
|
"step": 32658 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 9.999939739114458e-05, |
|
"loss": 0.4123, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 9.999939371670036e-05, |
|
"loss": 0.3688, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 9.999939004225612e-05, |
|
"loss": 0.3752, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 9.999938636781188e-05, |
|
"loss": 0.3733, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 9.999938269336764e-05, |
|
"loss": 0.371, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.999937901892339e-05, |
|
"loss": 0.3838, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 9.999937534447915e-05, |
|
"loss": 0.3865, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 9.999937167003491e-05, |
|
"loss": 0.3859, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 9.999936799559068e-05, |
|
"loss": 0.3882, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 9.999936432114643e-05, |
|
"loss": 0.3942, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.999936064670219e-05, |
|
"loss": 0.3843, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 9.999935697225795e-05, |
|
"loss": 0.3859, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 9.999935329781371e-05, |
|
"loss": 0.3947, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 9.999934962336947e-05, |
|
"loss": 0.3934, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 9.999934594892523e-05, |
|
"loss": 0.3932, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 9.999934227448099e-05, |
|
"loss": 0.3977, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 9.999933860003675e-05, |
|
"loss": 0.4046, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 9.999933492559251e-05, |
|
"loss": 0.3961, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 9.999933125114826e-05, |
|
"loss": 0.398, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 9.999932757670403e-05, |
|
"loss": 0.393, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 9.999932390225979e-05, |
|
"loss": 0.4043, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.999932022781555e-05, |
|
"loss": 0.4042, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 9.99993165533713e-05, |
|
"loss": 0.3996, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 9.999931287892706e-05, |
|
"loss": 0.3995, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 9.999930920448282e-05, |
|
"loss": 0.398, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 9.99993055300386e-05, |
|
"loss": 0.3978, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 9.999930185559434e-05, |
|
"loss": 0.4039, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 9.99992981811501e-05, |
|
"loss": 0.3983, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 9.999929450670586e-05, |
|
"loss": 0.4099, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 9.999929083226162e-05, |
|
"loss": 0.3965, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 9.999928715781738e-05, |
|
"loss": 0.4027, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.999928348337314e-05, |
|
"loss": 0.406, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.99992798089289e-05, |
|
"loss": 0.415, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 9.999927613448467e-05, |
|
"loss": 0.4055, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 9.999927246004043e-05, |
|
"loss": 0.4058, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 9.999926878559617e-05, |
|
"loss": 0.4051, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 9.999926511115195e-05, |
|
"loss": 0.4088, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 9.999926143670771e-05, |
|
"loss": 0.4073, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 9.999925776226347e-05, |
|
"loss": 0.4075, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 9.999925408781923e-05, |
|
"loss": 0.4071, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 9.999925041337498e-05, |
|
"loss": 0.4136, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 9.999924673893074e-05, |
|
"loss": 0.4143, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 9.999924306448651e-05, |
|
"loss": 0.415, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 9.999923939004227e-05, |
|
"loss": 0.4157, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 9.999923571559802e-05, |
|
"loss": 0.406, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 9.999923204115378e-05, |
|
"loss": 0.416, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 9.999922836670954e-05, |
|
"loss": 0.4142, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 9.99992246922653e-05, |
|
"loss": 0.4109, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 9.999922101782106e-05, |
|
"loss": 0.4161, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 9.999921734337682e-05, |
|
"loss": 0.408, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 9.999921366893258e-05, |
|
"loss": 0.4162, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 9.999920999448834e-05, |
|
"loss": 0.4165, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 9.99992063200441e-05, |
|
"loss": 0.4167, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 9.999920264559986e-05, |
|
"loss": 0.4179, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.3084418773651123, |
|
"eval_runtime": 1411.8768, |
|
"eval_samples_per_second": 123.36, |
|
"eval_steps_per_second": 7.71, |
|
"step": 43544 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 9.999919897115562e-05, |
|
"loss": 0.3814, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 9.999919529671138e-05, |
|
"loss": 0.2863, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 9.999919162226714e-05, |
|
"loss": 0.2924, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 9.999918794782289e-05, |
|
"loss": 0.2942, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 9.999918427337865e-05, |
|
"loss": 0.3034, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 9.999918059893441e-05, |
|
"loss": 0.3002, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 9.999917692449018e-05, |
|
"loss": 0.3029, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 9.999917325004593e-05, |
|
"loss": 0.2977, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 9.999916957560169e-05, |
|
"loss": 0.3062, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 9.999916590115745e-05, |
|
"loss": 0.3075, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 9.999916222671321e-05, |
|
"loss": 0.3137, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 9.999915855226897e-05, |
|
"loss": 0.3037, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 9.999915487782473e-05, |
|
"loss": 0.31, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 9.99991512033805e-05, |
|
"loss": 0.3141, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 9.999914752893625e-05, |
|
"loss": 0.3166, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 9.999914385449201e-05, |
|
"loss": 0.3125, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 9.999914018004776e-05, |
|
"loss": 0.3109, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 9.999913650560354e-05, |
|
"loss": 0.3169, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 9.99991328311593e-05, |
|
"loss": 0.3211, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 9.999912915671506e-05, |
|
"loss": 0.3278, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 9.999912548227082e-05, |
|
"loss": 0.3154, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 9.999912180782656e-05, |
|
"loss": 0.3167, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 9.999911813338232e-05, |
|
"loss": 0.3172, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 9.99991144589381e-05, |
|
"loss": 0.3243, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 9.999911078449386e-05, |
|
"loss": 0.3286, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 9.99991071100496e-05, |
|
"loss": 0.3268, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 9.999910343560537e-05, |
|
"loss": 0.3204, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 9.999909976116113e-05, |
|
"loss": 0.3311, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 9.999909608671689e-05, |
|
"loss": 0.3241, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.999909241227265e-05, |
|
"loss": 0.3305, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 9.999908873782841e-05, |
|
"loss": 0.3238, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 9.999908506338417e-05, |
|
"loss": 0.3283, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 9.999908138893993e-05, |
|
"loss": 0.3275, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 9.999907771449569e-05, |
|
"loss": 0.3302, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 9.999907404005145e-05, |
|
"loss": 0.3342, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 9.999907036560721e-05, |
|
"loss": 0.3363, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 9.999906669116297e-05, |
|
"loss": 0.3319, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 9.999906301671873e-05, |
|
"loss": 0.3288, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 9.999905934227448e-05, |
|
"loss": 0.3349, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 9.999905566783024e-05, |
|
"loss": 0.3289, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 9.999905199338601e-05, |
|
"loss": 0.3393, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 9.999904831894177e-05, |
|
"loss": 0.3388, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 9.999904464449752e-05, |
|
"loss": 0.3322, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 9.999904097005328e-05, |
|
"loss": 0.332, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 9.999903729560904e-05, |
|
"loss": 0.3378, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 9.99990336211648e-05, |
|
"loss": 0.3369, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 9.999902994672056e-05, |
|
"loss": 0.337, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 9.999902627227632e-05, |
|
"loss": 0.3434, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 9.999902259783208e-05, |
|
"loss": 0.3326, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 9.999901892338784e-05, |
|
"loss": 0.3382, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 9.99990152489436e-05, |
|
"loss": 0.3443, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 9.999901157449936e-05, |
|
"loss": 0.3405, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 9.999900790005512e-05, |
|
"loss": 0.3454, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 9.999900422561088e-05, |
|
"loss": 0.3499, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.999900055116665e-05, |
|
"loss": 0.3442, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.24815598130226135, |
|
"eval_runtime": 1424.5159, |
|
"eval_samples_per_second": 122.265, |
|
"eval_steps_per_second": 7.642, |
|
"step": 54430 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 9.999899687672239e-05, |
|
"loss": 0.2559, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 9.999899320227815e-05, |
|
"loss": 0.2383, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 9.999898952783391e-05, |
|
"loss": 0.2416, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 9.999898585338969e-05, |
|
"loss": 0.237, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 9.999898217894545e-05, |
|
"loss": 0.2445, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 9.99989785045012e-05, |
|
"loss": 0.2412, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 9.999897483005695e-05, |
|
"loss": 0.2431, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 9.999897115561272e-05, |
|
"loss": 0.2449, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 9.999896748116848e-05, |
|
"loss": 0.2461, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 9.999896380672424e-05, |
|
"loss": 0.2527, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 9.999896013228e-05, |
|
"loss": 0.2522, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 9.999895645783576e-05, |
|
"loss": 0.2485, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 9.999895278339152e-05, |
|
"loss": 0.2517, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 9.999894910894728e-05, |
|
"loss": 0.2572, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 9.999894543450304e-05, |
|
"loss": 0.2574, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 9.99989417600588e-05, |
|
"loss": 0.2551, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 9.999893808561456e-05, |
|
"loss": 0.2584, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 9.999893441117032e-05, |
|
"loss": 0.2607, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 9.999893073672607e-05, |
|
"loss": 0.2631, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 9.999892706228183e-05, |
|
"loss": 0.2609, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 9.99989233878376e-05, |
|
"loss": 0.265, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 9.999891971339336e-05, |
|
"loss": 0.2625, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 9.999891603894911e-05, |
|
"loss": 0.2648, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 9.999891236450487e-05, |
|
"loss": 0.2677, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 9.999890869006063e-05, |
|
"loss": 0.2667, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 9.999890501561639e-05, |
|
"loss": 0.2623, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 9.999890134117216e-05, |
|
"loss": 0.2713, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 9.999889766672791e-05, |
|
"loss": 0.2659, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 9.999889399228367e-05, |
|
"loss": 0.2688, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 9.999889031783943e-05, |
|
"loss": 0.2716, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 9.999888664339519e-05, |
|
"loss": 0.2723, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 9.999888296895095e-05, |
|
"loss": 0.2724, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 9.999887929450671e-05, |
|
"loss": 0.2697, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 9.999887562006247e-05, |
|
"loss": 0.2749, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 9.999887194561823e-05, |
|
"loss": 0.273, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 9.999886827117398e-05, |
|
"loss": 0.2816, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 9.999886459672974e-05, |
|
"loss": 0.2742, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 9.999886092228552e-05, |
|
"loss": 0.2788, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 9.999885724784128e-05, |
|
"loss": 0.2782, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 9.999885357339704e-05, |
|
"loss": 0.2804, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 9.999884989895278e-05, |
|
"loss": 0.2777, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 9.999884622450854e-05, |
|
"loss": 0.2822, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 9.99988425500643e-05, |
|
"loss": 0.2811, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 9.999883887562006e-05, |
|
"loss": 0.2795, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 9.999883520117583e-05, |
|
"loss": 0.2846, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 9.999883152673159e-05, |
|
"loss": 0.2887, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 9.999882785228735e-05, |
|
"loss": 0.2811, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 9.99988241778431e-05, |
|
"loss": 0.283, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 9.999882050339887e-05, |
|
"loss": 0.2822, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 9.999881682895463e-05, |
|
"loss": 0.2853, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 9.999881315451039e-05, |
|
"loss": 0.2869, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 9.999880948006615e-05, |
|
"loss": 0.283, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 9.999880580562191e-05, |
|
"loss": 0.2893, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 9.999880213117766e-05, |
|
"loss": 0.2863, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.20682939887046814, |
|
"eval_runtime": 1457.2635, |
|
"eval_samples_per_second": 119.518, |
|
"eval_steps_per_second": 7.47, |
|
"step": 65316 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 9.999879845673342e-05, |
|
"loss": 0.2505, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 9.999879478228919e-05, |
|
"loss": 0.1957, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 9.999879110784495e-05, |
|
"loss": 0.1986, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 9.99987874334007e-05, |
|
"loss": 0.1984, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 9.999878375895646e-05, |
|
"loss": 0.1983, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 9.999878008451222e-05, |
|
"loss": 0.203, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 9.999877641006798e-05, |
|
"loss": 0.203, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 9.999877273562375e-05, |
|
"loss": 0.2047, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 9.99987690611795e-05, |
|
"loss": 0.208, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 9.999876538673526e-05, |
|
"loss": 0.2103, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 9.999876171229102e-05, |
|
"loss": 0.2141, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 9.999875803784678e-05, |
|
"loss": 0.2101, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 9.999875436340254e-05, |
|
"loss": 0.2157, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 9.99987506889583e-05, |
|
"loss": 0.2173, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 9.999874701451406e-05, |
|
"loss": 0.2143, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 9.999874334006982e-05, |
|
"loss": 0.2156, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 9.999873966562557e-05, |
|
"loss": 0.2181, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 9.999873599118133e-05, |
|
"loss": 0.2215, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 9.99987323167371e-05, |
|
"loss": 0.2214, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 9.999872864229286e-05, |
|
"loss": 0.221, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 9.999872496784863e-05, |
|
"loss": 0.2254, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 9.999872129340437e-05, |
|
"loss": 0.2235, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 9.999871761896013e-05, |
|
"loss": 0.2231, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 9.999871394451589e-05, |
|
"loss": 0.2267, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 9.999871027007167e-05, |
|
"loss": 0.2239, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 9.999870659562741e-05, |
|
"loss": 0.227, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 9.999870292118317e-05, |
|
"loss": 0.2277, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 9.999869924673893e-05, |
|
"loss": 0.2245, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 9.99986955722947e-05, |
|
"loss": 0.2284, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 9.999869189785046e-05, |
|
"loss": 0.2308, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 9.999868822340622e-05, |
|
"loss": 0.2348, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 9.999868454896198e-05, |
|
"loss": 0.2289, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 9.999868087451774e-05, |
|
"loss": 0.2311, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 9.99986772000735e-05, |
|
"loss": 0.2308, |
|
"step": 72000 |
|
} |
|
], |
|
"max_steps": 5443000000, |
|
"num_train_epochs": 500000, |
|
"total_flos": 2.6739082122913382e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|