|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0404473923787227, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.9921875, |
|
"learning_rate": 1.7241379310344828e-05, |
|
"loss": 1.8089, |
|
"max_memory_allocated (GB)": 92.84, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7890625, |
|
"learning_rate": 3.4482758620689657e-05, |
|
"loss": 1.7328, |
|
"max_memory_allocated (GB)": 92.85, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.66015625, |
|
"learning_rate": 5.172413793103449e-05, |
|
"loss": 1.5707, |
|
"max_memory_allocated (GB)": 92.87, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 6.896551724137931e-05, |
|
"loss": 1.3808, |
|
"max_memory_allocated (GB)": 92.87, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 8.620689655172413e-05, |
|
"loss": 1.3155, |
|
"max_memory_allocated (GB)": 92.87, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.23046875, |
|
"learning_rate": 9.999971594167742e-05, |
|
"loss": 1.2608, |
|
"max_memory_allocated (GB)": 92.87, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 9.998977423927714e-05, |
|
"loss": 1.2335, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 9.996563284814788e-05, |
|
"loss": 1.1979, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 9.992729862569695e-05, |
|
"loss": 1.1849, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 9.987478246083175e-05, |
|
"loss": 1.1664, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 9.980809927086704e-05, |
|
"loss": 1.1494, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 9.972726799728744e-05, |
|
"loss": 1.1468, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 9.963231160036714e-05, |
|
"loss": 1.1467, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 9.952325705264806e-05, |
|
"loss": 1.1432, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 9.940013533127812e-05, |
|
"loss": 1.1268, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 9.926298140921221e-05, |
|
"loss": 1.1197, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 9.911183424527801e-05, |
|
"loss": 1.1022, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 9.894673677310972e-05, |
|
"loss": 1.0934, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 9.876773588895265e-05, |
|
"loss": 1.0925, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 9.857488243834219e-05, |
|
"loss": 1.1038, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 9.836823120166116e-05, |
|
"loss": 1.0926, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.5, |
|
"learning_rate": 9.814784087857927e-05, |
|
"loss": 1.0955, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 9.791377407137936e-05, |
|
"loss": 1.084, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 9.766609726717515e-05, |
|
"loss": 1.0855, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 9.740488081902539e-05, |
|
"loss": 1.0755, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 9.713019892595003e-05, |
|
"loss": 1.0788, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 9.684212961185374e-05, |
|
"loss": 1.0705, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 9.654075470336317e-05, |
|
"loss": 1.0776, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 9.622615980658391e-05, |
|
"loss": 1.0759, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 9.589843428278388e-05, |
|
"loss": 1.0685, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 9.555767122301016e-05, |
|
"loss": 1.0794, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 9.520396742164624e-05, |
|
"loss": 1.0648, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 9.483742334891746e-05, |
|
"loss": 1.0656, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 9.44581431223522e-05, |
|
"loss": 1.0547, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 9.40662344772071e-05, |
|
"loss": 1.0675, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 9.366180873586475e-05, |
|
"loss": 1.0626, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 9.32449807762122e-05, |
|
"loss": 1.0587, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 9.281586899900985e-05, |
|
"loss": 1.0601, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 9.237459529425938e-05, |
|
"loss": 1.0606, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 9.192128500658068e-05, |
|
"loss": 1.0568, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 9.145606689960756e-05, |
|
"loss": 1.0591, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 9.097907311941208e-05, |
|
"loss": 1.0533, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 9.049043915696831e-05, |
|
"loss": 1.0483, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 8.99903038096658e-05, |
|
"loss": 1.0501, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 8.947880914188397e-05, |
|
"loss": 1.053, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 8.89561004446384e-05, |
|
"loss": 1.0441, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 8.842232619431074e-05, |
|
"loss": 1.0575, |
|
"max_memory_allocated (GB)": 92.88, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 8.78776380104736e-05, |
|
"loss": 1.0639, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 8.732219061282278e-05, |
|
"loss": 1.0463, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 8.675614177722895e-05, |
|
"loss": 1.0483, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 8.61796522909209e-05, |
|
"loss": 1.044, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 8.559288590681387e-05, |
|
"loss": 1.0478, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 8.4996009296995e-05, |
|
"loss": 1.0426, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 8.438919200538003e-05, |
|
"loss": 1.0488, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 8.377260639955385e-05, |
|
"loss": 1.0434, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 8.314642762180927e-05, |
|
"loss": 1.04, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 8.251083353939752e-05, |
|
"loss": 1.0406, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 8.186600469400467e-05, |
|
"loss": 1.0537, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 8.12121242504685e-05, |
|
"loss": 1.038, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 8.05493779447501e-05, |
|
"loss": 1.0425, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 7.987795403117529e-05, |
|
"loss": 1.0336, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 7.919804322896062e-05, |
|
"loss": 1.0397, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 7.850983866803922e-05, |
|
"loss": 1.0388, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 7.78135358342018e-05, |
|
"loss": 1.0301, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 7.71093325135687e-05, |
|
"loss": 1.0341, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 7.639742873640825e-05, |
|
"loss": 1.0348, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 7.56780267203178e-05, |
|
"loss": 1.036, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 7.495133081278356e-05, |
|
"loss": 1.0322, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 7.421754743313514e-05, |
|
"loss": 1.039, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 7.347688501391187e-05, |
|
"loss": 1.0402, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 7.272955394165717e-05, |
|
"loss": 1.0326, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 7.197576649715771e-05, |
|
"loss": 1.0304, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 7.121573679514484e-05, |
|
"loss": 1.0314, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 7.044968072347473e-05, |
|
"loss": 1.0427, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 6.967781588180521e-05, |
|
"loss": 1.0365, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 6.890036151978598e-05, |
|
"loss": 1.026, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 6.811753847478051e-05, |
|
"loss": 1.0243, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 6.732956910913661e-05, |
|
"loss": 1.0348, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 6.653667724702418e-05, |
|
"loss": 1.0218, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 6.573908811085734e-05, |
|
"loss": 1.0267, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 6.493702825731976e-05, |
|
"loss": 1.0316, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 6.41307255130107e-05, |
|
"loss": 1.0252, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 6.33204089097304e-05, |
|
"loss": 1.0267, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 6.250630861942333e-05, |
|
"loss": 1.0194, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 6.16886558887973e-05, |
|
"loss": 1.0328, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.283203125, |
|
"learning_rate": 6.0867682973637394e-05, |
|
"loss": 1.0136, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 6.004362307283335e-05, |
|
"loss": 1.037, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 5.921671026213893e-05, |
|
"loss": 1.0175, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 5.838717942768226e-05, |
|
"loss": 1.0266, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 5.755526619924605e-05, |
|
"loss": 1.0358, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 5.672120688333642e-05, |
|
"loss": 1.0214, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 5.588523839605968e-05, |
|
"loss": 1.0225, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 5.504759819582581e-05, |
|
"loss": 1.0307, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 5.4208524215897985e-05, |
|
"loss": 1.0187, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 5.3368254796807196e-05, |
|
"loss": 1.0284, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 5.2527028618651117e-05, |
|
"loss": 1.0252, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 5.1685084633296665e-05, |
|
"loss": 1.0183, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 5.084266199650523e-05, |
|
"loss": 1.0235, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0193, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 4.915733800349477e-05, |
|
"loss": 1.0199, |
|
"max_memory_allocated (GB)": 92.89, |
|
"memory_allocated (GB)": 20.53, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1922, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"total_flos": 1.119482194910249e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|