{
  "best_metric": 3.0450499057769775,
  "best_model_checkpoint": "output/metallica/checkpoint-315",
  "epoch": 5.0,
  "global_step": 315,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 0.00013507870183531476,
      "loss": 3.3881,
      "step": 5
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00012884599993319768,
      "loss": 3.3444,
      "step": 10
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.00011888735840752609,
      "loss": 3.3036,
      "step": 15
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.0001058186737011911,
      "loss": 3.3353,
      "step": 20
    },
    {
      "epoch": 0.4,
      "learning_rate": 9.044818420726556e-05,
      "loss": 3.2497,
      "step": 25
    },
    {
      "epoch": 0.48,
      "learning_rate": 7.372648442002871e-05,
      "loss": 3.2277,
      "step": 30
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.668773501204858e-05,
      "loss": 3.0355,
      "step": 35
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.0385704725240065e-05,
      "loss": 2.898,
      "step": 40
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.5828599592490882e-05,
      "loss": 2.9592,
      "step": 45
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.3916710004507539e-05,
      "loss": 2.9574,
      "step": 50
    },
    {
      "epoch": 0.87,
      "learning_rate": 5.38673186569003e-06,
      "loss": 2.9963,
      "step": 55
    },
    {
      "epoch": 0.95,
      "learning_rate": 7.662053209561833e-07,
      "loss": 2.9366,
      "step": 60
    },
    {
      "epoch": 1.0,
      "eval_loss": 3.13543438911438,
      "eval_runtime": 3.4469,
      "eval_samples_per_second": 47.289,
      "eval_steps_per_second": 6.092,
      "step": 63
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.408888099334633e-07,
      "loss": 2.948,
      "step": 65
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.137086214086682e-06,
      "loss": 2.811,
      "step": 70
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.1920020081922749e-05,
      "loss": 2.9051,
      "step": 75
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.320835154085542e-05,
      "loss": 2.813,
      "step": 80
    },
    {
      "epoch": 1.35,
      "learning_rate": 3.7303948905573005e-05,
      "loss": 2.6726,
      "step": 85
    },
    {
      "epoch": 1.43,
      "learning_rate": 5.333506393059682e-05,
      "loss": 2.817,
      "step": 90
    },
    {
      "epoch": 1.51,
      "learning_rate": 7.031024545323179e-05,
      "loss": 2.8553,
      "step": 95
    },
    {
      "epoch": 1.59,
      "learning_rate": 8.71796561146101e-05,
      "loss": 2.7581,
      "step": 100
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00010290000000000001,
      "loss": 2.8057,
      "step": 105
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0001164990457207046,
      "loss": 2.7321,
      "step": 110
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.00012713575447996587,
      "loss": 2.67,
      "step": 115
    },
    {
      "epoch": 1.9,
      "learning_rate": 0.00013415229447692924,
      "loss": 2.7242,
      "step": 120
    },
    {
      "epoch": 1.98,
      "learning_rate": 0.00013711472479561806,
      "loss": 2.7872,
      "step": 125
    },
    {
      "epoch": 2.0,
      "eval_loss": 3.1001694202423096,
      "eval_runtime": 3.559,
      "eval_samples_per_second": 45.8,
      "eval_steps_per_second": 5.901,
      "step": 126
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.00013583983266641012,
      "loss": 2.5963,
      "step": 130
    },
    {
      "epoch": 2.14,
      "learning_rate": 0.00013040646433810595,
      "loss": 2.4975,
      "step": 135
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.0001211506487979619,
      "loss": 2.5517,
      "step": 140
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.00010864481591530664,
      "loss": 2.454,
      "step": 145
    },
    {
      "epoch": 2.38,
      "learning_rate": 9.36623942715347e-05,
      "loss": 2.4655,
      "step": 150
    },
    {
      "epoch": 2.46,
      "learning_rate": 7.712997813881747e-05,
      "loss": 2.5144,
      "step": 155
    },
    {
      "epoch": 2.54,
      "learning_rate": 6.007002186118257e-05,
      "loss": 2.4411,
      "step": 160
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.3537605728465284e-05,
      "loss": 2.49,
      "step": 165
    },
    {
      "epoch": 2.7,
      "learning_rate": 2.8555184084693446e-05,
      "loss": 2.2522,
      "step": 170
    },
    {
      "epoch": 2.78,
      "learning_rate": 1.6049351202038163e-05,
      "loss": 2.4637,
      "step": 175
    },
    {
      "epoch": 2.86,
      "learning_rate": 6.793535661894062e-06,
      "loss": 2.4223,
      "step": 180
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.3601673335899086e-06,
      "loss": 2.255,
      "step": 185
    },
    {
      "epoch": 3.0,
      "eval_loss": 3.0634868144989014,
      "eval_runtime": 3.6817,
      "eval_samples_per_second": 44.273,
      "eval_steps_per_second": 5.704,
      "step": 189
    },
    {
      "epoch": 3.02,
      "learning_rate": 8.527520438192717e-08,
      "loss": 2.3823,
      "step": 190
    },
    {
      "epoch": 3.1,
      "learning_rate": 3.047705523070765e-06,
      "loss": 2.3682,
      "step": 195
    },
    {
      "epoch": 3.17,
      "learning_rate": 1.0064245520034058e-05,
      "loss": 2.3272,
      "step": 200
    },
    {
      "epoch": 3.25,
      "learning_rate": 2.0700954279295363e-05,
      "loss": 2.2495,
      "step": 205
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.4300000000000014e-05,
      "loss": 2.0865,
      "step": 210
    },
    {
      "epoch": 3.41,
      "learning_rate": 5.0020343885389815e-05,
      "loss": 2.279,
      "step": 215
    },
    {
      "epoch": 3.49,
      "learning_rate": 6.688975454676822e-05,
      "loss": 2.1852,
      "step": 220
    },
    {
      "epoch": 3.57,
      "learning_rate": 8.386493606940314e-05,
      "loss": 2.2131,
      "step": 225
    },
    {
      "epoch": 3.65,
      "learning_rate": 9.989605109442691e-05,
      "loss": 2.2336,
      "step": 230
    },
    {
      "epoch": 3.73,
      "learning_rate": 0.00011399164845914455,
      "loss": 2.2808,
      "step": 235
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.00012527997991807721,
      "loss": 2.4415,
      "step": 240
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.00013306291378591332,
      "loss": 2.1738,
      "step": 245
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.00013685911119006654,
      "loss": 2.3683,
      "step": 250
    },
    {
      "epoch": 4.0,
      "eval_loss": 3.110128879547119,
      "eval_runtime": 3.7075,
      "eval_samples_per_second": 43.965,
      "eval_steps_per_second": 5.664,
      "step": 252
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.00013643379467904383,
      "loss": 2.0957,
      "step": 255
    },
    {
      "epoch": 4.13,
      "learning_rate": 0.00013181326813430994,
      "loss": 2.1122,
      "step": 260
    },
    {
      "epoch": 4.21,
      "learning_rate": 0.00012328328999549248,
      "loss": 2.0913,
      "step": 265
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.00011137140040750914,
      "loss": 2.0899,
      "step": 270
    },
    {
      "epoch": 4.37,
      "learning_rate": 9.681429527476003e-05,
      "loss": 2.0885,
      "step": 275
    },
    {
      "epoch": 4.44,
      "learning_rate": 8.051226498795145e-05,
      "loss": 2.054,
      "step": 280
    },
    {
      "epoch": 4.52,
      "learning_rate": 6.347351557997137e-05,
      "loss": 2.0165,
      "step": 285
    },
    {
      "epoch": 4.6,
      "learning_rate": 4.675181579273458e-05,
      "loss": 1.945,
      "step": 290
    },
    {
      "epoch": 4.68,
      "learning_rate": 3.138132629880886e-05,
      "loss": 1.9216,
      "step": 295
    },
    {
      "epoch": 4.76,
      "learning_rate": 1.8312641592473912e-05,
      "loss": 1.9411,
      "step": 300
    },
    {
      "epoch": 4.84,
      "learning_rate": 8.354000066802353e-06,
      "loss": 1.7585,
      "step": 305
    },
    {
      "epoch": 4.92,
      "learning_rate": 2.121298164685252e-06,
      "loss": 1.947,
      "step": 310
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 1.9567,
      "step": 315
    },
    {
      "epoch": 5.0,
      "eval_loss": 3.0450499057769775,
      "eval_runtime": 3.7841,
      "eval_samples_per_second": 43.076,
      "eval_steps_per_second": 5.55,
      "step": 315
    }
  ],
  "max_steps": 315,
  "num_train_epochs": 5,
  "total_flos": 327268270080000.0,
  "trial_name": null,
  "trial_params": null
}