{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999729483584162,
  "global_step": 13862,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 2.8839221341023793e-05,
      "loss": 1.4339,
      "step": 500
    },
    {
      "epoch": 0.07,
      "learning_rate": 5.767844268204759e-05,
      "loss": 1.4587,
      "step": 1000
    },
    {
      "epoch": 0.11,
      "learning_rate": 7.927535070140282e-05,
      "loss": 1.5324,
      "step": 1500
    },
    {
      "epoch": 0.14,
      "learning_rate": 7.606893787575151e-05,
      "loss": 1.5695,
      "step": 2000
    },
    {
      "epoch": 0.18,
      "learning_rate": 7.286252505010021e-05,
      "loss": 1.5931,
      "step": 2500
    },
    {
      "epoch": 0.22,
      "learning_rate": 6.96561122244489e-05,
      "loss": 1.5994,
      "step": 3000
    },
    {
      "epoch": 0.25,
      "learning_rate": 6.64496993987976e-05,
      "loss": 1.6028,
      "step": 3500
    },
    {
      "epoch": 0.29,
      "learning_rate": 6.32432865731463e-05,
      "loss": 1.5965,
      "step": 4000
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.0036873747494996e-05,
      "loss": 1.6002,
      "step": 4500
    },
    {
      "epoch": 0.36,
      "learning_rate": 5.683046092184369e-05,
      "loss": 1.6017,
      "step": 5000
    },
    {
      "epoch": 0.4,
      "learning_rate": 5.362404809619239e-05,
      "loss": 1.5735,
      "step": 5500
    },
    {
      "epoch": 0.43,
      "learning_rate": 5.041763527054109e-05,
      "loss": 1.5765,
      "step": 6000
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.7211222444889784e-05,
      "loss": 1.5713,
      "step": 6500
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.400480961923849e-05,
      "loss": 1.5619,
      "step": 7000
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.0798396793587175e-05,
      "loss": 1.5509,
      "step": 7500
    },
    {
      "epoch": 0.58,
      "learning_rate": 3.759198396793588e-05,
      "loss": 1.5421,
      "step": 8000
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.438557114228457e-05,
      "loss": 1.5299,
      "step": 8500
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.117915831663327e-05,
      "loss": 1.5285,
      "step": 9000
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.7972745490981967e-05,
      "loss": 1.5326,
      "step": 9500
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.4766332665330663e-05,
      "loss": 1.5119,
      "step": 10000
    },
    {
      "epoch": 0.76,
      "learning_rate": 2.1559919839679358e-05,
      "loss": 1.5147,
      "step": 10500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.8353507014028057e-05,
      "loss": 1.4978,
      "step": 11000
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.5147094188376754e-05,
      "loss": 1.4914,
      "step": 11500
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.1940681362725453e-05,
      "loss": 1.4889,
      "step": 12000
    },
    {
      "epoch": 0.9,
      "learning_rate": 8.734268537074148e-06,
      "loss": 1.4894,
      "step": 12500
    },
    {
      "epoch": 0.94,
      "learning_rate": 5.527855711422846e-06,
      "loss": 1.4743,
      "step": 13000
    },
    {
      "epoch": 0.97,
      "learning_rate": 2.321442885771543e-06,
      "loss": 1.4627,
      "step": 13500
    },
    {
      "epoch": 1.0,
      "step": 13862,
      "total_flos": 7.817060132659814e+17,
      "train_loss": 1.5347596183515975,
      "train_runtime": 69651.7637,
      "train_samples_per_second": 6.369,
      "train_steps_per_second": 0.199
    }
  ],
  "max_steps": 13862,
  "num_train_epochs": 1,
  "total_flos": 7.817060132659814e+17,
  "trial_name": null,
  "trial_params": null
}