|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 3460, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.855491329479769e-05, |
|
"loss": 2.85, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.710982658959538e-05, |
|
"loss": 2.5244, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.566473988439307e-05, |
|
"loss": 2.4551, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.421965317919075e-05, |
|
"loss": 2.3574, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.2774566473988445e-05, |
|
"loss": 2.3376, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.132947976878613e-05, |
|
"loss": 2.314, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.988439306358382e-05, |
|
"loss": 2.2859, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.84393063583815e-05, |
|
"loss": 2.2306, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.699421965317919e-05, |
|
"loss": 2.2102, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.554913294797688e-05, |
|
"loss": 2.2117, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.410404624277457e-05, |
|
"loss": 2.1662, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.265895953757225e-05, |
|
"loss": 2.1411, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.1213872832369946e-05, |
|
"loss": 2.1541, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.9768786127167632e-05, |
|
"loss": 2.1407, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 2.832369942196532e-05, |
|
"loss": 2.1051, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 2.6878612716763007e-05, |
|
"loss": 2.0974, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 2.5433526011560693e-05, |
|
"loss": 2.1, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.3988439306358382e-05, |
|
"loss": 2.0643, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 2.254335260115607e-05, |
|
"loss": 2.048, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 2.1098265895953757e-05, |
|
"loss": 2.0663, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.9653179190751446e-05, |
|
"loss": 2.0447, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.8208092485549132e-05, |
|
"loss": 2.0395, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.676300578034682e-05, |
|
"loss": 2.021, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.531791907514451e-05, |
|
"loss": 2.0177, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.3872832369942197e-05, |
|
"loss": 2.0087, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 1.2427745664739884e-05, |
|
"loss": 1.9849, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 1.0982658959537573e-05, |
|
"loss": 1.9991, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 9.53757225433526e-06, |
|
"loss": 1.9992, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 8.092485549132949e-06, |
|
"loss": 1.9687, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 6.647398843930635e-06, |
|
"loss": 1.9803, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 5.202312138728324e-06, |
|
"loss": 2.0046, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.757225433526012e-06, |
|
"loss": 1.9629, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 2.3121387283236993e-06, |
|
"loss": 1.9686, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 8.670520231213873e-07, |
|
"loss": 1.9876, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 3460, |
|
"total_flos": 3.302612726022144e+17, |
|
"train_loss": 2.1397388414151406, |
|
"train_runtime": 8310.1091, |
|
"train_samples_per_second": 13.29, |
|
"train_steps_per_second": 0.416 |
|
} |
|
], |
|
"max_steps": 3460, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.302612726022144e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|