{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 10.0,
  "global_step": 27520,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.36,
      "learning_rate": 0.0025,
      "loss": 8.8155,
      "step": 500
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.005,
      "loss": 8.0292,
      "step": 1000
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.004905731523378582,
      "loss": 8.3372,
      "step": 1500
    },
    {
      "epoch": 1.45,
      "learning_rate": 0.004811463046757164,
      "loss": 8.756,
      "step": 2000
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.004717194570135747,
      "loss": 8.8022,
      "step": 2500
    },
    {
      "epoch": 2.18,
      "learning_rate": 0.004622926093514329,
      "loss": 8.8056,
      "step": 3000
    },
    {
      "epoch": 2.54,
      "learning_rate": 0.004528657616892911,
      "loss": 8.7766,
      "step": 3500
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.004434389140271493,
      "loss": 8.7784,
      "step": 4000
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.004340120663650075,
      "loss": 8.6866,
      "step": 4500
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.004245852187028658,
      "loss": 8.6724,
      "step": 5000
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00415158371040724,
      "loss": 8.6884,
      "step": 5500
    },
    {
      "epoch": 4.36,
      "learning_rate": 0.004057315233785822,
      "loss": 8.674,
      "step": 6000
    },
    {
      "epoch": 4.72,
      "learning_rate": 0.003963046757164404,
      "loss": 8.6885,
      "step": 6500
    },
    {
      "epoch": 5.09,
      "learning_rate": 0.0038687782805429866,
      "loss": 8.6323,
      "step": 7000
    },
    {
      "epoch": 5.45,
      "learning_rate": 0.0037745098039215687,
      "loss": 8.6416,
      "step": 7500
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.0036802413273001513,
      "loss": 8.6168,
      "step": 8000
    },
    {
      "epoch": 6.18,
      "learning_rate": 0.003585972850678733,
      "loss": 8.5761,
      "step": 8500
    },
    {
      "epoch": 6.54,
      "learning_rate": 0.003491704374057315,
      "loss": 8.5403,
      "step": 9000
    },
    {
      "epoch": 6.9,
      "learning_rate": 0.0033974358974358976,
      "loss": 8.5917,
      "step": 9500
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.0033031674208144797,
      "loss": 8.5902,
      "step": 10000
    },
    {
      "epoch": 7.63,
      "learning_rate": 0.0032088989441930622,
      "loss": 8.5589,
      "step": 10500
    },
    {
      "epoch": 7.99,
      "learning_rate": 0.0031146304675716443,
      "loss": 8.5885,
      "step": 11000
    },
    {
      "epoch": 8.36,
      "learning_rate": 0.003020361990950226,
      "loss": 8.5635,
      "step": 11500
    },
    {
      "epoch": 8.72,
      "learning_rate": 0.0029260935143288085,
      "loss": 8.581,
      "step": 12000
    },
    {
      "epoch": 9.08,
      "learning_rate": 0.0028318250377073906,
      "loss": 8.5647,
      "step": 12500
    },
    {
      "epoch": 9.45,
      "learning_rate": 0.002737556561085973,
      "loss": 8.5448,
      "step": 13000
    },
    {
      "epoch": 9.81,
      "learning_rate": 0.0026432880844645553,
      "loss": 8.5538,
      "step": 13500
    },
    {
      "epoch": 10.17,
      "learning_rate": 0.002549019607843137,
      "loss": 8.5544,
      "step": 14000
    },
    {
      "epoch": 10.54,
      "learning_rate": 0.0024547511312217195,
      "loss": 8.5452,
      "step": 14500
    },
    {
      "epoch": 10.9,
      "learning_rate": 0.0023604826546003016,
      "loss": 8.564,
      "step": 15000
    },
    {
      "epoch": 11.26,
      "learning_rate": 0.0022662141779788837,
      "loss": 8.5346,
      "step": 15500
    },
    {
      "epoch": 11.63,
      "learning_rate": 0.0021719457013574662,
      "loss": 8.5496,
      "step": 16000
    },
    {
      "epoch": 11.99,
      "learning_rate": 0.0020776772247360483,
      "loss": 8.5342,
      "step": 16500
    },
    {
      "epoch": 12.35,
      "learning_rate": 0.0019834087481146304,
      "loss": 8.4924,
      "step": 17000
    },
    {
      "epoch": 12.72,
      "learning_rate": 0.0018891402714932128,
      "loss": 8.499,
      "step": 17500
    },
    {
      "epoch": 13.08,
      "learning_rate": 0.0017948717948717949,
      "loss": 8.4993,
      "step": 18000
    },
    {
      "epoch": 13.44,
      "learning_rate": 0.0017006033182503772,
      "loss": 8.4997,
      "step": 18500
    },
    {
      "epoch": 13.81,
      "learning_rate": 0.0016063348416289595,
      "loss": 8.4866,
      "step": 19000
    },
    {
      "epoch": 14.17,
      "learning_rate": 0.0015120663650075414,
      "loss": 8.4729,
      "step": 19500
    },
    {
      "epoch": 14.53,
      "learning_rate": 0.0014177978883861237,
      "loss": 8.4802,
      "step": 20000
    },
    {
      "epoch": 14.9,
      "learning_rate": 0.0013235294117647058,
      "loss": 8.4606,
      "step": 20500
    },
    {
      "epoch": 15.26,
      "learning_rate": 0.0012292609351432881,
      "loss": 8.4512,
      "step": 21000
    },
    {
      "epoch": 15.62,
      "learning_rate": 0.0011349924585218702,
      "loss": 8.4592,
      "step": 21500
    },
    {
      "epoch": 15.99,
      "learning_rate": 0.0010407239819004526,
      "loss": 8.4624,
      "step": 22000
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.0009464555052790347,
      "loss": 8.4251,
      "step": 22500
    },
    {
      "epoch": 16.72,
      "learning_rate": 0.000852187028657617,
      "loss": 8.4294,
      "step": 23000
    },
    {
      "epoch": 17.08,
      "learning_rate": 0.0007579185520361991,
      "loss": 8.4217,
      "step": 23500
    },
    {
      "epoch": 17.44,
      "learning_rate": 0.0006636500754147813,
      "loss": 8.403,
      "step": 24000
    },
    {
      "epoch": 17.81,
      "learning_rate": 0.0005693815987933635,
      "loss": 8.4101,
      "step": 24500
    },
    {
      "epoch": 18.17,
      "learning_rate": 0.00047511312217194567,
      "loss": 8.3884,
      "step": 25000
    },
    {
      "epoch": 18.53,
      "learning_rate": 0.0003808446455505279,
      "loss": 8.3881,
      "step": 25500
    },
    {
      "epoch": 18.9,
      "learning_rate": 0.0002865761689291101,
      "loss": 8.3882,
      "step": 26000
    },
    {
      "epoch": 19.26,
      "learning_rate": 0.00019230769230769233,
      "loss": 8.3682,
      "step": 26500
    },
    {
      "epoch": 19.62,
      "learning_rate": 9.80392156862745e-05,
      "loss": 8.3625,
      "step": 27000
    },
    {
      "epoch": 19.99,
      "learning_rate": 3.770739064856712e-06,
      "loss": 8.3572,
      "step": 27500
    },
    {
      "epoch": 20.0,
      "step": 27520,
      "total_flos": 8.626295144448e+16,
      "train_loss": 8.536983246027036,
      "train_runtime": 34345.8231,
      "train_samples_per_second": 9.612,
      "train_steps_per_second": 0.801
    }
  ],
  "logging_steps": 500,
  "max_steps": 27520,
  "num_train_epochs": 20,
  "save_steps": 2500,
  "total_flos": 8.626295144448e+16,
  "trial_name": null,
  "trial_params": null
}