|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.0, |
|
"global_step": 41656, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004939984636066834, |
|
"loss": 1.91, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004879969272133666, |
|
"loss": 1.7724, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00048199539082004997, |
|
"loss": 1.7095, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00047599385442673327, |
|
"loss": 1.6602, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004699923180334166, |
|
"loss": 1.6298, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004639907816400999, |
|
"loss": 1.6032, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004579892452467832, |
|
"loss": 1.5764, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00045198770885346653, |
|
"loss": 1.5442, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00044598617246014983, |
|
"loss": 1.5179, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00043998463606683314, |
|
"loss": 1.5161, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0004339830996735164, |
|
"loss": 1.421, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00042798156328019974, |
|
"loss": 1.3698, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00042198002688688304, |
|
"loss": 1.389, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00041597849049356634, |
|
"loss": 1.3715, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00040997695410024965, |
|
"loss": 1.3656, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00040397541770693295, |
|
"loss": 1.3575, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0003979738813136163, |
|
"loss": 1.3539, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0003919723449202996, |
|
"loss": 1.3292, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0003859708085269829, |
|
"loss": 1.3215, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0003799692721336662, |
|
"loss": 1.3067, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0003739677357403495, |
|
"loss": 1.2928, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00036796619934703287, |
|
"loss": 1.2058, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00036196466295371617, |
|
"loss": 1.2242, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00035596312656039947, |
|
"loss": 1.2148, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00034996159016708277, |
|
"loss": 1.2109, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0003439600537737661, |
|
"loss": 1.2241, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00033795851738044943, |
|
"loss": 1.221, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00033195698098713273, |
|
"loss": 1.2042, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00032595544459381603, |
|
"loss": 1.2028, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.00031995390820049933, |
|
"loss": 1.2017, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.0003139523718071827, |
|
"loss": 1.1904, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.000307950835413866, |
|
"loss": 1.1294, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.00030194929902054924, |
|
"loss": 1.1156, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00029594776262723254, |
|
"loss": 1.1174, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00028994622623391584, |
|
"loss": 1.1084, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.0002839446898405992, |
|
"loss": 1.1026, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 0.0002779431534472825, |
|
"loss": 1.1107, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 0.0002719416170539658, |
|
"loss": 1.1061, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.0002659400806606491, |
|
"loss": 1.1148, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0002599385442673324, |
|
"loss": 1.1097, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.00025393700787401576, |
|
"loss": 1.1013, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 0.00024793547148069906, |
|
"loss": 1.0682, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.00024193393508738237, |
|
"loss": 1.0297, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.0002359323986940657, |
|
"loss": 1.0325, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.000229930862300749, |
|
"loss": 1.0255, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 0.00022392932590743232, |
|
"loss": 1.0325, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.00021792778951411563, |
|
"loss": 1.0336, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.00021192625312079893, |
|
"loss": 1.0434, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.00020592471672748226, |
|
"loss": 1.0244, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00019992318033416553, |
|
"loss": 1.0333, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.00019392164394084886, |
|
"loss": 1.033, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.00018792010754753216, |
|
"loss": 1.0327, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0001819185711542155, |
|
"loss": 0.9682, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 0.0001759170347608988, |
|
"loss": 0.9799, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.0001699154983675821, |
|
"loss": 0.9725, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00016391396197426542, |
|
"loss": 0.9675, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00015791242558094873, |
|
"loss": 0.9746, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.00015191088918763205, |
|
"loss": 0.9726, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00014590935279431536, |
|
"loss": 0.9804, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.00013990781640099868, |
|
"loss": 0.9677, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00013390628000768196, |
|
"loss": 0.9757, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.00012790474361436526, |
|
"loss": 0.9504, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.00012190320722104859, |
|
"loss": 0.9511, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.00011590167082773189, |
|
"loss": 0.9176, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00010990013443441521, |
|
"loss": 0.9248, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 0.00010389859804109852, |
|
"loss": 0.9145, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 9.789706164778184e-05, |
|
"loss": 0.9163, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 9.189552525446515e-05, |
|
"loss": 0.9233, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 8.589398886114845e-05, |
|
"loss": 0.9315, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 7.989245246783177e-05, |
|
"loss": 0.9184, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 7.389091607451507e-05, |
|
"loss": 0.9263, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 6.788937968119839e-05, |
|
"loss": 0.9189, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 6.18878432878817e-05, |
|
"loss": 0.912, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 5.588630689456501e-05, |
|
"loss": 0.89, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 4.9884770501248326e-05, |
|
"loss": 0.8878, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 4.388323410793163e-05, |
|
"loss": 0.8822, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 3.7881697714614944e-05, |
|
"loss": 0.8887, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 3.188016132129825e-05, |
|
"loss": 0.8717, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 2.5878624927981564e-05, |
|
"loss": 0.8945, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 1.9877088534664873e-05, |
|
"loss": 0.8851, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 1.3875552141348186e-05, |
|
"loss": 0.8871, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 7.874015748031496e-06, |
|
"loss": 0.8934, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 1.872479354714807e-06, |
|
"loss": 0.8863, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 41656, |
|
"total_flos": 4.8424619578097664e+17, |
|
"train_loss": 1.139437837565909, |
|
"train_runtime": 38132.6596, |
|
"train_samples_per_second": 34.957, |
|
"train_steps_per_second": 1.092 |
|
} |
|
], |
|
"max_steps": 41656, |
|
"num_train_epochs": 8, |
|
"total_flos": 4.8424619578097664e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|