{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9748571428571429,
  "global_step": 108,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 5e-06,
      "loss": 5.2929,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 1e-05,
      "loss": 5.1707,
      "step": 2
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.5000000000000002e-05,
      "loss": 4.86,
      "step": 3
    },
    {
      "epoch": 0.07,
      "learning_rate": 2e-05,
      "loss": 4.909,
      "step": 4
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9995437844895337e-05,
      "loss": 4.4727,
      "step": 5
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9981755542233175e-05,
      "loss": 4.1336,
      "step": 6
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.995896557617091e-05,
      "loss": 3.5284,
      "step": 7
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.992708874098054e-05,
      "loss": 3.1435,
      "step": 8
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9886154122075344e-05,
      "loss": 2.9837,
      "step": 9
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.983619906947144e-05,
      "loss": 2.6885,
      "step": 10
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.977726916370847e-05,
      "loss": 2.6187,
      "step": 11
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9709418174260523e-05,
      "loss": 2.5325,
      "step": 12
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.9632708010475166e-05,
      "loss": 2.2144,
      "step": 13
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.954720866508546e-05,
      "loss": 2.1453,
      "step": 14
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9452998150346403e-05,
      "loss": 2.0979,
      "step": 15
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.9350162426854152e-05,
      "loss": 2.1127,
      "step": 16
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9238795325112867e-05,
      "loss": 1.9363,
      "step": 17
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.91189984599209e-05,
      "loss": 1.9072,
      "step": 18
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.899088113765426e-05,
      "loss": 1.8657,
      "step": 19
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.8854560256532098e-05,
      "loss": 1.9458,
      "step": 20
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8710160199955158e-05,
      "loss": 1.8255,
      "step": 21
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.8557812723014476e-05,
      "loss": 1.9809,
      "step": 22
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.8397656832273982e-05,
      "loss": 1.4541,
      "step": 23
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.8229838658936566e-05,
      "loss": 1.496,
      "step": 24
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.805451132550946e-05,
      "loss": 1.6486,
      "step": 25
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.7871834806090502e-05,
      "loss": 1.4413,
      "step": 26
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.7681975780402807e-05,
      "loss": 1.4627,
      "step": 27
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.7485107481711014e-05,
      "loss": 1.548,
      "step": 28
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.7281409538757886e-05,
      "loss": 1.3278,
      "step": 29
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.7071067811865477e-05,
      "loss": 1.4126,
      "step": 30
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.68542742233504e-05,
      "loss": 1.6469,
      "step": 31
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.6631226582407954e-05,
      "loss": 1.2131,
      "step": 32
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.640212840462488e-05,
      "loss": 1.4731,
      "step": 33
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.6167188726285433e-05,
      "loss": 1.2919,
      "step": 34
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.592662191364017e-05,
      "loss": 1.3771,
      "step": 35
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.568064746731156e-05,
      "loss": 1.3238,
      "step": 36
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.542948982201479e-05,
      "loss": 1.2339,
      "step": 37
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5173378141776569e-05,
      "loss": 1.2227,
      "step": 38
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.4912546110838775e-05,
      "loss": 1.1856,
      "step": 39
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.4647231720437687e-05,
      "loss": 1.2679,
      "step": 40
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.4377677051653404e-05,
      "loss": 1.1049,
      "step": 41
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.410412805452757e-05,
      "loss": 1.0227,
      "step": 42
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.3826834323650899e-05,
      "loss": 1.2718,
      "step": 43
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.3546048870425356e-05,
      "loss": 0.9942,
      "step": 44
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.3262027892208696e-05,
      "loss": 1.2092,
      "step": 45
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.297503053855203e-05,
      "loss": 1.0914,
      "step": 46
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.2685318674743769e-05,
      "loss": 1.104,
      "step": 47
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.2393156642875579e-05,
      "loss": 1.011,
      "step": 48
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.2098811020648475e-05,
      "loss": 1.4294,
      "step": 49
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.180255037813906e-05,
      "loss": 1.1338,
      "step": 50
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.1504645032747832e-05,
      "loss": 1.154,
      "step": 51
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.1205366802553231e-05,
      "loss": 1.0417,
      "step": 52
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.090498875829638e-05,
      "loss": 0.89,
      "step": 53
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.0603784974222862e-05,
      "loss": 0.8541,
      "step": 54
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.030203027800889e-05,
      "loss": 1.0875,
      "step": 55
    },
    {
      "epoch": 1.02,
      "learning_rate": 1e-05,
      "loss": 1.1117,
      "step": 56
    },
    {
      "epoch": 1.04,
      "learning_rate": 9.697969721991114e-06,
      "loss": 1.0332,
      "step": 57
    },
    {
      "epoch": 1.06,
      "learning_rate": 9.39621502577714e-06,
      "loss": 0.9024,
      "step": 58
    },
    {
      "epoch": 1.08,
      "learning_rate": 9.095011241703623e-06,
      "loss": 0.8321,
      "step": 59
    },
    {
      "epoch": 1.1,
      "learning_rate": 8.79463319744677e-06,
      "loss": 0.817,
      "step": 60
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.49535496725217e-06,
      "loss": 0.8457,
      "step": 61
    },
    {
      "epoch": 1.13,
      "learning_rate": 8.197449621860944e-06,
      "loss": 0.9017,
      "step": 62
    },
    {
      "epoch": 1.15,
      "learning_rate": 7.901188979351527e-06,
      "loss": 1.0759,
      "step": 63
    },
    {
      "epoch": 1.17,
      "learning_rate": 7.606843357124426e-06,
      "loss": 1.011,
      "step": 64
    },
    {
      "epoch": 1.19,
      "learning_rate": 7.314681325256232e-06,
      "loss": 0.8737,
      "step": 65
    },
    {
      "epoch": 1.21,
      "learning_rate": 7.024969461447973e-06,
      "loss": 0.9498,
      "step": 66
    },
    {
      "epoch": 1.23,
      "learning_rate": 6.7379721077913095e-06,
      "loss": 1.0125,
      "step": 67
    },
    {
      "epoch": 1.24,
      "learning_rate": 6.453951129574644e-06,
      "loss": 0.7371,
      "step": 68
    },
    {
      "epoch": 1.26,
      "learning_rate": 6.173165676349103e-06,
      "loss": 0.8445,
      "step": 69
    },
    {
      "epoch": 1.28,
      "learning_rate": 5.895871945472434e-06,
      "loss": 1.0347,
      "step": 70
    },
    {
      "epoch": 1.3,
      "learning_rate": 5.622322948346595e-06,
      "loss": 1.0074,
      "step": 71
    },
    {
      "epoch": 1.32,
      "learning_rate": 5.352768279562315e-06,
      "loss": 0.887,
      "step": 72
    },
    {
      "epoch": 1.33,
      "learning_rate": 5.087453889161229e-06,
      "loss": 0.9886,
      "step": 73
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.826621858223431e-06,
      "loss": 0.9414,
      "step": 74
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.570510177985213e-06,
      "loss": 1.098,
      "step": 75
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.319352532688444e-06,
      "loss": 1.0022,
      "step": 76
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.073378086359834e-06,
      "loss": 0.9925,
      "step": 77
    },
    {
      "epoch": 1.43,
      "learning_rate": 3.832811273714569e-06,
      "loss": 0.7212,
      "step": 78
    },
    {
      "epoch": 1.44,
      "learning_rate": 3.5978715953751207e-06,
      "loss": 0.7285,
      "step": 79
    },
    {
      "epoch": 1.46,
      "learning_rate": 3.3687734175920505e-06,
      "loss": 0.8824,
      "step": 80
    },
    {
      "epoch": 1.48,
      "learning_rate": 3.145725776649602e-06,
      "loss": 0.8606,
      "step": 81
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.9289321881345257e-06,
      "loss": 0.7898,
      "step": 82
    },
    {
      "epoch": 1.52,
      "learning_rate": 2.7185904612421177e-06,
      "loss": 0.913,
      "step": 83
    },
    {
      "epoch": 1.54,
      "learning_rate": 2.514892518288988e-06,
      "loss": 0.9278,
      "step": 84
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.318024219597196e-06,
      "loss": 0.9006,
      "step": 85
    },
    {
      "epoch": 1.57,
      "learning_rate": 2.1281651939094996e-06,
      "loss": 0.9705,
      "step": 86
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.94548867449054e-06,
      "loss": 0.848,
      "step": 87
    },
    {
      "epoch": 1.61,
      "learning_rate": 1.7701613410634367e-06,
      "loss": 0.947,
      "step": 88
    },
    {
      "epoch": 1.63,
      "learning_rate": 1.6023431677260215e-06,
      "loss": 0.8865,
      "step": 89
    },
    {
      "epoch": 1.65,
      "learning_rate": 1.4421872769855262e-06,
      "loss": 0.9392,
      "step": 90
    },
    {
      "epoch": 1.66,
      "learning_rate": 1.2898398000448441e-06,
      "loss": 0.85,
      "step": 91
    },
    {
      "epoch": 1.68,
      "learning_rate": 1.1454397434679022e-06,
      "loss": 0.7694,
      "step": 92
    },
    {
      "epoch": 1.7,
      "learning_rate": 1.0091188623457415e-06,
      "loss": 0.8638,
      "step": 93
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.810015400790994e-07,
      "loss": 0.8655,
      "step": 94
    },
    {
      "epoch": 1.74,
      "learning_rate": 7.612046748871327e-07,
      "loss": 0.8398,
      "step": 95
    },
    {
      "epoch": 1.76,
      "learning_rate": 6.498375731458529e-07,
      "loss": 0.7379,
      "step": 96
    },
    {
      "epoch": 1.77,
      "learning_rate": 5.470018496535967e-07,
      "loss": 0.8092,
      "step": 97
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.5279133491454406e-07,
      "loss": 0.8303,
      "step": 98
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.6729198952483725e-07,
      "loss": 0.7979,
      "step": 99
    },
    {
      "epoch": 1.83,
      "learning_rate": 2.905818257394799e-07,
      "loss": 0.834,
      "step": 100
    },
    {
      "epoch": 1.85,
      "learning_rate": 2.2273083629153148e-07,
      "loss": 0.8447,
      "step": 101
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.6380093052856482e-07,
      "loss": 0.8207,
      "step": 102
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.1384587792465873e-07,
      "loss": 0.8542,
      "step": 103
    },
    {
      "epoch": 1.9,
      "learning_rate": 7.291125901946027e-08,
      "loss": 1.091,
      "step": 104
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.103442382909051e-08,
      "loss": 0.9051,
      "step": 105
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.824445776682504e-08,
      "loss": 0.9512,
      "step": 106
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.562155104665955e-09,
      "loss": 0.6725,
      "step": 107
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0,
      "loss": 0.7599,
      "step": 108
    },
    {
      "epoch": 1.97,
      "step": 108,
      "total_flos": 2.3415025142071296e+16,
      "train_loss": 1.4361559649308522,
      "train_runtime": 6959.0247,
      "train_samples_per_second": 2.012,
      "train_steps_per_second": 0.016
    }
  ],
  "max_steps": 108,
  "num_train_epochs": 2,
  "total_flos": 2.3415025142071296e+16,
  "trial_name": null,
  "trial_params": null
}