|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 59688, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019832462136442838, |
|
"loss": 38.657, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019664924272885673, |
|
"loss": 31.308, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 31.739517211914062, |
|
"eval_runtime": 0.2575, |
|
"eval_samples_per_second": 194.153, |
|
"eval_steps_per_second": 7.766, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019497386409328508, |
|
"loss": 29.927, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019329848545771345, |
|
"loss": 29.1683, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 30.288894653320312, |
|
"eval_runtime": 0.2122, |
|
"eval_samples_per_second": 235.624, |
|
"eval_steps_per_second": 9.425, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019162310682214182, |
|
"loss": 28.6403, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001899477281865702, |
|
"loss": 28.258, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 29.558307647705078, |
|
"eval_runtime": 0.2125, |
|
"eval_samples_per_second": 235.261, |
|
"eval_steps_per_second": 9.41, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018827234955099854, |
|
"loss": 27.9285, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001865969709154269, |
|
"loss": 27.6974, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 29.129697799682617, |
|
"eval_runtime": 0.213, |
|
"eval_samples_per_second": 234.762, |
|
"eval_steps_per_second": 9.39, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00018492159227985526, |
|
"loss": 27.492, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00018324621364428363, |
|
"loss": 27.3712, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 28.81637954711914, |
|
"eval_runtime": 0.2107, |
|
"eval_samples_per_second": 237.354, |
|
"eval_steps_per_second": 9.494, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00018157083500871198, |
|
"loss": 27.204, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00017989545637314033, |
|
"loss": 27.0678, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 28.593141555786133, |
|
"eval_runtime": 0.2097, |
|
"eval_samples_per_second": 238.454, |
|
"eval_steps_per_second": 9.538, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001782200777375687, |
|
"loss": 26.9572, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00017654469910199707, |
|
"loss": 26.8814, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 28.419679641723633, |
|
"eval_runtime": 0.2117, |
|
"eval_samples_per_second": 236.237, |
|
"eval_steps_per_second": 9.449, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00017486932046642542, |
|
"loss": 26.784, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001731939418308538, |
|
"loss": 26.6886, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 28.238792419433594, |
|
"eval_runtime": 0.2269, |
|
"eval_samples_per_second": 220.371, |
|
"eval_steps_per_second": 8.815, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017151856319528214, |
|
"loss": 26.6235, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001698431845597105, |
|
"loss": 26.545, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 28.171510696411133, |
|
"eval_runtime": 0.2133, |
|
"eval_samples_per_second": 234.396, |
|
"eval_steps_per_second": 9.376, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016816780592413886, |
|
"loss": 26.4933, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00016649242728856723, |
|
"loss": 26.4678, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 28.05875015258789, |
|
"eval_runtime": 0.2106, |
|
"eval_samples_per_second": 237.401, |
|
"eval_steps_per_second": 9.496, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00016481704865299558, |
|
"loss": 26.3908, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00016314167001742395, |
|
"loss": 26.3369, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 27.978954315185547, |
|
"eval_runtime": 0.2203, |
|
"eval_samples_per_second": 226.942, |
|
"eval_steps_per_second": 9.078, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001614662913818523, |
|
"loss": 26.3059, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00015979091274628067, |
|
"loss": 26.2555, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 27.862077713012695, |
|
"eval_runtime": 0.2131, |
|
"eval_samples_per_second": 234.646, |
|
"eval_steps_per_second": 9.386, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00015811553411070902, |
|
"loss": 26.197, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001564401554751374, |
|
"loss": 26.1535, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 27.81878662109375, |
|
"eval_runtime": 0.2128, |
|
"eval_samples_per_second": 234.945, |
|
"eval_steps_per_second": 9.398, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00015476477683956574, |
|
"loss": 26.1223, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001530893982039941, |
|
"loss": 26.0879, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 27.750713348388672, |
|
"eval_runtime": 0.2338, |
|
"eval_samples_per_second": 213.838, |
|
"eval_steps_per_second": 8.554, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00015141401956842248, |
|
"loss": 26.0771, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00014973864093285083, |
|
"loss": 26.0283, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 27.70425796508789, |
|
"eval_runtime": 0.2114, |
|
"eval_samples_per_second": 236.495, |
|
"eval_steps_per_second": 9.46, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00014806326229727917, |
|
"loss": 26.0089, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00014638788366170755, |
|
"loss": 25.9742, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 27.637338638305664, |
|
"eval_runtime": 0.2146, |
|
"eval_samples_per_second": 233.03, |
|
"eval_steps_per_second": 9.321, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00014471250502613592, |
|
"loss": 25.9742, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00014303712639056427, |
|
"loss": 25.9445, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 27.6143798828125, |
|
"eval_runtime": 0.2101, |
|
"eval_samples_per_second": 237.929, |
|
"eval_steps_per_second": 9.517, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00014136174775499264, |
|
"loss": 25.9146, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.000139686369119421, |
|
"loss": 25.8983, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 27.574024200439453, |
|
"eval_runtime": 0.2119, |
|
"eval_samples_per_second": 236.006, |
|
"eval_steps_per_second": 9.44, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00013801099048384936, |
|
"loss": 25.8699, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00013633561184827773, |
|
"loss": 25.8736, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 27.524921417236328, |
|
"eval_runtime": 0.2136, |
|
"eval_samples_per_second": 234.075, |
|
"eval_steps_per_second": 9.363, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00013466023321270608, |
|
"loss": 25.8176, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00013298485457713443, |
|
"loss": 25.8017, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 27.505229949951172, |
|
"eval_runtime": 0.2283, |
|
"eval_samples_per_second": 218.987, |
|
"eval_steps_per_second": 8.759, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001313094759415628, |
|
"loss": 25.7503, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00012963409730599117, |
|
"loss": 25.7469, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 27.50472640991211, |
|
"eval_runtime": 0.2116, |
|
"eval_samples_per_second": 236.284, |
|
"eval_steps_per_second": 9.451, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00012795871867041952, |
|
"loss": 25.7176, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00012628334003484786, |
|
"loss": 25.6876, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 27.477006912231445, |
|
"eval_runtime": 0.215, |
|
"eval_samples_per_second": 232.548, |
|
"eval_steps_per_second": 9.302, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00012460796139927624, |
|
"loss": 25.7203, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001229325827637046, |
|
"loss": 25.6757, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 27.425168991088867, |
|
"eval_runtime": 0.2125, |
|
"eval_samples_per_second": 235.312, |
|
"eval_steps_per_second": 9.412, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00012125720412813297, |
|
"loss": 25.6867, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00011958182549256132, |
|
"loss": 25.6558, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 27.394458770751953, |
|
"eval_runtime": 0.2106, |
|
"eval_samples_per_second": 237.425, |
|
"eval_steps_per_second": 9.497, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00011790644685698968, |
|
"loss": 25.6432, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00011623106822141805, |
|
"loss": 25.6223, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 27.391796112060547, |
|
"eval_runtime": 0.2101, |
|
"eval_samples_per_second": 237.937, |
|
"eval_steps_per_second": 9.517, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00011455568958584641, |
|
"loss": 25.6129, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00011288031095027476, |
|
"loss": 25.6234, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 27.368698120117188, |
|
"eval_runtime": 0.2109, |
|
"eval_samples_per_second": 237.043, |
|
"eval_steps_per_second": 9.482, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00011120493231470313, |
|
"loss": 25.5979, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00010952955367913149, |
|
"loss": 25.623, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 27.350811004638672, |
|
"eval_runtime": 0.2251, |
|
"eval_samples_per_second": 222.106, |
|
"eval_steps_per_second": 8.884, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00010785417504355986, |
|
"loss": 25.5917, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00010617879640798821, |
|
"loss": 25.5728, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 27.310701370239258, |
|
"eval_runtime": 0.2112, |
|
"eval_samples_per_second": 236.707, |
|
"eval_steps_per_second": 9.468, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00010450341777241657, |
|
"loss": 25.5796, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00010282803913684493, |
|
"loss": 25.5648, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 27.295310974121094, |
|
"eval_runtime": 0.217, |
|
"eval_samples_per_second": 230.466, |
|
"eval_steps_per_second": 9.219, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001011526605012733, |
|
"loss": 25.5587, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.947728186570166e-05, |
|
"loss": 25.5318, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 27.264293670654297, |
|
"eval_runtime": 0.2116, |
|
"eval_samples_per_second": 236.263, |
|
"eval_steps_per_second": 9.451, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.780190323013e-05, |
|
"loss": 25.5344, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.612652459455838e-05, |
|
"loss": 25.5212, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 27.232440948486328, |
|
"eval_runtime": 0.2137, |
|
"eval_samples_per_second": 233.953, |
|
"eval_steps_per_second": 9.358, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.445114595898673e-05, |
|
"loss": 25.5354, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.27757673234151e-05, |
|
"loss": 25.5099, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 27.240272521972656, |
|
"eval_runtime": 0.2111, |
|
"eval_samples_per_second": 236.859, |
|
"eval_steps_per_second": 9.474, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.110038868784345e-05, |
|
"loss": 25.5093, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.942501005227182e-05, |
|
"loss": 25.497, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 27.228918075561523, |
|
"eval_runtime": 0.2125, |
|
"eval_samples_per_second": 235.337, |
|
"eval_steps_per_second": 9.413, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.774963141670018e-05, |
|
"loss": 25.5092, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.607425278112854e-05, |
|
"loss": 25.4787, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 27.228187561035156, |
|
"eval_runtime": 0.2095, |
|
"eval_samples_per_second": 238.654, |
|
"eval_steps_per_second": 9.546, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.439887414555691e-05, |
|
"loss": 25.4684, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.272349550998526e-05, |
|
"loss": 25.483, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 27.22612762451172, |
|
"eval_runtime": 0.2116, |
|
"eval_samples_per_second": 236.31, |
|
"eval_steps_per_second": 9.452, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.104811687441363e-05, |
|
"loss": 25.4463, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.937273823884198e-05, |
|
"loss": 25.4702, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 27.186805725097656, |
|
"eval_runtime": 0.2143, |
|
"eval_samples_per_second": 233.331, |
|
"eval_steps_per_second": 9.333, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.769735960327035e-05, |
|
"loss": 25.4289, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.60219809676987e-05, |
|
"loss": 25.4306, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 27.194353103637695, |
|
"eval_runtime": 0.2096, |
|
"eval_samples_per_second": 238.518, |
|
"eval_steps_per_second": 9.541, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.434660233212707e-05, |
|
"loss": 25.4421, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.267122369655543e-05, |
|
"loss": 25.4518, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 27.154298782348633, |
|
"eval_runtime": 0.22, |
|
"eval_samples_per_second": 227.255, |
|
"eval_steps_per_second": 9.09, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.099584506098379e-05, |
|
"loss": 25.4411, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.932046642541215e-05, |
|
"loss": 25.4264, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 27.156774520874023, |
|
"eval_runtime": 0.2232, |
|
"eval_samples_per_second": 224.006, |
|
"eval_steps_per_second": 8.96, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.764508778984051e-05, |
|
"loss": 25.4384, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.596970915426887e-05, |
|
"loss": 25.4168, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 27.140100479125977, |
|
"eval_runtime": 0.2133, |
|
"eval_samples_per_second": 234.392, |
|
"eval_steps_per_second": 9.376, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.429433051869723e-05, |
|
"loss": 25.4029, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.261895188312559e-05, |
|
"loss": 25.386, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 27.135889053344727, |
|
"eval_runtime": 0.2136, |
|
"eval_samples_per_second": 234.042, |
|
"eval_steps_per_second": 9.362, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.0943573247553954e-05, |
|
"loss": 25.3667, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.926819461198231e-05, |
|
"loss": 25.3798, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 27.12567138671875, |
|
"eval_runtime": 0.2109, |
|
"eval_samples_per_second": 237.075, |
|
"eval_steps_per_second": 9.483, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.7592815976410674e-05, |
|
"loss": 25.3643, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.5917437340839026e-05, |
|
"loss": 25.3643, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 27.113182067871094, |
|
"eval_runtime": 0.2121, |
|
"eval_samples_per_second": 235.709, |
|
"eval_steps_per_second": 9.428, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.424205870526739e-05, |
|
"loss": 25.3653, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.256668006969575e-05, |
|
"loss": 25.3378, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 27.1131649017334, |
|
"eval_runtime": 0.2131, |
|
"eval_samples_per_second": 234.625, |
|
"eval_steps_per_second": 9.385, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.089130143412412e-05, |
|
"loss": 25.3598, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.921592279855248e-05, |
|
"loss": 25.3431, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 27.10150909423828, |
|
"eval_runtime": 0.3343, |
|
"eval_samples_per_second": 149.553, |
|
"eval_steps_per_second": 5.982, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.754054416298084e-05, |
|
"loss": 25.3568, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.58651655274092e-05, |
|
"loss": 25.3384, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 27.091062545776367, |
|
"eval_runtime": 0.2132, |
|
"eval_samples_per_second": 234.536, |
|
"eval_steps_per_second": 9.381, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.418978689183756e-05, |
|
"loss": 25.3337, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.251440825626592e-05, |
|
"loss": 25.3516, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 27.0788631439209, |
|
"eval_runtime": 0.2156, |
|
"eval_samples_per_second": 231.865, |
|
"eval_steps_per_second": 9.275, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.083902962069428e-05, |
|
"loss": 25.3368, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.9163650985122644e-05, |
|
"loss": 25.3104, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 27.085359573364258, |
|
"eval_runtime": 0.2126, |
|
"eval_samples_per_second": 235.191, |
|
"eval_steps_per_second": 9.408, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.7488272349551004e-05, |
|
"loss": 25.3357, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.581289371397936e-05, |
|
"loss": 25.3254, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 27.069896697998047, |
|
"eval_runtime": 0.2134, |
|
"eval_samples_per_second": 234.3, |
|
"eval_steps_per_second": 9.372, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.413751507840772e-05, |
|
"loss": 25.3286, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.246213644283608e-05, |
|
"loss": 25.3307, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 27.073589324951172, |
|
"eval_runtime": 0.2151, |
|
"eval_samples_per_second": 232.429, |
|
"eval_steps_per_second": 9.297, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.078675780726444e-05, |
|
"loss": 25.3197, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9111379171692806e-05, |
|
"loss": 25.3153, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 27.077919006347656, |
|
"eval_runtime": 0.267, |
|
"eval_samples_per_second": 187.287, |
|
"eval_steps_per_second": 7.491, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7436000536121165e-05, |
|
"loss": 25.3012, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.5760621900549525e-05, |
|
"loss": 25.3149, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 27.066099166870117, |
|
"eval_runtime": 0.2122, |
|
"eval_samples_per_second": 235.617, |
|
"eval_steps_per_second": 9.425, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.4085243264977885e-05, |
|
"loss": 25.2943, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2409864629406248e-05, |
|
"loss": 25.3251, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 27.05815887451172, |
|
"eval_runtime": 0.2117, |
|
"eval_samples_per_second": 236.215, |
|
"eval_steps_per_second": 9.449, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.0734485993834608e-05, |
|
"loss": 25.2907, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.9059107358262967e-05, |
|
"loss": 25.3185, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 27.06610107421875, |
|
"eval_runtime": 0.2143, |
|
"eval_samples_per_second": 233.359, |
|
"eval_steps_per_second": 9.334, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7383728722691327e-05, |
|
"loss": 25.3016, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.570835008711969e-05, |
|
"loss": 25.3045, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 27.05499267578125, |
|
"eval_runtime": 0.2149, |
|
"eval_samples_per_second": 232.706, |
|
"eval_steps_per_second": 9.308, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.403297145154805e-05, |
|
"loss": 25.2932, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2357592815976411e-05, |
|
"loss": 25.2952, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 27.059736251831055, |
|
"eval_runtime": 0.2105, |
|
"eval_samples_per_second": 237.516, |
|
"eval_steps_per_second": 9.501, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.0682214180404771e-05, |
|
"loss": 25.2941, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 9.006835544833132e-06, |
|
"loss": 25.2863, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 27.0577392578125, |
|
"eval_runtime": 0.231, |
|
"eval_samples_per_second": 216.423, |
|
"eval_steps_per_second": 8.657, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.331456909261493e-06, |
|
"loss": 25.2943, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.656078273689854e-06, |
|
"loss": 25.2966, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 27.05270004272461, |
|
"eval_runtime": 0.2335, |
|
"eval_samples_per_second": 214.093, |
|
"eval_steps_per_second": 8.564, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.980699638118215e-06, |
|
"loss": 25.2804, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.3053210025465755e-06, |
|
"loss": 25.2995, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_loss": 27.049020767211914, |
|
"eval_runtime": 0.2618, |
|
"eval_samples_per_second": 190.997, |
|
"eval_steps_per_second": 7.64, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 6.299423669749365e-07, |
|
"loss": 25.2862, |
|
"step": 59500 |
|
} |
|
], |
|
"max_steps": 59688, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.715405206616277e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|