m4lw4r3exe's picture
End of training
e07dc6f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 50835,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 4.899281990754402e-05,
"loss": 0.2509,
"step": 1024
},
{
"epoch": 0.1,
"eval_loss": 0.42676058411598206,
"eval_runtime": 32.3598,
"eval_samples_per_second": 106.212,
"eval_steps_per_second": 3.337,
"step": 1024
},
{
"epoch": 0.2,
"learning_rate": 4.798563981508803e-05,
"loss": 0.2521,
"step": 2048
},
{
"epoch": 0.2,
"eval_loss": 0.4283539652824402,
"eval_runtime": 32.4193,
"eval_samples_per_second": 106.017,
"eval_steps_per_second": 3.331,
"step": 2048
},
{
"epoch": 0.3,
"learning_rate": 4.697845972263204e-05,
"loss": 0.2533,
"step": 3072
},
{
"epoch": 0.3,
"eval_loss": 0.42187923192977905,
"eval_runtime": 32.3798,
"eval_samples_per_second": 106.146,
"eval_steps_per_second": 3.335,
"step": 3072
},
{
"epoch": 0.4,
"learning_rate": 4.597127963017606e-05,
"loss": 0.2517,
"step": 4096
},
{
"epoch": 0.4,
"eval_loss": 0.42451202869415283,
"eval_runtime": 32.3385,
"eval_samples_per_second": 106.282,
"eval_steps_per_second": 3.34,
"step": 4096
},
{
"epoch": 0.5,
"learning_rate": 4.496409953772008e-05,
"loss": 0.2512,
"step": 5120
},
{
"epoch": 0.5,
"eval_loss": 0.42285656929016113,
"eval_runtime": 32.3235,
"eval_samples_per_second": 106.331,
"eval_steps_per_second": 3.341,
"step": 5120
},
{
"epoch": 0.6,
"learning_rate": 4.3956919445264097e-05,
"loss": 0.2506,
"step": 6144
},
{
"epoch": 0.6,
"eval_loss": 0.4190705716609955,
"eval_runtime": 32.4437,
"eval_samples_per_second": 105.937,
"eval_steps_per_second": 3.329,
"step": 6144
},
{
"epoch": 0.71,
"learning_rate": 4.294973935280811e-05,
"loss": 0.2512,
"step": 7168
},
{
"epoch": 0.71,
"eval_loss": 0.4247213900089264,
"eval_runtime": 32.3289,
"eval_samples_per_second": 106.314,
"eval_steps_per_second": 3.341,
"step": 7168
},
{
"epoch": 0.81,
"learning_rate": 4.194255926035212e-05,
"loss": 0.2483,
"step": 8192
},
{
"epoch": 0.81,
"eval_loss": 0.4238651394844055,
"eval_runtime": 32.3862,
"eval_samples_per_second": 106.126,
"eval_steps_per_second": 3.335,
"step": 8192
},
{
"epoch": 0.91,
"learning_rate": 4.093537916789614e-05,
"loss": 0.2479,
"step": 9216
},
{
"epoch": 0.91,
"eval_loss": 0.4259129762649536,
"eval_runtime": 32.4627,
"eval_samples_per_second": 105.875,
"eval_steps_per_second": 3.327,
"step": 9216
},
{
"epoch": 1.01,
"learning_rate": 3.9928199075440155e-05,
"loss": 0.2498,
"step": 10240
},
{
"epoch": 1.01,
"eval_loss": 0.4262418746948242,
"eval_runtime": 32.3999,
"eval_samples_per_second": 106.081,
"eval_steps_per_second": 3.333,
"step": 10240
},
{
"epoch": 1.11,
"learning_rate": 3.8921018982984166e-05,
"loss": 0.2467,
"step": 11264
},
{
"epoch": 1.11,
"eval_loss": 0.4267333149909973,
"eval_runtime": 32.3308,
"eval_samples_per_second": 106.307,
"eval_steps_per_second": 3.34,
"step": 11264
},
{
"epoch": 1.21,
"learning_rate": 3.791482246483722e-05,
"loss": 0.2466,
"step": 12288
},
{
"epoch": 1.21,
"eval_loss": 0.4263165295124054,
"eval_runtime": 32.4348,
"eval_samples_per_second": 105.967,
"eval_steps_per_second": 3.33,
"step": 12288
},
{
"epoch": 1.31,
"learning_rate": 3.690764237238124e-05,
"loss": 0.2449,
"step": 13312
},
{
"epoch": 1.31,
"eval_loss": 0.42505738139152527,
"eval_runtime": 32.3762,
"eval_samples_per_second": 106.158,
"eval_steps_per_second": 3.336,
"step": 13312
},
{
"epoch": 1.41,
"learning_rate": 3.590144585423429e-05,
"loss": 0.2452,
"step": 14336
},
{
"epoch": 1.41,
"eval_loss": 0.42740598320961,
"eval_runtime": 32.44,
"eval_samples_per_second": 105.949,
"eval_steps_per_second": 3.329,
"step": 14336
},
{
"epoch": 1.51,
"learning_rate": 3.489426576177831e-05,
"loss": 0.2449,
"step": 15360
},
{
"epoch": 1.51,
"eval_loss": 0.42628249526023865,
"eval_runtime": 32.3172,
"eval_samples_per_second": 106.352,
"eval_steps_per_second": 3.342,
"step": 15360
},
{
"epoch": 1.61,
"learning_rate": 3.388708566932232e-05,
"loss": 0.2444,
"step": 16384
},
{
"epoch": 1.61,
"eval_loss": 0.42398524284362793,
"eval_runtime": 32.2909,
"eval_samples_per_second": 106.439,
"eval_steps_per_second": 3.345,
"step": 16384
},
{
"epoch": 1.71,
"learning_rate": 3.287990557686633e-05,
"loss": 0.2428,
"step": 17408
},
{
"epoch": 1.71,
"eval_loss": 0.42891454696655273,
"eval_runtime": 32.3773,
"eval_samples_per_second": 106.155,
"eval_steps_per_second": 3.336,
"step": 17408
},
{
"epoch": 1.81,
"learning_rate": 3.1873709058719384e-05,
"loss": 0.2425,
"step": 18432
},
{
"epoch": 1.81,
"eval_loss": 0.4228712022304535,
"eval_runtime": 32.4341,
"eval_samples_per_second": 105.969,
"eval_steps_per_second": 3.33,
"step": 18432
},
{
"epoch": 1.91,
"learning_rate": 3.08665289662634e-05,
"loss": 0.2424,
"step": 19456
},
{
"epoch": 1.91,
"eval_loss": 0.4291061758995056,
"eval_runtime": 32.3192,
"eval_samples_per_second": 106.345,
"eval_steps_per_second": 3.342,
"step": 19456
},
{
"epoch": 2.01,
"learning_rate": 2.985934887380742e-05,
"loss": 0.2422,
"step": 20480
},
{
"epoch": 2.01,
"eval_loss": 0.4246675968170166,
"eval_runtime": 32.2862,
"eval_samples_per_second": 106.454,
"eval_steps_per_second": 3.345,
"step": 20480
},
{
"epoch": 2.12,
"learning_rate": 2.8853152355660473e-05,
"loss": 0.2397,
"step": 21504
},
{
"epoch": 2.12,
"eval_loss": 0.42707231640815735,
"eval_runtime": 32.3373,
"eval_samples_per_second": 106.286,
"eval_steps_per_second": 3.34,
"step": 21504
},
{
"epoch": 2.22,
"learning_rate": 2.7846955837513527e-05,
"loss": 0.2397,
"step": 22528
},
{
"epoch": 2.22,
"eval_loss": 0.42262786626815796,
"eval_runtime": 32.3328,
"eval_samples_per_second": 106.301,
"eval_steps_per_second": 3.34,
"step": 22528
},
{
"epoch": 2.32,
"learning_rate": 2.6839775745057538e-05,
"loss": 0.2411,
"step": 23552
},
{
"epoch": 2.32,
"eval_loss": 0.42685696482658386,
"eval_runtime": 32.3962,
"eval_samples_per_second": 106.093,
"eval_steps_per_second": 3.334,
"step": 23552
},
{
"epoch": 2.42,
"learning_rate": 2.5832595652601556e-05,
"loss": 0.2408,
"step": 24576
},
{
"epoch": 2.42,
"eval_loss": 0.42877742648124695,
"eval_runtime": 32.3163,
"eval_samples_per_second": 106.355,
"eval_steps_per_second": 3.342,
"step": 24576
},
{
"epoch": 2.52,
"learning_rate": 2.482541556014557e-05,
"loss": 0.2392,
"step": 25600
},
{
"epoch": 2.52,
"eval_loss": 0.42227810621261597,
"eval_runtime": 32.369,
"eval_samples_per_second": 106.182,
"eval_steps_per_second": 3.337,
"step": 25600
},
{
"epoch": 2.62,
"learning_rate": 2.3819219041998624e-05,
"loss": 0.2391,
"step": 26624
},
{
"epoch": 2.62,
"eval_loss": 0.4296777546405792,
"eval_runtime": 32.4315,
"eval_samples_per_second": 105.977,
"eval_steps_per_second": 3.33,
"step": 26624
},
{
"epoch": 2.72,
"learning_rate": 2.2812038949542638e-05,
"loss": 0.2385,
"step": 27648
},
{
"epoch": 2.72,
"eval_loss": 0.4252742528915405,
"eval_runtime": 32.4362,
"eval_samples_per_second": 105.962,
"eval_steps_per_second": 3.33,
"step": 27648
},
{
"epoch": 2.82,
"learning_rate": 2.180584243139569e-05,
"loss": 0.2371,
"step": 28672
},
{
"epoch": 2.82,
"eval_loss": 0.42966845631599426,
"eval_runtime": 32.3834,
"eval_samples_per_second": 106.135,
"eval_steps_per_second": 3.335,
"step": 28672
},
{
"epoch": 2.92,
"learning_rate": 2.079866233893971e-05,
"loss": 0.2373,
"step": 29696
},
{
"epoch": 2.92,
"eval_loss": 0.4231690466403961,
"eval_runtime": 32.3708,
"eval_samples_per_second": 106.176,
"eval_steps_per_second": 3.336,
"step": 29696
},
{
"epoch": 3.02,
"learning_rate": 1.97934493951018e-05,
"loss": 0.2368,
"step": 30720
},
{
"epoch": 3.02,
"eval_loss": 0.42956846952438354,
"eval_runtime": 32.3442,
"eval_samples_per_second": 106.263,
"eval_steps_per_second": 3.339,
"step": 30720
},
{
"epoch": 3.12,
"learning_rate": 1.8786269302645816e-05,
"loss": 0.2355,
"step": 31744
},
{
"epoch": 3.12,
"eval_loss": 0.43274641036987305,
"eval_runtime": 32.3365,
"eval_samples_per_second": 106.289,
"eval_steps_per_second": 3.34,
"step": 31744
},
{
"epoch": 3.22,
"learning_rate": 1.777908921018983e-05,
"loss": 0.2354,
"step": 32768
},
{
"epoch": 3.22,
"eval_loss": 0.4304845929145813,
"eval_runtime": 32.2799,
"eval_samples_per_second": 106.475,
"eval_steps_per_second": 3.346,
"step": 32768
},
{
"epoch": 3.32,
"learning_rate": 1.6771909117733845e-05,
"loss": 0.2345,
"step": 33792
},
{
"epoch": 3.32,
"eval_loss": 0.4286292791366577,
"eval_runtime": 32.4389,
"eval_samples_per_second": 105.953,
"eval_steps_per_second": 3.329,
"step": 33792
},
{
"epoch": 3.42,
"learning_rate": 1.5765712599586898e-05,
"loss": 0.2355,
"step": 34816
},
{
"epoch": 3.42,
"eval_loss": 0.4350430965423584,
"eval_runtime": 32.3371,
"eval_samples_per_second": 106.287,
"eval_steps_per_second": 3.34,
"step": 34816
},
{
"epoch": 3.53,
"learning_rate": 1.4758532507130915e-05,
"loss": 0.2353,
"step": 35840
},
{
"epoch": 3.53,
"eval_loss": 0.4268806278705597,
"eval_runtime": 32.3956,
"eval_samples_per_second": 106.095,
"eval_steps_per_second": 3.334,
"step": 35840
},
{
"epoch": 3.63,
"learning_rate": 1.375233598898397e-05,
"loss": 0.2351,
"step": 36864
},
{
"epoch": 3.63,
"eval_loss": 0.43005427718162537,
"eval_runtime": 32.3262,
"eval_samples_per_second": 106.323,
"eval_steps_per_second": 3.341,
"step": 36864
},
{
"epoch": 3.73,
"learning_rate": 1.2745155896527982e-05,
"loss": 0.2336,
"step": 37888
},
{
"epoch": 3.73,
"eval_loss": 0.4301435649394989,
"eval_runtime": 32.4031,
"eval_samples_per_second": 106.07,
"eval_steps_per_second": 3.333,
"step": 37888
},
{
"epoch": 3.83,
"learning_rate": 1.1737975804071997e-05,
"loss": 0.2344,
"step": 38912
},
{
"epoch": 3.83,
"eval_loss": 0.43188127875328064,
"eval_runtime": 32.3893,
"eval_samples_per_second": 106.115,
"eval_steps_per_second": 3.334,
"step": 38912
},
{
"epoch": 3.93,
"learning_rate": 1.0730795711616013e-05,
"loss": 0.2339,
"step": 39936
},
{
"epoch": 3.93,
"eval_loss": 0.4304964244365692,
"eval_runtime": 32.4411,
"eval_samples_per_second": 105.946,
"eval_steps_per_second": 3.329,
"step": 39936
},
{
"epoch": 4.03,
"learning_rate": 9.724599193469066e-06,
"loss": 0.2326,
"step": 40960
},
{
"epoch": 4.03,
"eval_loss": 0.4298175573348999,
"eval_runtime": 32.3377,
"eval_samples_per_second": 106.285,
"eval_steps_per_second": 3.34,
"step": 40960
},
{
"epoch": 4.13,
"learning_rate": 8.718402675322121e-06,
"loss": 0.2316,
"step": 41984
},
{
"epoch": 4.13,
"eval_loss": 0.43077352643013,
"eval_runtime": 32.3835,
"eval_samples_per_second": 106.134,
"eval_steps_per_second": 3.335,
"step": 41984
},
{
"epoch": 4.23,
"learning_rate": 7.711222582866136e-06,
"loss": 0.2311,
"step": 43008
},
{
"epoch": 4.23,
"eval_loss": 0.43301910161972046,
"eval_runtime": 32.3178,
"eval_samples_per_second": 106.35,
"eval_steps_per_second": 3.342,
"step": 43008
},
{
"epoch": 4.33,
"learning_rate": 6.704042490410151e-06,
"loss": 0.2315,
"step": 44032
},
{
"epoch": 4.33,
"eval_loss": 0.4313049912452698,
"eval_runtime": 32.3292,
"eval_samples_per_second": 106.313,
"eval_steps_per_second": 3.341,
"step": 44032
},
{
"epoch": 4.43,
"learning_rate": 5.697845972263205e-06,
"loss": 0.2305,
"step": 45056
},
{
"epoch": 4.43,
"eval_loss": 0.43192604184150696,
"eval_runtime": 32.2814,
"eval_samples_per_second": 106.47,
"eval_steps_per_second": 3.346,
"step": 45056
},
{
"epoch": 4.53,
"learning_rate": 4.69066587980722e-06,
"loss": 0.2328,
"step": 46080
},
{
"epoch": 4.53,
"eval_loss": 0.42917123436927795,
"eval_runtime": 32.2788,
"eval_samples_per_second": 106.479,
"eval_steps_per_second": 3.346,
"step": 46080
},
{
"epoch": 4.63,
"learning_rate": 3.6834857873512347e-06,
"loss": 0.232,
"step": 47104
},
{
"epoch": 4.63,
"eval_loss": 0.4288509488105774,
"eval_runtime": 32.3263,
"eval_samples_per_second": 106.322,
"eval_steps_per_second": 3.341,
"step": 47104
},
{
"epoch": 4.73,
"learning_rate": 2.6763056948952493e-06,
"loss": 0.2309,
"step": 48128
},
{
"epoch": 4.73,
"eval_loss": 0.43027371168136597,
"eval_runtime": 32.3873,
"eval_samples_per_second": 106.122,
"eval_steps_per_second": 3.335,
"step": 48128
},
{
"epoch": 4.83,
"learning_rate": 1.6701091767483034e-06,
"loss": 0.23,
"step": 49152
},
{
"epoch": 4.83,
"eval_loss": 0.4316680133342743,
"eval_runtime": 32.3291,
"eval_samples_per_second": 106.313,
"eval_steps_per_second": 3.341,
"step": 49152
},
{
"epoch": 4.94,
"learning_rate": 6.629290842923184e-07,
"loss": 0.2315,
"step": 50176
},
{
"epoch": 4.94,
"eval_loss": 0.4303137958049774,
"eval_runtime": 32.4187,
"eval_samples_per_second": 106.019,
"eval_steps_per_second": 3.331,
"step": 50176
},
{
"epoch": 5.0,
"step": 50835,
"total_flos": 4.2534856293423514e+17,
"train_loss": 0.24000135361532715,
"train_runtime": 35584.6801,
"train_samples_per_second": 51.426,
"train_steps_per_second": 1.429
}
],
"max_steps": 50835,
"num_train_epochs": 5,
"total_flos": 4.2534856293423514e+17,
"trial_name": null,
"trial_params": null
}