|
{ |
|
"best_metric": 0.500215470790863, |
|
"best_model_checkpoint": "./MT-deer-7B/checkpoint-1400", |
|
"epoch": 2.662546059669559, |
|
"global_step": 1400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 1.4291, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 1.3933, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 1.3044, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.1498, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00015, |
|
"loss": 0.9306, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.8534, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.8292, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.8054, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00027, |
|
"loss": 0.7794, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0003, |
|
"loss": 0.7457, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002979661016949152, |
|
"loss": 0.7358, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002959322033898305, |
|
"loss": 0.729, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002938983050847457, |
|
"loss": 0.7056, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000291864406779661, |
|
"loss": 0.6986, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00028983050847457623, |
|
"loss": 0.6859, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002877966101694915, |
|
"loss": 0.6639, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00028576271186440673, |
|
"loss": 0.6708, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000283728813559322, |
|
"loss": 0.6593, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00028169491525423723, |
|
"loss": 0.6497, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002796610169491525, |
|
"loss": 0.6432, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.642841637134552, |
|
"eval_runtime": 92.942, |
|
"eval_samples_per_second": 21.519, |
|
"eval_steps_per_second": 2.69, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00027762711864406773, |
|
"loss": 0.6384, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00027559322033898304, |
|
"loss": 0.6344, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002735593220338983, |
|
"loss": 0.6308, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00027152542372881354, |
|
"loss": 0.6239, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002694915254237288, |
|
"loss": 0.6229, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00026745762711864404, |
|
"loss": 0.6149, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002654237288135593, |
|
"loss": 0.6062, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00026338983050847454, |
|
"loss": 0.615, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002613559322033898, |
|
"loss": 0.6043, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002593220338983051, |
|
"loss": 0.6004, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00025728813559322035, |
|
"loss": 0.6058, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002552542372881356, |
|
"loss": 0.5902, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00025322033898305085, |
|
"loss": 0.5912, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002511864406779661, |
|
"loss": 0.5988, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00024915254237288135, |
|
"loss": 0.5806, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002471186440677966, |
|
"loss": 0.5879, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00024508474576271185, |
|
"loss": 0.5846, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002430508474576271, |
|
"loss": 0.5789, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00024101694915254235, |
|
"loss": 0.5787, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002389830508474576, |
|
"loss": 0.5802, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.5802366137504578, |
|
"eval_runtime": 93.7341, |
|
"eval_samples_per_second": 21.337, |
|
"eval_steps_per_second": 2.667, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00023694915254237286, |
|
"loss": 0.5854, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002349152542372881, |
|
"loss": 0.5757, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00023288135593220336, |
|
"loss": 0.572, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002308474576271186, |
|
"loss": 0.5626, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00022881355932203386, |
|
"loss": 0.5605, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002267796610169491, |
|
"loss": 0.5656, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0002247457627118644, |
|
"loss": 0.5592, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00022271186440677964, |
|
"loss": 0.5639, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0002206779661016949, |
|
"loss": 0.5616, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00021864406779661014, |
|
"loss": 0.5677, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00021661016949152542, |
|
"loss": 0.5562, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00021457627118644067, |
|
"loss": 0.5535, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00021254237288135592, |
|
"loss": 0.5604, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00021050847457627117, |
|
"loss": 0.5507, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00020847457627118642, |
|
"loss": 0.5434, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0002064406779661017, |
|
"loss": 0.5517, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00020440677966101695, |
|
"loss": 0.5459, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0002023728813559322, |
|
"loss": 0.548, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00020033898305084745, |
|
"loss": 0.5463, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001983050847457627, |
|
"loss": 0.5402, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 0.5485313534736633, |
|
"eval_runtime": 93.9991, |
|
"eval_samples_per_second": 21.277, |
|
"eval_steps_per_second": 2.66, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00019627118644067795, |
|
"loss": 0.541, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001942372881355932, |
|
"loss": 0.5471, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00019220338983050845, |
|
"loss": 0.536, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001901694915254237, |
|
"loss": 0.5386, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00018813559322033895, |
|
"loss": 0.5337, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001861016949152542, |
|
"loss": 0.5351, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00018406779661016948, |
|
"loss": 0.5225, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00018203389830508473, |
|
"loss": 0.5366, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.5323, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00017796610169491523, |
|
"loss": 0.5316, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00017593220338983049, |
|
"loss": 0.5385, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00017389830508474574, |
|
"loss": 0.5293, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.000171864406779661, |
|
"loss": 0.5233, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016983050847457624, |
|
"loss": 0.5135, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0001677966101694915, |
|
"loss": 0.5266, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00016576271186440674, |
|
"loss": 0.5266, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.000163728813559322, |
|
"loss": 0.5217, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001616949152542373, |
|
"loss": 0.5149, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015966101694915255, |
|
"loss": 0.5239, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001576271186440678, |
|
"loss": 0.5202, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 0.5286936163902283, |
|
"eval_runtime": 94.1069, |
|
"eval_samples_per_second": 21.252, |
|
"eval_steps_per_second": 2.657, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00015559322033898305, |
|
"loss": 0.5321, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001535593220338983, |
|
"loss": 0.5203, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00015152542372881355, |
|
"loss": 0.5205, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001494915254237288, |
|
"loss": 0.5255, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00014745762711864405, |
|
"loss": 0.5164, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00014542372881355933, |
|
"loss": 0.5182, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00014338983050847458, |
|
"loss": 0.5171, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00014135593220338983, |
|
"loss": 0.5172, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013932203389830508, |
|
"loss": 0.5171, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00013728813559322033, |
|
"loss": 0.5114, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00013525423728813558, |
|
"loss": 0.5079, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00013322033898305083, |
|
"loss": 0.5158, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00013118644067796608, |
|
"loss": 0.5128, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012915254237288133, |
|
"loss": 0.5198, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012711864406779658, |
|
"loss": 0.5063, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00012508474576271184, |
|
"loss": 0.5068, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00012305084745762709, |
|
"loss": 0.5136, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00012101694915254236, |
|
"loss": 0.5012, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011898305084745763, |
|
"loss": 0.5071, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011694915254237288, |
|
"loss": 0.5092, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 0.514962375164032, |
|
"eval_runtime": 93.8613, |
|
"eval_samples_per_second": 21.308, |
|
"eval_steps_per_second": 2.664, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00011491525423728813, |
|
"loss": 0.5185, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00011288135593220338, |
|
"loss": 0.5055, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00011084745762711863, |
|
"loss": 0.5078, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010881355932203388, |
|
"loss": 0.5092, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010677966101694915, |
|
"loss": 0.4933, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0001047457627118644, |
|
"loss": 0.5052, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00010271186440677965, |
|
"loss": 0.4914, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001006779661016949, |
|
"loss": 0.4962, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.864406779661015e-05, |
|
"loss": 0.4969, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.661016949152541e-05, |
|
"loss": 0.5053, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.457627118644068e-05, |
|
"loss": 0.4934, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.254237288135593e-05, |
|
"loss": 0.4864, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.050847457627118e-05, |
|
"loss": 0.4974, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.847457627118644e-05, |
|
"loss": 0.4985, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.64406779661017e-05, |
|
"loss": 0.4945, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.440677966101695e-05, |
|
"loss": 0.4986, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.23728813559322e-05, |
|
"loss": 0.4887, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.033898305084745e-05, |
|
"loss": 0.5004, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.83050847457627e-05, |
|
"loss": 0.486, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.627118644067796e-05, |
|
"loss": 0.4863, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 0.5064148902893066, |
|
"eval_runtime": 93.4587, |
|
"eval_samples_per_second": 21.4, |
|
"eval_steps_per_second": 2.675, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.423728813559321e-05, |
|
"loss": 0.4959, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.220338983050848e-05, |
|
"loss": 0.4961, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.016949152542373e-05, |
|
"loss": 0.4902, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.813559322033898e-05, |
|
"loss": 0.4957, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 6.610169491525423e-05, |
|
"loss": 0.4998, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.406779661016948e-05, |
|
"loss": 0.4945, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 6.203389830508473e-05, |
|
"loss": 0.4914, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 0.4984, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 5.796610169491525e-05, |
|
"loss": 0.4926, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 5.59322033898305e-05, |
|
"loss": 0.4891, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 5.389830508474576e-05, |
|
"loss": 0.4895, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 5.186440677966101e-05, |
|
"loss": 0.4817, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.983050847457626e-05, |
|
"loss": 0.491, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.779661016949152e-05, |
|
"loss": 0.4878, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.576271186440678e-05, |
|
"loss": 0.4869, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.3728813559322035e-05, |
|
"loss": 0.488, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.1694915254237285e-05, |
|
"loss": 0.4866, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.9661016949152536e-05, |
|
"loss": 0.4892, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.7627118644067794e-05, |
|
"loss": 0.486, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.559322033898305e-05, |
|
"loss": 0.4879, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 0.500215470790863, |
|
"eval_runtime": 93.9201, |
|
"eval_samples_per_second": 21.295, |
|
"eval_steps_per_second": 2.662, |
|
"step": 1400 |
|
} |
|
], |
|
"max_steps": 1575, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.8195722467776922e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|