|
{
  "best_metric": 0.9918692001767565,
  "best_model_checkpoint": "electricidad-small-discriminator-finetuned-clasificacion-comentarios-suicidas/checkpoint-84154",
  "epoch": 15.0,
  "global_step": 90165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 1.9889092219819223e-05,
      "loss": 0.3744,
      "step": 500
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.977818443963844e-05,
      "loss": 0.2448,
      "step": 1000
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.9667276659457665e-05,
      "loss": 0.2103,
      "step": 1500
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9556368879276883e-05,
      "loss": 0.2037,
      "step": 2000
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.9445461099096104e-05,
      "loss": 0.1902,
      "step": 2500
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.9334553318915322e-05,
      "loss": 0.1874,
      "step": 3000
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.9223645538734543e-05,
      "loss": 0.1717,
      "step": 3500
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.9112737758553765e-05,
      "loss": 0.1723,
      "step": 4000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.9001829978372986e-05,
      "loss": 0.1697,
      "step": 4500
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.8890922198192204e-05,
      "loss": 0.1637,
      "step": 5000
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.8780014418011425e-05,
      "loss": 0.1593,
      "step": 5500
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.8669106637830646e-05,
      "loss": 0.1611,
      "step": 6000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9527176314626602,
      "eval_loss": 0.13305681943893433,
      "eval_runtime": 183.6617,
      "eval_samples_per_second": 123.216,
      "eval_steps_per_second": 3.855,
      "step": 6011
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.8558198857649864e-05,
      "loss": 0.1436,
      "step": 6500
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.8447291077469085e-05,
      "loss": 0.1391,
      "step": 7000
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.8336383297288306e-05,
      "loss": 0.1382,
      "step": 7500
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.8225475517107528e-05,
      "loss": 0.1332,
      "step": 8000
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.8114567736926746e-05,
      "loss": 0.1385,
      "step": 8500
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.8003659956745967e-05,
      "loss": 0.136,
      "step": 9000
    },
    {
      "epoch": 1.58,
      "learning_rate": 1.7892752176565188e-05,
      "loss": 0.1426,
      "step": 9500
    },
    {
      "epoch": 1.66,
      "learning_rate": 1.7781844396384406e-05,
      "loss": 0.1308,
      "step": 10000
    },
    {
      "epoch": 1.75,
      "learning_rate": 1.767093661620363e-05,
      "loss": 0.1355,
      "step": 10500
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.7560028836022848e-05,
      "loss": 0.1331,
      "step": 11000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.744912105584207e-05,
      "loss": 0.1272,
      "step": 11500
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.7338213275661287e-05,
      "loss": 0.1345,
      "step": 12000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9608042421564296,
      "eval_loss": 0.11096587032079697,
      "eval_runtime": 183.8189,
      "eval_samples_per_second": 123.11,
      "eval_steps_per_second": 3.852,
      "step": 12022
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.722730549548051e-05,
      "loss": 0.1176,
      "step": 12500
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.711639771529973e-05,
      "loss": 0.1138,
      "step": 13000
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.700548993511895e-05,
      "loss": 0.1121,
      "step": 13500
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.689458215493817e-05,
      "loss": 0.1163,
      "step": 14000
    },
    {
      "epoch": 2.41,
      "learning_rate": 1.678367437475739e-05,
      "loss": 0.1115,
      "step": 14500
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.667276659457661e-05,
      "loss": 0.1201,
      "step": 15000
    },
    {
      "epoch": 2.58,
      "learning_rate": 1.656185881439583e-05,
      "loss": 0.1215,
      "step": 15500
    },
    {
      "epoch": 2.66,
      "learning_rate": 1.6450951034215054e-05,
      "loss": 0.112,
      "step": 16000
    },
    {
      "epoch": 2.74,
      "learning_rate": 1.634004325403427e-05,
      "loss": 0.122,
      "step": 16500
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.6229135473853493e-05,
      "loss": 0.1084,
      "step": 17000
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.611822769367271e-05,
      "loss": 0.1208,
      "step": 17500
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.6007319913491932e-05,
      "loss": 0.1085,
      "step": 18000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9708351745470615,
      "eval_loss": 0.08641747385263443,
      "eval_runtime": 183.9354,
      "eval_samples_per_second": 123.032,
      "eval_steps_per_second": 3.849,
      "step": 18033
    },
    {
      "epoch": 3.08,
      "learning_rate": 1.5896412133311153e-05,
      "loss": 0.1011,
      "step": 18500
    },
    {
      "epoch": 3.16,
      "learning_rate": 1.5785504353130374e-05,
      "loss": 0.1,
      "step": 19000
    },
    {
      "epoch": 3.24,
      "learning_rate": 1.5674596572949596e-05,
      "loss": 0.102,
      "step": 19500
    },
    {
      "epoch": 3.33,
      "learning_rate": 1.5563688792768813e-05,
      "loss": 0.0978,
      "step": 20000
    },
    {
      "epoch": 3.41,
      "learning_rate": 1.5452781012588035e-05,
      "loss": 0.1005,
      "step": 20500
    },
    {
      "epoch": 3.49,
      "learning_rate": 1.5341873232407252e-05,
      "loss": 0.1019,
      "step": 21000
    },
    {
      "epoch": 3.58,
      "learning_rate": 1.5230965452226474e-05,
      "loss": 0.0985,
      "step": 21500
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.5120057672045697e-05,
      "loss": 0.0971,
      "step": 22000
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.5009149891864916e-05,
      "loss": 0.107,
      "step": 22500
    },
    {
      "epoch": 3.83,
      "learning_rate": 1.4898242111684136e-05,
      "loss": 0.1001,
      "step": 23000
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.4787334331503355e-05,
      "loss": 0.0988,
      "step": 23500
    },
    {
      "epoch": 3.99,
      "learning_rate": 1.4676426551322576e-05,
      "loss": 0.0994,
      "step": 24000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.974856385329209,
      "eval_loss": 0.07361753284931183,
      "eval_runtime": 183.7502,
      "eval_samples_per_second": 123.156,
      "eval_steps_per_second": 3.853,
      "step": 24044
    },
    {
      "epoch": 4.08,
      "learning_rate": 1.4565518771141798e-05,
      "loss": 0.0895,
      "step": 24500
    },
    {
      "epoch": 4.16,
      "learning_rate": 1.4454610990961017e-05,
      "loss": 0.0833,
      "step": 25000
    },
    {
      "epoch": 4.24,
      "learning_rate": 1.4343703210780238e-05,
      "loss": 0.0866,
      "step": 25500
    },
    {
      "epoch": 4.33,
      "learning_rate": 1.4232795430599458e-05,
      "loss": 0.0892,
      "step": 26000
    },
    {
      "epoch": 4.41,
      "learning_rate": 1.4121887650418678e-05,
      "loss": 0.0837,
      "step": 26500
    },
    {
      "epoch": 4.49,
      "learning_rate": 1.4010979870237897e-05,
      "loss": 0.0903,
      "step": 27000
    },
    {
      "epoch": 4.57,
      "learning_rate": 1.390007209005712e-05,
      "loss": 0.0825,
      "step": 27500
    },
    {
      "epoch": 4.66,
      "learning_rate": 1.378916430987634e-05,
      "loss": 0.0905,
      "step": 28000
    },
    {
      "epoch": 4.74,
      "learning_rate": 1.3678256529695559e-05,
      "loss": 0.0822,
      "step": 28500
    },
    {
      "epoch": 4.82,
      "learning_rate": 1.3567348749514779e-05,
      "loss": 0.0904,
      "step": 29000
    },
    {
      "epoch": 4.91,
      "learning_rate": 1.3456440969334e-05,
      "loss": 0.0938,
      "step": 29500
    },
    {
      "epoch": 4.99,
      "learning_rate": 1.334553318915322e-05,
      "loss": 0.0905,
      "step": 30000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9796288113124172,
      "eval_loss": 0.06523095816373825,
      "eval_runtime": 184.074,
      "eval_samples_per_second": 122.94,
      "eval_steps_per_second": 3.846,
      "step": 30055
    },
    {
      "epoch": 5.07,
      "learning_rate": 1.323462540897244e-05,
      "loss": 0.072,
      "step": 30500
    },
    {
      "epoch": 5.16,
      "learning_rate": 1.3123717628791662e-05,
      "loss": 0.0769,
      "step": 31000
    },
    {
      "epoch": 5.24,
      "learning_rate": 1.3012809848610881e-05,
      "loss": 0.0761,
      "step": 31500
    },
    {
      "epoch": 5.32,
      "learning_rate": 1.2901902068430101e-05,
      "loss": 0.0782,
      "step": 32000
    },
    {
      "epoch": 5.41,
      "learning_rate": 1.279099428824932e-05,
      "loss": 0.0682,
      "step": 32500
    },
    {
      "epoch": 5.49,
      "learning_rate": 1.2680086508068543e-05,
      "loss": 0.0799,
      "step": 33000
    },
    {
      "epoch": 5.57,
      "learning_rate": 1.2569178727887763e-05,
      "loss": 0.0766,
      "step": 33500
    },
    {
      "epoch": 5.66,
      "learning_rate": 1.2458270947706982e-05,
      "loss": 0.0762,
      "step": 34000
    },
    {
      "epoch": 5.74,
      "learning_rate": 1.2347363167526204e-05,
      "loss": 0.0775,
      "step": 34500
    },
    {
      "epoch": 5.82,
      "learning_rate": 1.2236455387345423e-05,
      "loss": 0.0756,
      "step": 35000
    },
    {
      "epoch": 5.91,
      "learning_rate": 1.2125547607164643e-05,
      "loss": 0.0784,
      "step": 35500
    },
    {
      "epoch": 5.99,
      "learning_rate": 1.2014639826983866e-05,
      "loss": 0.0807,
      "step": 36000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9823243482103402,
      "eval_loss": 0.0608493909239769,
      "eval_runtime": 183.9197,
      "eval_samples_per_second": 123.043,
      "eval_steps_per_second": 3.85,
      "step": 36066
    },
    {
      "epoch": 6.07,
      "learning_rate": 1.1903732046803085e-05,
      "loss": 0.0664,
      "step": 36500
    },
    {
      "epoch": 6.16,
      "learning_rate": 1.1792824266622305e-05,
      "loss": 0.0656,
      "step": 37000
    },
    {
      "epoch": 6.24,
      "learning_rate": 1.1681916486441524e-05,
      "loss": 0.0669,
      "step": 37500
    },
    {
      "epoch": 6.32,
      "learning_rate": 1.1571008706260745e-05,
      "loss": 0.0646,
      "step": 38000
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.1460100926079965e-05,
      "loss": 0.067,
      "step": 38500
    },
    {
      "epoch": 6.49,
      "learning_rate": 1.1349193145899186e-05,
      "loss": 0.0708,
      "step": 39000
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.1238285365718407e-05,
      "loss": 0.0729,
      "step": 39500
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.1127377585537627e-05,
      "loss": 0.0682,
      "step": 40000
    },
    {
      "epoch": 6.74,
      "learning_rate": 1.1016469805356846e-05,
      "loss": 0.0747,
      "step": 40500
    },
    {
      "epoch": 6.82,
      "learning_rate": 1.0905562025176066e-05,
      "loss": 0.0697,
      "step": 41000
    },
    {
      "epoch": 6.9,
      "learning_rate": 1.0794654244995286e-05,
      "loss": 0.0695,
      "step": 41500
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.0683746464814508e-05,
      "loss": 0.0692,
      "step": 42000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9861246133451171,
      "eval_loss": 0.05471781641244888,
      "eval_runtime": 184.0533,
      "eval_samples_per_second": 122.954,
      "eval_steps_per_second": 3.847,
      "step": 42077
    },
    {
      "epoch": 7.07,
      "learning_rate": 1.0572838684633728e-05,
      "loss": 0.062,
      "step": 42500
    },
    {
      "epoch": 7.15,
      "learning_rate": 1.0461930904452948e-05,
      "loss": 0.0573,
      "step": 43000
    },
    {
      "epoch": 7.24,
      "learning_rate": 1.0351023124272169e-05,
      "loss": 0.0599,
      "step": 43500
    },
    {
      "epoch": 7.32,
      "learning_rate": 1.0240115344091388e-05,
      "loss": 0.0577,
      "step": 44000
    },
    {
      "epoch": 7.4,
      "learning_rate": 1.012920756391061e-05,
      "loss": 0.0582,
      "step": 44500
    },
    {
      "epoch": 7.49,
      "learning_rate": 1.001829978372983e-05,
      "loss": 0.0637,
      "step": 45000
    },
    {
      "epoch": 7.57,
      "learning_rate": 9.90739200354905e-06,
      "loss": 0.0658,
      "step": 45500
    },
    {
      "epoch": 7.65,
      "learning_rate": 9.79648422336827e-06,
      "loss": 0.06,
      "step": 46000
    },
    {
      "epoch": 7.74,
      "learning_rate": 9.68557644318749e-06,
      "loss": 0.0575,
      "step": 46500
    },
    {
      "epoch": 7.82,
      "learning_rate": 9.57466866300671e-06,
      "loss": 0.0635,
      "step": 47000
    },
    {
      "epoch": 7.9,
      "learning_rate": 9.46376088282593e-06,
      "loss": 0.0623,
      "step": 47500
    },
    {
      "epoch": 7.99,
      "learning_rate": 9.352853102645151e-06,
      "loss": 0.0604,
      "step": 48000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9859478568272205,
      "eval_loss": 0.051153190433979034,
      "eval_runtime": 184.082,
      "eval_samples_per_second": 122.934,
      "eval_steps_per_second": 3.846,
      "step": 48088
    },
    {
      "epoch": 8.07,
      "learning_rate": 9.241945322464373e-06,
      "loss": 0.0507,
      "step": 48500
    },
    {
      "epoch": 8.15,
      "learning_rate": 9.131037542283592e-06,
      "loss": 0.0538,
      "step": 49000
    },
    {
      "epoch": 8.23,
      "learning_rate": 9.020129762102813e-06,
      "loss": 0.0506,
      "step": 49500
    },
    {
      "epoch": 8.32,
      "learning_rate": 8.909221981922033e-06,
      "loss": 0.0526,
      "step": 50000
    },
    {
      "epoch": 8.4,
      "learning_rate": 8.798314201741252e-06,
      "loss": 0.0579,
      "step": 50500
    },
    {
      "epoch": 8.48,
      "learning_rate": 8.687406421560474e-06,
      "loss": 0.051,
      "step": 51000
    },
    {
      "epoch": 8.57,
      "learning_rate": 8.576498641379693e-06,
      "loss": 0.0508,
      "step": 51500
    },
    {
      "epoch": 8.65,
      "learning_rate": 8.465590861198913e-06,
      "loss": 0.0566,
      "step": 52000
    },
    {
      "epoch": 8.73,
      "learning_rate": 8.354683081018134e-06,
      "loss": 0.0582,
      "step": 52500
    },
    {
      "epoch": 8.82,
      "learning_rate": 8.243775300837355e-06,
      "loss": 0.0512,
      "step": 53000
    },
    {
      "epoch": 8.9,
      "learning_rate": 8.132867520656575e-06,
      "loss": 0.0544,
      "step": 53500
    },
    {
      "epoch": 8.98,
      "learning_rate": 8.021959740475796e-06,
      "loss": 0.0482,
      "step": 54000
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9897481219619974,
      "eval_loss": 0.04574726149439812,
      "eval_runtime": 184.1495,
      "eval_samples_per_second": 122.889,
      "eval_steps_per_second": 3.845,
      "step": 54099
    },
    {
      "epoch": 9.07,
      "learning_rate": 7.911051960295015e-06,
      "loss": 0.0506,
      "step": 54500
    },
    {
      "epoch": 9.15,
      "learning_rate": 7.800144180114235e-06,
      "loss": 0.0423,
      "step": 55000
    },
    {
      "epoch": 9.23,
      "learning_rate": 7.689236399933456e-06,
      "loss": 0.0458,
      "step": 55500
    },
    {
      "epoch": 9.32,
      "learning_rate": 7.5783286197526765e-06,
      "loss": 0.0454,
      "step": 56000
    },
    {
      "epoch": 9.4,
      "learning_rate": 7.467420839571896e-06,
      "loss": 0.0434,
      "step": 56500
    },
    {
      "epoch": 9.48,
      "learning_rate": 7.356513059391117e-06,
      "loss": 0.0494,
      "step": 57000
    },
    {
      "epoch": 9.57,
      "learning_rate": 7.245605279210337e-06,
      "loss": 0.0477,
      "step": 57500
    },
    {
      "epoch": 9.65,
      "learning_rate": 7.134697499029557e-06,
      "loss": 0.0473,
      "step": 58000
    },
    {
      "epoch": 9.73,
      "learning_rate": 7.0237897188487784e-06,
      "loss": 0.0488,
      "step": 58500
    },
    {
      "epoch": 9.82,
      "learning_rate": 6.912881938667998e-06,
      "loss": 0.0518,
      "step": 59000
    },
    {
      "epoch": 9.9,
      "learning_rate": 6.801974158487219e-06,
      "loss": 0.0482,
      "step": 59500
    },
    {
      "epoch": 9.98,
      "learning_rate": 6.691066378306439e-06,
      "loss": 0.0455,
      "step": 60000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9880689350419797,
      "eval_loss": 0.049992308020591736,
      "eval_runtime": 183.6678,
      "eval_samples_per_second": 123.212,
      "eval_steps_per_second": 3.855,
      "step": 60110
    },
    {
      "epoch": 10.06,
      "learning_rate": 6.580158598125659e-06,
      "loss": 0.0419,
      "step": 60500
    },
    {
      "epoch": 10.15,
      "learning_rate": 6.4692508179448795e-06,
      "loss": 0.0401,
      "step": 61000
    },
    {
      "epoch": 10.23,
      "learning_rate": 6.3583430377641e-06,
      "loss": 0.0403,
      "step": 61500
    },
    {
      "epoch": 10.31,
      "learning_rate": 6.247435257583319e-06,
      "loss": 0.0375,
      "step": 62000
    },
    {
      "epoch": 10.4,
      "learning_rate": 6.136527477402541e-06,
      "loss": 0.0461,
      "step": 62500
    },
    {
      "epoch": 10.48,
      "learning_rate": 6.025619697221761e-06,
      "loss": 0.0443,
      "step": 63000
    },
    {
      "epoch": 10.56,
      "learning_rate": 5.9147119170409805e-06,
      "loss": 0.0439,
      "step": 63500
    },
    {
      "epoch": 10.65,
      "learning_rate": 5.803804136860202e-06,
      "loss": 0.0374,
      "step": 64000
    },
    {
      "epoch": 10.73,
      "learning_rate": 5.692896356679421e-06,
      "loss": 0.0382,
      "step": 64500
    },
    {
      "epoch": 10.81,
      "learning_rate": 5.581988576498642e-06,
      "loss": 0.0428,
      "step": 65000
    },
    {
      "epoch": 10.9,
      "learning_rate": 5.471080796317863e-06,
      "loss": 0.0401,
      "step": 65500
    },
    {
      "epoch": 10.98,
      "learning_rate": 5.3601730161370824e-06,
      "loss": 0.0434,
      "step": 66000
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.990234202386213,
      "eval_loss": 0.046325162053108215,
      "eval_runtime": 183.7954,
      "eval_samples_per_second": 123.126,
      "eval_steps_per_second": 3.852,
      "step": 66121
    },
    {
      "epoch": 11.06,
      "learning_rate": 5.249265235956302e-06,
      "loss": 0.044,
      "step": 66500
    },
    {
      "epoch": 11.15,
      "learning_rate": 5.138357455775523e-06,
      "loss": 0.0348,
      "step": 67000
    },
    {
      "epoch": 11.23,
      "learning_rate": 5.0274496755947436e-06,
      "loss": 0.0358,
      "step": 67500
    },
    {
      "epoch": 11.31,
      "learning_rate": 4.916541895413964e-06,
      "loss": 0.0356,
      "step": 68000
    },
    {
      "epoch": 11.4,
      "learning_rate": 4.805634115233184e-06,
      "loss": 0.0366,
      "step": 68500
    },
    {
      "epoch": 11.48,
      "learning_rate": 4.694726335052404e-06,
      "loss": 0.0361,
      "step": 69000
    },
    {
      "epoch": 11.56,
      "learning_rate": 4.583818554871624e-06,
      "loss": 0.038,
      "step": 69500
    },
    {
      "epoch": 11.65,
      "learning_rate": 4.4729107746908455e-06,
      "loss": 0.0383,
      "step": 70000
    },
    {
      "epoch": 11.73,
      "learning_rate": 4.362002994510065e-06,
      "loss": 0.0429,
      "step": 70500
    },
    {
      "epoch": 11.81,
      "learning_rate": 4.251095214329285e-06,
      "loss": 0.0341,
      "step": 71000
    },
    {
      "epoch": 11.89,
      "learning_rate": 4.140187434148506e-06,
      "loss": 0.0369,
      "step": 71500
    },
    {
      "epoch": 11.98,
      "learning_rate": 4.029279653967726e-06,
      "loss": 0.0343,
      "step": 72000
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9897923110914715,
      "eval_loss": 0.050745535641908646,
      "eval_runtime": 183.7772,
      "eval_samples_per_second": 123.138,
      "eval_steps_per_second": 3.852,
      "step": 72132
    },
    {
      "epoch": 12.06,
      "learning_rate": 3.9183718737869465e-06,
      "loss": 0.0326,
      "step": 72500
    },
    {
      "epoch": 12.14,
      "learning_rate": 3.807464093606167e-06,
      "loss": 0.0315,
      "step": 73000
    },
    {
      "epoch": 12.23,
      "learning_rate": 3.6965563134253873e-06,
      "loss": 0.0338,
      "step": 73500
    },
    {
      "epoch": 12.31,
      "learning_rate": 3.5856485332446072e-06,
      "loss": 0.0326,
      "step": 74000
    },
    {
      "epoch": 12.39,
      "learning_rate": 3.4747407530638276e-06,
      "loss": 0.0365,
      "step": 74500
    },
    {
      "epoch": 12.48,
      "learning_rate": 3.3638329728830484e-06,
      "loss": 0.0312,
      "step": 75000
    },
    {
      "epoch": 12.56,
      "learning_rate": 3.252925192702268e-06,
      "loss": 0.0321,
      "step": 75500
    },
    {
      "epoch": 12.64,
      "learning_rate": 3.1420174125214888e-06,
      "loss": 0.0281,
      "step": 76000
    },
    {
      "epoch": 12.73,
      "learning_rate": 3.031109632340709e-06,
      "loss": 0.0399,
      "step": 76500
    },
    {
      "epoch": 12.81,
      "learning_rate": 2.920201852159929e-06,
      "loss": 0.0347,
      "step": 77000
    },
    {
      "epoch": 12.89,
      "learning_rate": 2.8092940719791495e-06,
      "loss": 0.0296,
      "step": 77500
    },
    {
      "epoch": 12.98,
      "learning_rate": 2.69838629179837e-06,
      "loss": 0.0329,
      "step": 78000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.991736632788334,
      "eval_loss": 0.04343385621905327,
      "eval_runtime": 183.857,
      "eval_samples_per_second": 123.085,
      "eval_steps_per_second": 3.851,
      "step": 78143
    },
    {
      "epoch": 13.06,
      "learning_rate": 2.5874785116175907e-06,
      "loss": 0.0345,
      "step": 78500
    },
    {
      "epoch": 13.14,
      "learning_rate": 2.47657073143681e-06,
      "loss": 0.0337,
      "step": 79000
    },
    {
      "epoch": 13.23,
      "learning_rate": 2.365662951256031e-06,
      "loss": 0.0269,
      "step": 79500
    },
    {
      "epoch": 13.31,
      "learning_rate": 2.254755171075251e-06,
      "loss": 0.0318,
      "step": 80000
    },
    {
      "epoch": 13.39,
      "learning_rate": 2.1438473908944713e-06,
      "loss": 0.033,
      "step": 80500
    },
    {
      "epoch": 13.48,
      "learning_rate": 2.0329396107136917e-06,
      "loss": 0.0325,
      "step": 81000
    },
    {
      "epoch": 13.56,
      "learning_rate": 1.922031830532912e-06,
      "loss": 0.0241,
      "step": 81500
    },
    {
      "epoch": 13.64,
      "learning_rate": 1.8111240503521325e-06,
      "loss": 0.0282,
      "step": 82000
    },
    {
      "epoch": 13.72,
      "learning_rate": 1.7002162701713526e-06,
      "loss": 0.027,
      "step": 82500
    },
    {
      "epoch": 13.81,
      "learning_rate": 1.589308489990573e-06,
      "loss": 0.0348,
      "step": 83000
    },
    {
      "epoch": 13.89,
      "learning_rate": 1.4784007098097932e-06,
      "loss": 0.0267,
      "step": 83500
    },
    {
      "epoch": 13.97,
      "learning_rate": 1.3674929296290136e-06,
      "loss": 0.0284,
      "step": 84000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.9918692001767565,
      "eval_loss": 0.04500320181250572,
      "eval_runtime": 183.85,
      "eval_samples_per_second": 123.089,
      "eval_steps_per_second": 3.851,
      "step": 84154
    },
    {
      "epoch": 14.06,
      "learning_rate": 1.256585149448234e-06,
      "loss": 0.0336,
      "step": 84500
    },
    {
      "epoch": 14.14,
      "learning_rate": 1.145677369267454e-06,
      "loss": 0.0285,
      "step": 85000
    },
    {
      "epoch": 14.22,
      "learning_rate": 1.0347695890866745e-06,
      "loss": 0.0265,
      "step": 85500
    },
    {
      "epoch": 14.31,
      "learning_rate": 9.238618089058949e-07,
      "loss": 0.0244,
      "step": 86000
    },
    {
      "epoch": 14.39,
      "learning_rate": 8.129540287251151e-07,
      "loss": 0.0288,
      "step": 86500
    },
    {
      "epoch": 14.47,
      "learning_rate": 7.020462485443355e-07,
      "loss": 0.0272,
      "step": 87000
    },
    {
      "epoch": 14.56,
      "learning_rate": 5.911384683635558e-07,
      "loss": 0.0293,
      "step": 87500
    },
    {
      "epoch": 14.64,
      "learning_rate": 4.802306881827761e-07,
      "loss": 0.0275,
      "step": 88000
    },
    {
      "epoch": 14.72,
      "learning_rate": 3.693229080019964e-07,
      "loss": 0.0246,
      "step": 88500
    },
    {
      "epoch": 14.81,
      "learning_rate": 2.5841512782121667e-07,
      "loss": 0.0301,
      "step": 89000
    },
    {
      "epoch": 14.89,
      "learning_rate": 1.47507347640437e-07,
      "loss": 0.0275,
      "step": 89500
    },
    {
      "epoch": 14.97,
      "learning_rate": 3.65995674596573e-08,
      "loss": 0.0223,
      "step": 90000
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9916482545293858,
      "eval_loss": 0.04581888020038605,
      "eval_runtime": 183.9121,
      "eval_samples_per_second": 123.048,
      "eval_steps_per_second": 3.85,
      "step": 90165
    }
  ],
  "max_steps": 90165,
  "num_train_epochs": 15,
  "total_flos": 8.488169482669056e+16,
  "trial_name": null,
  "trial_params": null
}
|
|