lomov's picture
Upload folder using huggingface_hub
3d346c9 verified
{
"best_metric": 0.4904100298881531,
"best_model_checkpoint": "strategydisofmaterialimpactsv1/checkpoint-410",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 410,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04878048780487805,
"grad_norm": 5.26182746887207,
"learning_rate": 9.75609756097561e-07,
"loss": 2.0959,
"step": 4
},
{
"epoch": 0.0975609756097561,
"grad_norm": 3.954611301422119,
"learning_rate": 1.951219512195122e-06,
"loss": 2.084,
"step": 8
},
{
"epoch": 0.14634146341463414,
"grad_norm": 2.762833595275879,
"learning_rate": 2.926829268292683e-06,
"loss": 2.1114,
"step": 12
},
{
"epoch": 0.1951219512195122,
"grad_norm": 3.979111909866333,
"learning_rate": 3.902439024390244e-06,
"loss": 2.0699,
"step": 16
},
{
"epoch": 0.24390243902439024,
"grad_norm": 2.9182474613189697,
"learning_rate": 4.8780487804878055e-06,
"loss": 2.0512,
"step": 20
},
{
"epoch": 0.2926829268292683,
"grad_norm": 4.365296840667725,
"learning_rate": 5.853658536585366e-06,
"loss": 2.049,
"step": 24
},
{
"epoch": 0.34146341463414637,
"grad_norm": 3.4509384632110596,
"learning_rate": 6.829268292682928e-06,
"loss": 2.0762,
"step": 28
},
{
"epoch": 0.3902439024390244,
"grad_norm": 3.717407703399658,
"learning_rate": 7.804878048780489e-06,
"loss": 2.1205,
"step": 32
},
{
"epoch": 0.43902439024390244,
"grad_norm": 3.0547749996185303,
"learning_rate": 8.78048780487805e-06,
"loss": 2.0718,
"step": 36
},
{
"epoch": 0.4878048780487805,
"grad_norm": 5.152350902557373,
"learning_rate": 9.756097560975611e-06,
"loss": 2.0393,
"step": 40
},
{
"epoch": 0.5365853658536586,
"grad_norm": 3.143453359603882,
"learning_rate": 9.91869918699187e-06,
"loss": 2.075,
"step": 44
},
{
"epoch": 0.5853658536585366,
"grad_norm": 3.0932018756866455,
"learning_rate": 9.81029810298103e-06,
"loss": 2.0555,
"step": 48
},
{
"epoch": 0.6341463414634146,
"grad_norm": 3.708493232727051,
"learning_rate": 9.70189701897019e-06,
"loss": 2.0697,
"step": 52
},
{
"epoch": 0.6829268292682927,
"grad_norm": 5.804870128631592,
"learning_rate": 9.59349593495935e-06,
"loss": 2.0829,
"step": 56
},
{
"epoch": 0.7317073170731707,
"grad_norm": 4.4362616539001465,
"learning_rate": 9.485094850948512e-06,
"loss": 2.0122,
"step": 60
},
{
"epoch": 0.7804878048780488,
"grad_norm": 3.124617099761963,
"learning_rate": 9.37669376693767e-06,
"loss": 2.0341,
"step": 64
},
{
"epoch": 0.8292682926829268,
"grad_norm": 6.305838108062744,
"learning_rate": 9.268292682926831e-06,
"loss": 2.006,
"step": 68
},
{
"epoch": 0.8780487804878049,
"grad_norm": 4.479095458984375,
"learning_rate": 9.15989159891599e-06,
"loss": 1.9879,
"step": 72
},
{
"epoch": 0.926829268292683,
"grad_norm": 4.230350494384766,
"learning_rate": 9.051490514905151e-06,
"loss": 2.0266,
"step": 76
},
{
"epoch": 0.975609756097561,
"grad_norm": 4.699102401733398,
"learning_rate": 8.94308943089431e-06,
"loss": 1.9846,
"step": 80
},
{
"epoch": 1.0,
"eval_accuracy": 0.4329268292682927,
"eval_f1_macro": 0.3427293862494566,
"eval_f1_micro": 0.4329268292682927,
"eval_f1_weighted": 0.34234078365494675,
"eval_loss": 1.8970143795013428,
"eval_precision_macro": 0.5188128944226504,
"eval_precision_micro": 0.4329268292682927,
"eval_precision_weighted": 0.51827396280103,
"eval_recall_macro": 0.43154761904761907,
"eval_recall_micro": 0.4329268292682927,
"eval_recall_weighted": 0.4329268292682927,
"eval_runtime": 0.9572,
"eval_samples_per_second": 171.331,
"eval_steps_per_second": 11.492,
"step": 82
},
{
"epoch": 1.024390243902439,
"grad_norm": 5.648143768310547,
"learning_rate": 8.834688346883469e-06,
"loss": 1.8587,
"step": 84
},
{
"epoch": 1.0731707317073171,
"grad_norm": 5.579315185546875,
"learning_rate": 8.726287262872629e-06,
"loss": 1.8257,
"step": 88
},
{
"epoch": 1.1219512195121952,
"grad_norm": 6.962705135345459,
"learning_rate": 8.617886178861789e-06,
"loss": 1.8158,
"step": 92
},
{
"epoch": 1.170731707317073,
"grad_norm": 7.015571117401123,
"learning_rate": 8.509485094850949e-06,
"loss": 1.6853,
"step": 96
},
{
"epoch": 1.2195121951219512,
"grad_norm": 6.109464168548584,
"learning_rate": 8.401084010840109e-06,
"loss": 1.8065,
"step": 100
},
{
"epoch": 1.2682926829268293,
"grad_norm": 7.725495338439941,
"learning_rate": 8.292682926829268e-06,
"loss": 1.7683,
"step": 104
},
{
"epoch": 1.3170731707317074,
"grad_norm": 8.51897144317627,
"learning_rate": 8.184281842818428e-06,
"loss": 1.5712,
"step": 108
},
{
"epoch": 1.3658536585365852,
"grad_norm": 7.310000419616699,
"learning_rate": 8.075880758807588e-06,
"loss": 1.5885,
"step": 112
},
{
"epoch": 1.4146341463414633,
"grad_norm": 6.727824687957764,
"learning_rate": 7.967479674796748e-06,
"loss": 1.4469,
"step": 116
},
{
"epoch": 1.4634146341463414,
"grad_norm": 5.8941450119018555,
"learning_rate": 7.859078590785908e-06,
"loss": 1.441,
"step": 120
},
{
"epoch": 1.5121951219512195,
"grad_norm": 8.939291954040527,
"learning_rate": 7.750677506775068e-06,
"loss": 1.3494,
"step": 124
},
{
"epoch": 1.5609756097560976,
"grad_norm": 6.784910678863525,
"learning_rate": 7.64227642276423e-06,
"loss": 1.4556,
"step": 128
},
{
"epoch": 1.6097560975609757,
"grad_norm": 5.674899578094482,
"learning_rate": 7.5338753387533885e-06,
"loss": 1.1797,
"step": 132
},
{
"epoch": 1.6585365853658538,
"grad_norm": 6.600795269012451,
"learning_rate": 7.425474254742548e-06,
"loss": 1.3146,
"step": 136
},
{
"epoch": 1.7073170731707317,
"grad_norm": 7.859283447265625,
"learning_rate": 7.317073170731707e-06,
"loss": 1.2627,
"step": 140
},
{
"epoch": 1.7560975609756098,
"grad_norm": 7.052417278289795,
"learning_rate": 7.208672086720868e-06,
"loss": 1.144,
"step": 144
},
{
"epoch": 1.8048780487804879,
"grad_norm": 5.840285301208496,
"learning_rate": 7.100271002710027e-06,
"loss": 1.1237,
"step": 148
},
{
"epoch": 1.8536585365853657,
"grad_norm": 7.636930465698242,
"learning_rate": 6.991869918699188e-06,
"loss": 1.0481,
"step": 152
},
{
"epoch": 1.9024390243902438,
"grad_norm": 7.1155877113342285,
"learning_rate": 6.883468834688347e-06,
"loss": 1.2339,
"step": 156
},
{
"epoch": 1.951219512195122,
"grad_norm": 6.952579975128174,
"learning_rate": 6.775067750677508e-06,
"loss": 0.9287,
"step": 160
},
{
"epoch": 2.0,
"grad_norm": 10.576449394226074,
"learning_rate": 6.666666666666667e-06,
"loss": 1.1756,
"step": 164
},
{
"epoch": 2.0,
"eval_accuracy": 0.8048780487804879,
"eval_f1_macro": 0.7928380685977737,
"eval_f1_micro": 0.8048780487804879,
"eval_f1_weighted": 0.7953137507177211,
"eval_loss": 0.9494345784187317,
"eval_precision_macro": 0.8125730994152047,
"eval_precision_micro": 0.8048780487804879,
"eval_precision_weighted": 0.8141135358722009,
"eval_recall_macro": 0.8014880952380952,
"eval_recall_micro": 0.8048780487804879,
"eval_recall_weighted": 0.8048780487804879,
"eval_runtime": 0.942,
"eval_samples_per_second": 174.105,
"eval_steps_per_second": 11.678,
"step": 164
},
{
"epoch": 2.048780487804878,
"grad_norm": 7.403735637664795,
"learning_rate": 6.558265582655827e-06,
"loss": 1.0034,
"step": 168
},
{
"epoch": 2.097560975609756,
"grad_norm": 6.475048542022705,
"learning_rate": 6.449864498644986e-06,
"loss": 0.8662,
"step": 172
},
{
"epoch": 2.1463414634146343,
"grad_norm": 8.434925079345703,
"learning_rate": 6.368563685636857e-06,
"loss": 1.1553,
"step": 176
},
{
"epoch": 2.1951219512195124,
"grad_norm": 5.570111274719238,
"learning_rate": 6.260162601626017e-06,
"loss": 0.8746,
"step": 180
},
{
"epoch": 2.2439024390243905,
"grad_norm": 8.765731811523438,
"learning_rate": 6.1517615176151765e-06,
"loss": 1.0089,
"step": 184
},
{
"epoch": 2.292682926829268,
"grad_norm": 4.403049468994141,
"learning_rate": 6.043360433604336e-06,
"loss": 0.8467,
"step": 188
},
{
"epoch": 2.341463414634146,
"grad_norm": 9.121601104736328,
"learning_rate": 5.934959349593496e-06,
"loss": 0.9069,
"step": 192
},
{
"epoch": 2.3902439024390243,
"grad_norm": 6.2799811363220215,
"learning_rate": 5.826558265582656e-06,
"loss": 0.7733,
"step": 196
},
{
"epoch": 2.4390243902439024,
"grad_norm": 6.013058185577393,
"learning_rate": 5.718157181571816e-06,
"loss": 0.8735,
"step": 200
},
{
"epoch": 2.4878048780487805,
"grad_norm": 6.162946701049805,
"learning_rate": 5.609756097560977e-06,
"loss": 0.6666,
"step": 204
},
{
"epoch": 2.5365853658536586,
"grad_norm": 4.041454792022705,
"learning_rate": 5.501355013550136e-06,
"loss": 0.6262,
"step": 208
},
{
"epoch": 2.5853658536585367,
"grad_norm": 6.468296051025391,
"learning_rate": 5.3929539295392965e-06,
"loss": 0.8259,
"step": 212
},
{
"epoch": 2.6341463414634148,
"grad_norm": 9.301799774169922,
"learning_rate": 5.2845528455284555e-06,
"loss": 0.8074,
"step": 216
},
{
"epoch": 2.682926829268293,
"grad_norm": 9.052480697631836,
"learning_rate": 5.176151761517616e-06,
"loss": 0.7582,
"step": 220
},
{
"epoch": 2.7317073170731705,
"grad_norm": 10.193408012390137,
"learning_rate": 5.067750677506775e-06,
"loss": 0.7229,
"step": 224
},
{
"epoch": 2.7804878048780486,
"grad_norm": 9.648282051086426,
"learning_rate": 4.959349593495935e-06,
"loss": 0.714,
"step": 228
},
{
"epoch": 2.8292682926829267,
"grad_norm": 5.158674240112305,
"learning_rate": 4.850948509485095e-06,
"loss": 0.701,
"step": 232
},
{
"epoch": 2.8780487804878048,
"grad_norm": 6.773287296295166,
"learning_rate": 4.742547425474256e-06,
"loss": 0.579,
"step": 236
},
{
"epoch": 2.926829268292683,
"grad_norm": 4.932857513427734,
"learning_rate": 4.634146341463416e-06,
"loss": 0.6223,
"step": 240
},
{
"epoch": 2.975609756097561,
"grad_norm": 6.144374370574951,
"learning_rate": 4.5257452574525755e-06,
"loss": 0.7543,
"step": 244
},
{
"epoch": 3.0,
"eval_accuracy": 0.8536585365853658,
"eval_f1_macro": 0.8338492833656121,
"eval_f1_micro": 0.8536585365853658,
"eval_f1_weighted": 0.8362103254837173,
"eval_loss": 0.6258153915405273,
"eval_precision_macro": 0.8495264546035806,
"eval_precision_micro": 0.8536585365853658,
"eval_precision_weighted": 0.8505702817977668,
"eval_recall_macro": 0.850297619047619,
"eval_recall_micro": 0.8536585365853658,
"eval_recall_weighted": 0.8536585365853658,
"eval_runtime": 0.964,
"eval_samples_per_second": 170.127,
"eval_steps_per_second": 11.411,
"step": 246
},
{
"epoch": 3.024390243902439,
"grad_norm": 7.524605751037598,
"learning_rate": 4.4173441734417345e-06,
"loss": 0.7291,
"step": 248
},
{
"epoch": 3.073170731707317,
"grad_norm": 6.1862945556640625,
"learning_rate": 4.308943089430894e-06,
"loss": 0.664,
"step": 252
},
{
"epoch": 3.1219512195121952,
"grad_norm": 4.109325885772705,
"learning_rate": 4.200542005420054e-06,
"loss": 0.5575,
"step": 256
},
{
"epoch": 3.1707317073170733,
"grad_norm": 5.083375453948975,
"learning_rate": 4.092140921409214e-06,
"loss": 0.5287,
"step": 260
},
{
"epoch": 3.2195121951219514,
"grad_norm": 4.418028354644775,
"learning_rate": 3.983739837398374e-06,
"loss": 0.4596,
"step": 264
},
{
"epoch": 3.2682926829268295,
"grad_norm": 8.103421211242676,
"learning_rate": 3.875338753387534e-06,
"loss": 0.5384,
"step": 268
},
{
"epoch": 3.317073170731707,
"grad_norm": 9.259288787841797,
"learning_rate": 3.7669376693766942e-06,
"loss": 0.6603,
"step": 272
},
{
"epoch": 3.3658536585365852,
"grad_norm": 8.9814453125,
"learning_rate": 3.6585365853658537e-06,
"loss": 0.614,
"step": 276
},
{
"epoch": 3.4146341463414633,
"grad_norm": 7.275993824005127,
"learning_rate": 3.5501355013550136e-06,
"loss": 0.6409,
"step": 280
},
{
"epoch": 3.4634146341463414,
"grad_norm": 7.694216251373291,
"learning_rate": 3.4417344173441734e-06,
"loss": 0.5708,
"step": 284
},
{
"epoch": 3.5121951219512195,
"grad_norm": 4.590734481811523,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.4449,
"step": 288
},
{
"epoch": 3.5609756097560976,
"grad_norm": 9.011459350585938,
"learning_rate": 3.224932249322493e-06,
"loss": 0.764,
"step": 292
},
{
"epoch": 3.6097560975609757,
"grad_norm": 6.224943161010742,
"learning_rate": 3.116531165311653e-06,
"loss": 0.5983,
"step": 296
},
{
"epoch": 3.658536585365854,
"grad_norm": 7.525081157684326,
"learning_rate": 3.0081300813008134e-06,
"loss": 0.6081,
"step": 300
},
{
"epoch": 3.7073170731707314,
"grad_norm": 7.639159202575684,
"learning_rate": 2.8997289972899733e-06,
"loss": 0.5492,
"step": 304
},
{
"epoch": 3.7560975609756095,
"grad_norm": 5.162847518920898,
"learning_rate": 2.791327913279133e-06,
"loss": 0.489,
"step": 308
},
{
"epoch": 3.8048780487804876,
"grad_norm": 6.004736423492432,
"learning_rate": 2.682926829268293e-06,
"loss": 0.4218,
"step": 312
},
{
"epoch": 3.8536585365853657,
"grad_norm": 5.897964000701904,
"learning_rate": 2.574525745257453e-06,
"loss": 0.5438,
"step": 316
},
{
"epoch": 3.902439024390244,
"grad_norm": 7.25752592086792,
"learning_rate": 2.4661246612466128e-06,
"loss": 0.5559,
"step": 320
},
{
"epoch": 3.951219512195122,
"grad_norm": 6.005576133728027,
"learning_rate": 2.3577235772357727e-06,
"loss": 0.6216,
"step": 324
},
{
"epoch": 4.0,
"grad_norm": 13.720139503479004,
"learning_rate": 2.2493224932249325e-06,
"loss": 0.6314,
"step": 328
},
{
"epoch": 4.0,
"eval_accuracy": 0.8658536585365854,
"eval_f1_macro": 0.851601435352396,
"eval_f1_micro": 0.8658536585365854,
"eval_f1_weighted": 0.8538194199208925,
"eval_loss": 0.5200314521789551,
"eval_precision_macro": 0.8594329005283454,
"eval_precision_micro": 0.8658536585365854,
"eval_precision_weighted": 0.8606490578892111,
"eval_recall_macro": 0.862797619047619,
"eval_recall_micro": 0.8658536585365854,
"eval_recall_weighted": 0.8658536585365854,
"eval_runtime": 0.943,
"eval_samples_per_second": 173.915,
"eval_steps_per_second": 11.665,
"step": 328
},
{
"epoch": 4.048780487804878,
"grad_norm": 6.544665336608887,
"learning_rate": 2.1409214092140924e-06,
"loss": 0.5041,
"step": 332
},
{
"epoch": 4.097560975609756,
"grad_norm": 7.109484672546387,
"learning_rate": 2.0325203252032523e-06,
"loss": 0.4561,
"step": 336
},
{
"epoch": 4.146341463414634,
"grad_norm": 4.8075127601623535,
"learning_rate": 1.924119241192412e-06,
"loss": 0.4234,
"step": 340
},
{
"epoch": 4.195121951219512,
"grad_norm": 8.269268035888672,
"learning_rate": 1.8157181571815718e-06,
"loss": 0.3753,
"step": 344
},
{
"epoch": 4.2439024390243905,
"grad_norm": 11.827970504760742,
"learning_rate": 1.707317073170732e-06,
"loss": 0.6272,
"step": 348
},
{
"epoch": 4.2926829268292686,
"grad_norm": 6.7040228843688965,
"learning_rate": 1.5989159891598918e-06,
"loss": 0.4453,
"step": 352
},
{
"epoch": 4.341463414634147,
"grad_norm": 4.821525573730469,
"learning_rate": 1.4905149051490517e-06,
"loss": 0.4374,
"step": 356
},
{
"epoch": 4.390243902439025,
"grad_norm": 5.777258396148682,
"learning_rate": 1.3821138211382116e-06,
"loss": 0.4603,
"step": 360
},
{
"epoch": 4.439024390243903,
"grad_norm": 4.959315299987793,
"learning_rate": 1.2737127371273714e-06,
"loss": 0.49,
"step": 364
},
{
"epoch": 4.487804878048781,
"grad_norm": 5.7751898765563965,
"learning_rate": 1.1653116531165313e-06,
"loss": 0.5202,
"step": 368
},
{
"epoch": 4.536585365853659,
"grad_norm": 8.084724426269531,
"learning_rate": 1.0569105691056912e-06,
"loss": 0.5615,
"step": 372
},
{
"epoch": 4.585365853658536,
"grad_norm": 4.794303894042969,
"learning_rate": 9.485094850948511e-07,
"loss": 0.5217,
"step": 376
},
{
"epoch": 4.634146341463414,
"grad_norm": 6.632653713226318,
"learning_rate": 8.401084010840109e-07,
"loss": 0.4886,
"step": 380
},
{
"epoch": 4.682926829268292,
"grad_norm": 3.7842323780059814,
"learning_rate": 7.317073170731707e-07,
"loss": 0.3898,
"step": 384
},
{
"epoch": 4.7317073170731705,
"grad_norm": 7.341838836669922,
"learning_rate": 6.233062330623307e-07,
"loss": 0.4803,
"step": 388
},
{
"epoch": 4.780487804878049,
"grad_norm": 5.619211196899414,
"learning_rate": 5.149051490514906e-07,
"loss": 0.4797,
"step": 392
},
{
"epoch": 4.829268292682927,
"grad_norm": 6.9567742347717285,
"learning_rate": 4.0650406504065046e-07,
"loss": 0.4595,
"step": 396
},
{
"epoch": 4.878048780487805,
"grad_norm": 4.7969136238098145,
"learning_rate": 2.9810298102981034e-07,
"loss": 0.3441,
"step": 400
},
{
"epoch": 4.926829268292683,
"grad_norm": 6.794863700866699,
"learning_rate": 1.897018970189702e-07,
"loss": 0.6099,
"step": 404
},
{
"epoch": 4.975609756097561,
"grad_norm": 3.76300311088562,
"learning_rate": 8.130081300813009e-08,
"loss": 0.4751,
"step": 408
},
{
"epoch": 5.0,
"eval_accuracy": 0.8658536585365854,
"eval_f1_macro": 0.851601435352396,
"eval_f1_micro": 0.8658536585365854,
"eval_f1_weighted": 0.8538194199208925,
"eval_loss": 0.4904100298881531,
"eval_precision_macro": 0.8594329005283454,
"eval_precision_micro": 0.8658536585365854,
"eval_precision_weighted": 0.8606490578892111,
"eval_recall_macro": 0.862797619047619,
"eval_recall_micro": 0.8658536585365854,
"eval_recall_weighted": 0.8658536585365854,
"eval_runtime": 0.9624,
"eval_samples_per_second": 170.414,
"eval_steps_per_second": 11.43,
"step": 410
}
],
"logging_steps": 4,
"max_steps": 410,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 431889927536640.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}