best-performing-clustering-4 / trainer_state.json
MHGanainy's picture
MHGanainy/best-performing-clustering-4
a8cb07d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 7679,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013022528975126969,
"grad_norm": 0.13363803923130035,
"learning_rate": 6.666666666666667e-06,
"loss": 2.4964,
"step": 100
},
{
"epoch": 0.026045057950253938,
"grad_norm": 0.2602158486843109,
"learning_rate": 1.3333333333333333e-05,
"loss": 2.484,
"step": 200
},
{
"epoch": 0.03906758692538091,
"grad_norm": 0.3781226873397827,
"learning_rate": 2e-05,
"loss": 2.4035,
"step": 300
},
{
"epoch": 0.052090115900507876,
"grad_norm": 0.4238300323486328,
"learning_rate": 1.999093831153269e-05,
"loss": 2.3381,
"step": 400
},
{
"epoch": 0.06511264487563485,
"grad_norm": 0.4826453924179077,
"learning_rate": 1.9963769668970327e-05,
"loss": 2.3359,
"step": 500
},
{
"epoch": 0.07813517385076182,
"grad_norm": 0.5577328205108643,
"learning_rate": 1.991854331106791e-05,
"loss": 2.3252,
"step": 600
},
{
"epoch": 0.09115770282588878,
"grad_norm": 0.6093964576721191,
"learning_rate": 1.9855341203258605e-05,
"loss": 2.3003,
"step": 700
},
{
"epoch": 0.10418023180101575,
"grad_norm": 0.6448187828063965,
"learning_rate": 1.9774277889104696e-05,
"loss": 2.2692,
"step": 800
},
{
"epoch": 0.11720276077614272,
"grad_norm": 0.6290944218635559,
"learning_rate": 1.967550028270599e-05,
"loss": 2.2529,
"step": 900
},
{
"epoch": 0.1302252897512697,
"grad_norm": 0.6941649317741394,
"learning_rate": 1.9559187402441825e-05,
"loss": 2.2357,
"step": 1000
},
{
"epoch": 0.14324781872639666,
"grad_norm": 0.752715528011322,
"learning_rate": 1.942555004652934e-05,
"loss": 2.2442,
"step": 1100
},
{
"epoch": 0.15627034770152365,
"grad_norm": 0.6779051423072815,
"learning_rate": 1.9274830410985915e-05,
"loss": 2.2162,
"step": 1200
},
{
"epoch": 0.1692928766766506,
"grad_norm": 0.6820743083953857,
"learning_rate": 1.9107301650688188e-05,
"loss": 2.2573,
"step": 1300
},
{
"epoch": 0.18231540565177756,
"grad_norm": 1.0248582363128662,
"learning_rate": 1.8923267384323184e-05,
"loss": 2.2348,
"step": 1400
},
{
"epoch": 0.19533793462690455,
"grad_norm": 0.6467209458351135,
"learning_rate": 1.8723061144128728e-05,
"loss": 2.2301,
"step": 1500
},
{
"epoch": 0.2083604636020315,
"grad_norm": 0.918159008026123,
"learning_rate": 1.8507045771420383e-05,
"loss": 2.2039,
"step": 1600
},
{
"epoch": 0.2213829925771585,
"grad_norm": 0.7963811755180359,
"learning_rate": 1.8275612759000486e-05,
"loss": 2.2139,
"step": 1700
},
{
"epoch": 0.23440552155228545,
"grad_norm": 0.731287956237793,
"learning_rate": 1.8029181541640952e-05,
"loss": 2.1956,
"step": 1800
},
{
"epoch": 0.24742805052741243,
"grad_norm": 0.7656735777854919,
"learning_rate": 1.7768198735925848e-05,
"loss": 2.2251,
"step": 1900
},
{
"epoch": 0.2604505795025394,
"grad_norm": 0.9210675358772278,
"learning_rate": 1.7493137330831318e-05,
"loss": 2.1839,
"step": 2000
},
{
"epoch": 0.27347310847766637,
"grad_norm": 0.6853137016296387,
"learning_rate": 1.7204495830509832e-05,
"loss": 2.1842,
"step": 2100
},
{
"epoch": 0.28649563745279333,
"grad_norm": 0.6904870271682739,
"learning_rate": 1.6902797350832318e-05,
"loss": 2.1832,
"step": 2200
},
{
"epoch": 0.2995181664279203,
"grad_norm": 1.0184141397476196,
"learning_rate": 1.6588588671325554e-05,
"loss": 2.1457,
"step": 2300
},
{
"epoch": 0.3125406954030473,
"grad_norm": 0.8124328851699829,
"learning_rate": 1.626243924422303e-05,
"loss": 2.1382,
"step": 2400
},
{
"epoch": 0.32556322437817425,
"grad_norm": 0.9830845594406128,
"learning_rate": 1.592494016242518e-05,
"loss": 2.1469,
"step": 2500
},
{
"epoch": 0.3385857533533012,
"grad_norm": 0.9020227193832397,
"learning_rate": 1.5576703088239455e-05,
"loss": 2.1907,
"step": 2600
},
{
"epoch": 0.35160828232842817,
"grad_norm": 1.0550016164779663,
"learning_rate": 1.5218359144841666e-05,
"loss": 2.1908,
"step": 2700
},
{
"epoch": 0.3646308113035551,
"grad_norm": 0.7753348350524902,
"learning_rate": 1.4850557772467655e-05,
"loss": 2.1503,
"step": 2800
},
{
"epoch": 0.37765334027868214,
"grad_norm": 0.9372894167900085,
"learning_rate": 1.4473965551408284e-05,
"loss": 2.1639,
"step": 2900
},
{
"epoch": 0.3906758692538091,
"grad_norm": 0.7956731915473938,
"learning_rate": 1.4089264993940843e-05,
"loss": 2.1498,
"step": 3000
},
{
"epoch": 0.40369839822893605,
"grad_norm": 0.7356022000312805,
"learning_rate": 1.3697153307386327e-05,
"loss": 2.1705,
"step": 3100
},
{
"epoch": 0.416720927204063,
"grad_norm": 0.864648699760437,
"learning_rate": 1.3298341130534323e-05,
"loss": 2.1514,
"step": 3200
},
{
"epoch": 0.42974345617919,
"grad_norm": 1.0018072128295898,
"learning_rate": 1.2893551245725551e-05,
"loss": 2.1069,
"step": 3300
},
{
"epoch": 0.442765985154317,
"grad_norm": 0.9439816474914551,
"learning_rate": 1.2483517268926188e-05,
"loss": 2.1194,
"step": 3400
},
{
"epoch": 0.45578851412944393,
"grad_norm": 0.8040792346000671,
"learning_rate": 1.2068982320167986e-05,
"loss": 2.1365,
"step": 3500
},
{
"epoch": 0.4688110431045709,
"grad_norm": 1.0141490697860718,
"learning_rate": 1.1650697676763833e-05,
"loss": 2.1117,
"step": 3600
},
{
"epoch": 0.4818335720796979,
"grad_norm": 1.0436880588531494,
"learning_rate": 1.1229421411739574e-05,
"loss": 2.1248,
"step": 3700
},
{
"epoch": 0.49485610105482486,
"grad_norm": 1.00360906124115,
"learning_rate": 1.0805917019949665e-05,
"loss": 2.1349,
"step": 3800
},
{
"epoch": 0.5078786300299518,
"grad_norm": 0.9457325339317322,
"learning_rate": 1.0380952034366703e-05,
"loss": 2.1253,
"step": 3900
},
{
"epoch": 0.5209011590050788,
"grad_norm": 0.871446967124939,
"learning_rate": 9.955296635052454e-06,
"loss": 2.1322,
"step": 4000
},
{
"epoch": 0.5339236879802057,
"grad_norm": 1.199268102645874,
"learning_rate": 9.529722253331522e-06,
"loss": 2.1386,
"step": 4100
},
{
"epoch": 0.5469462169553327,
"grad_norm": 0.8790176510810852,
"learning_rate": 9.105000173697276e-06,
"loss": 2.1618,
"step": 4200
},
{
"epoch": 0.5599687459304596,
"grad_norm": 1.089074969291687,
"learning_rate": 8.681900135983885e-06,
"loss": 2.132,
"step": 4300
},
{
"epoch": 0.5729912749055867,
"grad_norm": 1.0114529132843018,
"learning_rate": 8.26118894033779e-06,
"loss": 2.1433,
"step": 4400
},
{
"epoch": 0.5860138038807137,
"grad_norm": 1.2954638004302979,
"learning_rate": 7.843629057516935e-06,
"loss": 2.1213,
"step": 4500
},
{
"epoch": 0.5990363328558406,
"grad_norm": 1.0429459810256958,
"learning_rate": 7.429977247036231e-06,
"loss": 2.0845,
"step": 4600
},
{
"epoch": 0.6120588618309676,
"grad_norm": 0.9565938115119934,
"learning_rate": 7.020983185663779e-06,
"loss": 2.1291,
"step": 4700
},
{
"epoch": 0.6250813908060946,
"grad_norm": 0.900711715221405,
"learning_rate": 6.617388108753403e-06,
"loss": 2.1065,
"step": 4800
},
{
"epoch": 0.6381039197812215,
"grad_norm": 1.0982881784439087,
"learning_rate": 6.219923466875894e-06,
"loss": 2.1607,
"step": 4900
},
{
"epoch": 0.6511264487563485,
"grad_norm": 1.1638540029525757,
"learning_rate": 5.829309600183536e-06,
"loss": 2.0958,
"step": 5000
},
{
"epoch": 0.6641489777314754,
"grad_norm": 0.9292285442352295,
"learning_rate": 5.446254432910526e-06,
"loss": 2.1075,
"step": 5100
},
{
"epoch": 0.6771715067066024,
"grad_norm": 0.8159144520759583,
"learning_rate": 5.071452190375194e-06,
"loss": 2.1218,
"step": 5200
},
{
"epoch": 0.6901940356817294,
"grad_norm": 1.1008979082107544,
"learning_rate": 4.705582140809275e-06,
"loss": 2.1127,
"step": 5300
},
{
"epoch": 0.7032165646568563,
"grad_norm": 0.885331928730011,
"learning_rate": 4.349307364294512e-06,
"loss": 2.1082,
"step": 5400
},
{
"epoch": 0.7162390936319833,
"grad_norm": 0.9930873513221741,
"learning_rate": 4.0032735510376055e-06,
"loss": 2.1218,
"step": 5500
},
{
"epoch": 0.7292616226071102,
"grad_norm": 0.9115188717842102,
"learning_rate": 3.668107831161537e-06,
"loss": 2.1659,
"step": 5600
},
{
"epoch": 0.7422841515822373,
"grad_norm": 1.0631247758865356,
"learning_rate": 3.344417638133999e-06,
"loss": 2.1263,
"step": 5700
},
{
"epoch": 0.7553066805573643,
"grad_norm": 1.2672806978225708,
"learning_rate": 3.032789607892811e-06,
"loss": 2.1333,
"step": 5800
},
{
"epoch": 0.7683292095324912,
"grad_norm": 1.0414445400238037,
"learning_rate": 2.733788515663528e-06,
"loss": 2.1135,
"step": 5900
},
{
"epoch": 0.7813517385076182,
"grad_norm": 1.0180047750473022,
"learning_rate": 2.447956252395974e-06,
"loss": 2.1743,
"step": 6000
},
{
"epoch": 0.7943742674827452,
"grad_norm": 0.8480224609375,
"learning_rate": 2.1758108426748847e-06,
"loss": 2.1312,
"step": 6100
},
{
"epoch": 0.8073967964578721,
"grad_norm": 1.0538519620895386,
"learning_rate": 1.9178455058843938e-06,
"loss": 2.0941,
"step": 6200
},
{
"epoch": 0.8204193254329991,
"grad_norm": 0.8907907605171204,
"learning_rate": 1.6745277623279766e-06,
"loss": 2.1227,
"step": 6300
},
{
"epoch": 0.833441854408126,
"grad_norm": 0.8479962348937988,
"learning_rate": 1.446298585923771e-06,
"loss": 2.1258,
"step": 6400
},
{
"epoch": 0.846464383383253,
"grad_norm": 1.204111933708191,
"learning_rate": 1.2335716050109182e-06,
"loss": 2.1328,
"step": 6500
},
{
"epoch": 0.85948691235838,
"grad_norm": 1.111570119857788,
"learning_rate": 1.0367323527153462e-06,
"loss": 2.1176,
"step": 6600
},
{
"epoch": 0.8725094413335069,
"grad_norm": 0.7793114185333252,
"learning_rate": 8.561375682335393e-07,
"loss": 2.1142,
"step": 6700
},
{
"epoch": 0.885531970308634,
"grad_norm": 0.8252727389335632,
"learning_rate": 6.92114550300661e-07,
"loss": 2.1273,
"step": 6800
},
{
"epoch": 0.8985544992837609,
"grad_norm": 0.9007234573364258,
"learning_rate": 5.449605640147038e-07,
"loss": 2.1098,
"step": 6900
},
{
"epoch": 0.9115770282588879,
"grad_norm": 0.9859702587127686,
"learning_rate": 4.149423020917587e-07,
"loss": 2.1,
"step": 7000
},
{
"epoch": 0.9245995572340149,
"grad_norm": 1.1626622676849365,
"learning_rate": 3.022954015287449e-07,
"loss": 2.1123,
"step": 7100
},
{
"epoch": 0.9376220862091418,
"grad_norm": 0.9756370186805725,
"learning_rate": 2.0722401654960644e-07,
"loss": 2.1186,
"step": 7200
},
{
"epoch": 0.9506446151842688,
"grad_norm": 1.004597783088684,
"learning_rate": 1.299004486089095e-07,
"loss": 2.0744,
"step": 7300
},
{
"epoch": 0.9636671441593958,
"grad_norm": 1.021781086921692,
"learning_rate": 7.046483412342708e-08,
"loss": 2.086,
"step": 7400
},
{
"epoch": 0.9766896731345227,
"grad_norm": 0.9634168148040771,
"learning_rate": 2.9024890497625356e-08,
"loss": 2.1365,
"step": 7500
},
{
"epoch": 0.9897122021096497,
"grad_norm": 1.1906155347824097,
"learning_rate": 5.655720903351425e-09,
"loss": 2.1136,
"step": 7600
},
{
"epoch": 1.0,
"step": 7679,
"total_flos": 1.3987105112064e+17,
"train_loss": 2.1709037589008475,
"train_runtime": 4004.8154,
"train_samples_per_second": 3.835,
"train_steps_per_second": 1.917
}
],
"logging_steps": 100,
"max_steps": 7679,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.3987105112064e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}