|
{ |
|
"best_metric": 0.022257013246417046, |
|
"best_model_checkpoint": "/kaggle/working/output/checkpoint-150", |
|
"epoch": 42.857142857142854, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"eval_LCC": -0.38242422405954946, |
|
"eval_SROCC": -0.3890036014405762, |
|
"eval_loss": 0.2948198914527893, |
|
"eval_runtime": 37.5549, |
|
"eval_samples_per_second": 1.331, |
|
"eval_steps_per_second": 0.053, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_LCC": -0.3732136136633078, |
|
"eval_SROCC": -0.36653061224489797, |
|
"eval_loss": 0.11433681845664978, |
|
"eval_runtime": 35.6391, |
|
"eval_samples_per_second": 1.403, |
|
"eval_steps_per_second": 0.056, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 4.934920310974121, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.1552, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"eval_LCC": -0.3657486170203663, |
|
"eval_SROCC": -0.3477070828331333, |
|
"eval_loss": 0.07677865773439407, |
|
"eval_runtime": 35.6897, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_LCC": -0.35044377450614, |
|
"eval_SROCC": -0.3395438175270108, |
|
"eval_loss": 0.07478620857000351, |
|
"eval_runtime": 35.8511, |
|
"eval_samples_per_second": 1.395, |
|
"eval_steps_per_second": 0.056, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"eval_LCC": -0.33219308626687694, |
|
"eval_SROCC": -0.34981992797118844, |
|
"eval_loss": 0.05174265429377556, |
|
"eval_runtime": 35.8384, |
|
"eval_samples_per_second": 1.395, |
|
"eval_steps_per_second": 0.056, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 2.3777573108673096, |
|
"learning_rate": 9.966191788709716e-06, |
|
"loss": 0.0657, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_LCC": -0.30599681663050493, |
|
"eval_SROCC": -0.33368547418967587, |
|
"eval_loss": 0.05527381971478462, |
|
"eval_runtime": 35.8215, |
|
"eval_samples_per_second": 1.396, |
|
"eval_steps_per_second": 0.056, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"eval_LCC": -0.28100937429201095, |
|
"eval_SROCC": -0.2921008403361345, |
|
"eval_loss": 0.043372660875320435, |
|
"eval_runtime": 35.795, |
|
"eval_samples_per_second": 1.397, |
|
"eval_steps_per_second": 0.056, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_LCC": -0.25699939917329884, |
|
"eval_SROCC": -0.24811524609843938, |
|
"eval_loss": 0.040563274174928665, |
|
"eval_runtime": 35.9877, |
|
"eval_samples_per_second": 1.389, |
|
"eval_steps_per_second": 0.056, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 1.0520217418670654, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 0.0249, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.857142857142858, |
|
"eval_LCC": -0.2477874480763097, |
|
"eval_SROCC": -0.23457382953181274, |
|
"eval_loss": 0.040173906832933426, |
|
"eval_runtime": 35.9073, |
|
"eval_samples_per_second": 1.392, |
|
"eval_steps_per_second": 0.056, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_LCC": -0.21817464996358082, |
|
"eval_SROCC": -0.20758703481392557, |
|
"eval_loss": 0.0384274497628212, |
|
"eval_runtime": 35.6916, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 10.857142857142858, |
|
"eval_LCC": -0.19225659084922117, |
|
"eval_SROCC": -0.19193277310924367, |
|
"eval_loss": 0.03174906224012375, |
|
"eval_runtime": 35.716, |
|
"eval_samples_per_second": 1.4, |
|
"eval_steps_per_second": 0.056, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 1.398345947265625, |
|
"learning_rate": 9.177439057064684e-06, |
|
"loss": 0.0215, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_LCC": -0.16355167219890276, |
|
"eval_SROCC": -0.15178871548619446, |
|
"eval_loss": 0.031038017943501472, |
|
"eval_runtime": 35.6946, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 12.857142857142858, |
|
"eval_LCC": -0.1548916268794662, |
|
"eval_SROCC": -0.1291236494597839, |
|
"eval_loss": 0.03165186941623688, |
|
"eval_runtime": 35.6768, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_LCC": -0.12920387967048147, |
|
"eval_SROCC": -0.09752701080432173, |
|
"eval_loss": 0.03006185218691826, |
|
"eval_runtime": 35.8955, |
|
"eval_samples_per_second": 1.393, |
|
"eval_steps_per_second": 0.056, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 0.4825093746185303, |
|
"learning_rate": 8.43120818934367e-06, |
|
"loss": 0.0154, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 14.857142857142858, |
|
"eval_LCC": -0.10573621549419017, |
|
"eval_SROCC": -0.08043217286914765, |
|
"eval_loss": 0.0284834336489439, |
|
"eval_runtime": 35.7717, |
|
"eval_samples_per_second": 1.398, |
|
"eval_steps_per_second": 0.056, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_LCC": -0.07621053426257447, |
|
"eval_SROCC": -0.04614645858343337, |
|
"eval_loss": 0.027657881379127502, |
|
"eval_runtime": 35.7321, |
|
"eval_samples_per_second": 1.399, |
|
"eval_steps_per_second": 0.056, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 16.857142857142858, |
|
"eval_LCC": -0.04854793231941595, |
|
"eval_SROCC": -0.03567827130852341, |
|
"eval_loss": 0.02630411647260189, |
|
"eval_runtime": 35.6683, |
|
"eval_samples_per_second": 1.402, |
|
"eval_steps_per_second": 0.056, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 0.7504790425300598, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.0128, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_LCC": -0.03166657911247084, |
|
"eval_SROCC": -0.01714285714285714, |
|
"eval_loss": 0.026295064017176628, |
|
"eval_runtime": 35.6163, |
|
"eval_samples_per_second": 1.404, |
|
"eval_steps_per_second": 0.056, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 18.857142857142858, |
|
"eval_LCC": -0.023641716533156595, |
|
"eval_SROCC": -0.003985594237695078, |
|
"eval_loss": 0.026547763496637344, |
|
"eval_runtime": 35.7061, |
|
"eval_samples_per_second": 1.4, |
|
"eval_steps_per_second": 0.056, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.4742915630340576, |
|
"learning_rate": 6.434016163555452e-06, |
|
"loss": 0.0113, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_LCC": -0.008938391820749465, |
|
"eval_SROCC": 0.022713085234093634, |
|
"eval_loss": 0.026299767196178436, |
|
"eval_runtime": 35.6413, |
|
"eval_samples_per_second": 1.403, |
|
"eval_steps_per_second": 0.056, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 20.857142857142858, |
|
"eval_LCC": 0.008109004168265264, |
|
"eval_SROCC": 0.025402160864345734, |
|
"eval_loss": 0.025580281391739845, |
|
"eval_runtime": 35.724, |
|
"eval_samples_per_second": 1.4, |
|
"eval_steps_per_second": 0.056, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_LCC": 0.023345863441218237, |
|
"eval_SROCC": 0.049315726290516206, |
|
"eval_loss": 0.02492944523692131, |
|
"eval_runtime": 35.624, |
|
"eval_samples_per_second": 1.404, |
|
"eval_steps_per_second": 0.056, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 0.43411004543304443, |
|
"learning_rate": 5.290724144552379e-06, |
|
"loss": 0.0104, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"eval_LCC": 0.033023044716641976, |
|
"eval_SROCC": 0.06160864345738295, |
|
"eval_loss": 0.024564068764448166, |
|
"eval_runtime": 35.6925, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_LCC": 0.04347957438577821, |
|
"eval_SROCC": 0.06909963985594238, |
|
"eval_loss": 0.02423253282904625, |
|
"eval_runtime": 35.9004, |
|
"eval_samples_per_second": 1.393, |
|
"eval_steps_per_second": 0.056, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 24.857142857142858, |
|
"eval_LCC": 0.051818984749542364, |
|
"eval_SROCC": 0.07956782713085234, |
|
"eval_loss": 0.024022720754146576, |
|
"eval_runtime": 35.8318, |
|
"eval_samples_per_second": 1.395, |
|
"eval_steps_per_second": 0.056, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 0.443155437707901, |
|
"learning_rate": 4.131759111665349e-06, |
|
"loss": 0.0095, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_LCC": 0.0679416905737761, |
|
"eval_SROCC": 0.0830252100840336, |
|
"eval_loss": 0.02380475588142872, |
|
"eval_runtime": 35.6776, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"eval_LCC": 0.0746956496870511, |
|
"eval_SROCC": 0.09291716686674668, |
|
"eval_loss": 0.023518024012446404, |
|
"eval_runtime": 35.6658, |
|
"eval_samples_per_second": 1.402, |
|
"eval_steps_per_second": 0.056, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_LCC": 0.08622063268749575, |
|
"eval_SROCC": 0.10031212484993997, |
|
"eval_loss": 0.02321736514568329, |
|
"eval_runtime": 35.7144, |
|
"eval_samples_per_second": 1.4, |
|
"eval_steps_per_second": 0.056, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 0.6047067642211914, |
|
"learning_rate": 3.019601169804216e-06, |
|
"loss": 0.009, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 28.857142857142858, |
|
"eval_LCC": 0.09545687497338319, |
|
"eval_SROCC": 0.10501800720288115, |
|
"eval_loss": 0.022862296551465988, |
|
"eval_runtime": 35.7409, |
|
"eval_samples_per_second": 1.399, |
|
"eval_steps_per_second": 0.056, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_LCC": 0.10522076553867485, |
|
"eval_SROCC": 0.10722689075630251, |
|
"eval_loss": 0.02260303497314453, |
|
"eval_runtime": 35.8268, |
|
"eval_samples_per_second": 1.396, |
|
"eval_steps_per_second": 0.056, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 30.857142857142858, |
|
"eval_LCC": 0.11099832718077456, |
|
"eval_SROCC": 0.11769507803121249, |
|
"eval_loss": 0.022564733400940895, |
|
"eval_runtime": 35.6873, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 0.3605582118034363, |
|
"learning_rate": 2.0142070414860704e-06, |
|
"loss": 0.0084, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_LCC": 0.11518191832516576, |
|
"eval_SROCC": 0.12864345738295319, |
|
"eval_loss": 0.022482411935925484, |
|
"eval_runtime": 35.5295, |
|
"eval_samples_per_second": 1.407, |
|
"eval_steps_per_second": 0.056, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 32.857142857142854, |
|
"eval_LCC": 0.11672118384335717, |
|
"eval_SROCC": 0.12960384153661464, |
|
"eval_loss": 0.02244633622467518, |
|
"eval_runtime": 35.6625, |
|
"eval_samples_per_second": 1.402, |
|
"eval_steps_per_second": 0.056, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_LCC": 0.11853509722750845, |
|
"eval_SROCC": 0.12960384153661464, |
|
"eval_loss": 0.022433871403336525, |
|
"eval_runtime": 35.775, |
|
"eval_samples_per_second": 1.398, |
|
"eval_steps_per_second": 0.056, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"grad_norm": 0.37335312366485596, |
|
"learning_rate": 1.1697777844051105e-06, |
|
"loss": 0.0085, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 34.857142857142854, |
|
"eval_LCC": 0.11996998016040515, |
|
"eval_SROCC": 0.13104441776710685, |
|
"eval_loss": 0.022421473637223244, |
|
"eval_runtime": 35.8151, |
|
"eval_samples_per_second": 1.396, |
|
"eval_steps_per_second": 0.056, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_LCC": 0.12212764115994604, |
|
"eval_SROCC": 0.12633853541416568, |
|
"eval_loss": 0.02237151563167572, |
|
"eval_runtime": 35.6186, |
|
"eval_samples_per_second": 1.404, |
|
"eval_steps_per_second": 0.056, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 36.857142857142854, |
|
"eval_LCC": 0.12332180896765454, |
|
"eval_SROCC": 0.12489795918367347, |
|
"eval_loss": 0.022355427965521812, |
|
"eval_runtime": 35.9066, |
|
"eval_samples_per_second": 1.393, |
|
"eval_steps_per_second": 0.056, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 37.142857142857146, |
|
"grad_norm": 0.5681003332138062, |
|
"learning_rate": 5.318367983829393e-07, |
|
"loss": 0.0082, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_LCC": 0.12470767155506596, |
|
"eval_SROCC": 0.12720288115246098, |
|
"eval_loss": 0.022325601428747177, |
|
"eval_runtime": 35.6817, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 38.857142857142854, |
|
"eval_LCC": 0.1255198227036719, |
|
"eval_SROCC": 0.12720288115246098, |
|
"eval_loss": 0.02230682410299778, |
|
"eval_runtime": 35.758, |
|
"eval_samples_per_second": 1.398, |
|
"eval_steps_per_second": 0.056, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.49957460165023804, |
|
"learning_rate": 1.3477564710088097e-07, |
|
"loss": 0.008, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_LCC": 0.12651835944328998, |
|
"eval_SROCC": 0.1291236494597839, |
|
"eval_loss": 0.022274091839790344, |
|
"eval_runtime": 35.7033, |
|
"eval_samples_per_second": 1.4, |
|
"eval_steps_per_second": 0.056, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 40.857142857142854, |
|
"eval_LCC": 0.1269013775632479, |
|
"eval_SROCC": 0.1291236494597839, |
|
"eval_loss": 0.022262830287218094, |
|
"eval_runtime": 35.6563, |
|
"eval_samples_per_second": 1.402, |
|
"eval_steps_per_second": 0.056, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_LCC": 0.12710778753633592, |
|
"eval_SROCC": 0.1291236494597839, |
|
"eval_loss": 0.022257346659898758, |
|
"eval_runtime": 35.6941, |
|
"eval_samples_per_second": 1.401, |
|
"eval_steps_per_second": 0.056, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"grad_norm": 0.33458444476127625, |
|
"learning_rate": 0.0, |
|
"loss": 0.0078, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"eval_LCC": 0.12712191056512775, |
|
"eval_SROCC": 0.1291236494597839, |
|
"eval_loss": 0.022257013246417046, |
|
"eval_runtime": 35.8013, |
|
"eval_samples_per_second": 1.397, |
|
"eval_steps_per_second": 0.056, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"step": 150, |
|
"total_flos": 2.1207924866757427e+18, |
|
"train_loss": 0.02509542241692543, |
|
"train_runtime": 7697.1048, |
|
"train_samples_per_second": 1.39, |
|
"train_steps_per_second": 0.019 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1207924866757427e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|