groderg's picture
Evaluation on the test set completed on 2024_10_31.
f59385d verified
raw
history blame
17.4 kB
{
"best_metric": 0.6263097524642944,
"best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/Ziboiai-large-2024_10_31-prova_batch-size32_freeze_probs/checkpoint-60",
"epoch": 40.0,
"eval_steps": 500,
"global_step": 80,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_explained_variance": 0.03638218343257904,
"eval_loss": 0.7150455713272095,
"eval_mae": 0.3848940134048462,
"eval_r2": -20.29086685180664,
"eval_rmse": 0.40997111797332764,
"eval_runtime": 1.32,
"eval_samples_per_second": 37.88,
"eval_steps_per_second": 1.515,
"learning_rate": 0.001,
"step": 2
},
{
"epoch": 2.0,
"eval_explained_variance": 0.0240942370146513,
"eval_loss": 0.7314126491546631,
"eval_mae": 0.3895121216773987,
"eval_r2": -21.218204498291016,
"eval_rmse": 0.4163060486316681,
"eval_runtime": 0.3125,
"eval_samples_per_second": 160.002,
"eval_steps_per_second": 6.4,
"learning_rate": 0.001,
"step": 4
},
{
"epoch": 3.0,
"eval_explained_variance": -0.04694412648677826,
"eval_loss": 0.7726277112960815,
"eval_mae": 0.40413352847099304,
"eval_r2": -24.822391510009766,
"eval_rmse": 0.4320966601371765,
"eval_runtime": 0.3275,
"eval_samples_per_second": 152.667,
"eval_steps_per_second": 6.107,
"learning_rate": 0.001,
"step": 6
},
{
"epoch": 4.0,
"eval_explained_variance": -0.06671242415904999,
"eval_loss": 0.7917326092720032,
"eval_mae": 0.4094983637332916,
"eval_r2": -26.581586837768555,
"eval_rmse": 0.4379725754261017,
"eval_runtime": 0.3153,
"eval_samples_per_second": 158.574,
"eval_steps_per_second": 6.343,
"learning_rate": 0.001,
"step": 8
},
{
"epoch": 5.0,
"eval_explained_variance": -0.13621382415294647,
"eval_loss": 0.7852649092674255,
"eval_mae": 0.402120441198349,
"eval_r2": -26.95589256286621,
"eval_rmse": 0.43184274435043335,
"eval_runtime": 0.3123,
"eval_samples_per_second": 160.086,
"eval_steps_per_second": 6.403,
"learning_rate": 0.001,
"step": 10
},
{
"epoch": 6.0,
"eval_explained_variance": -0.12974193692207336,
"eval_loss": 0.7647674679756165,
"eval_mae": 0.3905399441719055,
"eval_r2": -24.40153694152832,
"eval_rmse": 0.42244094610214233,
"eval_runtime": 0.3317,
"eval_samples_per_second": 150.759,
"eval_steps_per_second": 6.03,
"learning_rate": 0.001,
"step": 12
},
{
"epoch": 7.0,
"eval_explained_variance": -0.10977767407894135,
"eval_loss": 0.7391812205314636,
"eval_mae": 0.376028835773468,
"eval_r2": -22.557889938354492,
"eval_rmse": 0.41028541326522827,
"eval_runtime": 0.3205,
"eval_samples_per_second": 155.989,
"eval_steps_per_second": 6.24,
"learning_rate": 0.001,
"step": 14
},
{
"epoch": 8.0,
"eval_explained_variance": -0.10544480383396149,
"eval_loss": 0.7115270495414734,
"eval_mae": 0.36385056376457214,
"eval_r2": -20.067392349243164,
"eval_rmse": 0.39825379848480225,
"eval_runtime": 0.3104,
"eval_samples_per_second": 161.103,
"eval_steps_per_second": 6.444,
"learning_rate": 0.0001,
"step": 16
},
{
"epoch": 9.0,
"eval_explained_variance": -0.09249210357666016,
"eval_loss": 0.6896975040435791,
"eval_mae": 0.35347798466682434,
"eval_r2": -18.16646385192871,
"eval_rmse": 0.3878582715988159,
"eval_runtime": 0.3226,
"eval_samples_per_second": 155.006,
"eval_steps_per_second": 6.2,
"learning_rate": 0.0001,
"step": 18
},
{
"epoch": 10.0,
"eval_explained_variance": -0.10285507887601852,
"eval_loss": 0.6777035593986511,
"eval_mae": 0.34683120250701904,
"eval_r2": -16.94469451904297,
"eval_rmse": 0.3818005323410034,
"eval_runtime": 0.3016,
"eval_samples_per_second": 165.76,
"eval_steps_per_second": 6.63,
"learning_rate": 0.0001,
"step": 20
},
{
"epoch": 11.0,
"eval_explained_variance": -0.11687294393777847,
"eval_loss": 0.6701759099960327,
"eval_mae": 0.3423532247543335,
"eval_r2": -16.037521362304688,
"eval_rmse": 0.3779585659503937,
"eval_runtime": 0.3107,
"eval_samples_per_second": 160.916,
"eval_steps_per_second": 6.437,
"learning_rate": 0.0001,
"step": 22
},
{
"epoch": 12.0,
"eval_explained_variance": -0.11208173632621765,
"eval_loss": 0.663905918598175,
"eval_mae": 0.3388546407222748,
"eval_r2": -15.605177879333496,
"eval_rmse": 0.37438222765922546,
"eval_runtime": 0.3308,
"eval_samples_per_second": 151.17,
"eval_steps_per_second": 6.047,
"learning_rate": 0.0001,
"step": 24
},
{
"epoch": 13.0,
"eval_explained_variance": -0.10647904872894287,
"eval_loss": 0.656491219997406,
"eval_mae": 0.3345881700515747,
"eval_r2": -14.805088996887207,
"eval_rmse": 0.3702985942363739,
"eval_runtime": 0.3222,
"eval_samples_per_second": 155.166,
"eval_steps_per_second": 6.207,
"learning_rate": 0.0001,
"step": 26
},
{
"epoch": 14.0,
"eval_explained_variance": -0.0958017110824585,
"eval_loss": 0.6501385569572449,
"eval_mae": 0.33100754022598267,
"eval_r2": -14.231175422668457,
"eval_rmse": 0.3668138384819031,
"eval_runtime": 0.3293,
"eval_samples_per_second": 151.853,
"eval_steps_per_second": 6.074,
"learning_rate": 0.0001,
"step": 28
},
{
"epoch": 15.0,
"eval_explained_variance": -0.08547426015138626,
"eval_loss": 0.6467865705490112,
"eval_mae": 0.32885220646858215,
"eval_r2": -14.07986831665039,
"eval_rmse": 0.36475783586502075,
"eval_runtime": 0.3253,
"eval_samples_per_second": 153.717,
"eval_steps_per_second": 6.149,
"learning_rate": 0.0001,
"step": 30
},
{
"epoch": 16.0,
"eval_explained_variance": -0.08231981098651886,
"eval_loss": 0.6471170783042908,
"eval_mae": 0.3288896679878235,
"eval_r2": -14.255745887756348,
"eval_rmse": 0.3650059998035431,
"eval_runtime": 0.305,
"eval_samples_per_second": 163.945,
"eval_steps_per_second": 6.558,
"learning_rate": 0.0001,
"step": 32
},
{
"epoch": 17.0,
"eval_explained_variance": -0.08097466081380844,
"eval_loss": 0.6435126662254333,
"eval_mae": 0.3268200755119324,
"eval_r2": -14.059813499450684,
"eval_rmse": 0.36310678720474243,
"eval_runtime": 0.3322,
"eval_samples_per_second": 150.492,
"eval_steps_per_second": 6.02,
"learning_rate": 0.0001,
"step": 34
},
{
"epoch": 18.0,
"eval_explained_variance": -0.07994352281093597,
"eval_loss": 0.6437923908233643,
"eval_mae": 0.3269612491130829,
"eval_r2": -14.036934852600098,
"eval_rmse": 0.36342939734458923,
"eval_runtime": 0.3107,
"eval_samples_per_second": 160.922,
"eval_steps_per_second": 6.437,
"learning_rate": 0.0001,
"step": 36
},
{
"epoch": 19.0,
"eval_explained_variance": -0.08883289247751236,
"eval_loss": 0.6399621367454529,
"eval_mae": 0.3249860107898712,
"eval_r2": -13.81522274017334,
"eval_rmse": 0.36136963963508606,
"eval_runtime": 0.3104,
"eval_samples_per_second": 161.092,
"eval_steps_per_second": 6.444,
"learning_rate": 0.0001,
"step": 38
},
{
"epoch": 20.0,
"eval_explained_variance": -0.09353505074977875,
"eval_loss": 0.6391971707344055,
"eval_mae": 0.3246455192565918,
"eval_r2": -13.710391998291016,
"eval_rmse": 0.3608955144882202,
"eval_runtime": 0.3119,
"eval_samples_per_second": 160.306,
"eval_steps_per_second": 6.412,
"learning_rate": 0.0001,
"step": 40
},
{
"epoch": 21.0,
"eval_explained_variance": -0.09930111467838287,
"eval_loss": 0.6386714577674866,
"eval_mae": 0.32462170720100403,
"eval_r2": -13.809860229492188,
"eval_rmse": 0.3606450855731964,
"eval_runtime": 0.3149,
"eval_samples_per_second": 158.8,
"eval_steps_per_second": 6.352,
"learning_rate": 0.0001,
"step": 42
},
{
"epoch": 22.0,
"eval_explained_variance": -0.10561199486255646,
"eval_loss": 0.6388444304466248,
"eval_mae": 0.3243348002433777,
"eval_r2": -13.849721908569336,
"eval_rmse": 0.36056435108184814,
"eval_runtime": 0.3094,
"eval_samples_per_second": 161.607,
"eval_steps_per_second": 6.464,
"learning_rate": 0.0001,
"step": 44
},
{
"epoch": 23.0,
"eval_explained_variance": -0.1035044863820076,
"eval_loss": 0.6361631155014038,
"eval_mae": 0.3227779269218445,
"eval_r2": -13.562189102172852,
"eval_rmse": 0.35895633697509766,
"eval_runtime": 0.3094,
"eval_samples_per_second": 161.581,
"eval_steps_per_second": 6.463,
"learning_rate": 0.0001,
"step": 46
},
{
"epoch": 24.0,
"eval_explained_variance": -0.10584529489278793,
"eval_loss": 0.635435163974762,
"eval_mae": 0.3223152160644531,
"eval_r2": -13.645319938659668,
"eval_rmse": 0.35847193002700806,
"eval_runtime": 0.3094,
"eval_samples_per_second": 161.602,
"eval_steps_per_second": 6.464,
"learning_rate": 0.0001,
"step": 48
},
{
"epoch": 25.0,
"eval_explained_variance": -0.1035505086183548,
"eval_loss": 0.6344550848007202,
"eval_mae": 0.32144099473953247,
"eval_r2": -13.602314949035645,
"eval_rmse": 0.35783687233924866,
"eval_runtime": 0.3092,
"eval_samples_per_second": 161.704,
"eval_steps_per_second": 6.468,
"learning_rate": 0.0001,
"step": 50
},
{
"epoch": 26.0,
"eval_explained_variance": -0.11728200316429138,
"eval_loss": 0.6348865628242493,
"eval_mae": 0.3211889863014221,
"eval_r2": -13.630416870117188,
"eval_rmse": 0.3580625355243683,
"eval_runtime": 0.331,
"eval_samples_per_second": 151.064,
"eval_steps_per_second": 6.043,
"learning_rate": 0.0001,
"step": 52
},
{
"epoch": 27.0,
"eval_explained_variance": -0.11483900249004364,
"eval_loss": 0.6332749724388123,
"eval_mae": 0.32009246945381165,
"eval_r2": -13.561347007751465,
"eval_rmse": 0.3570806384086609,
"eval_runtime": 0.3173,
"eval_samples_per_second": 157.565,
"eval_steps_per_second": 6.303,
"learning_rate": 0.0001,
"step": 54
},
{
"epoch": 28.0,
"eval_explained_variance": -0.10828801989555359,
"eval_loss": 0.6295092701911926,
"eval_mae": 0.31767499446868896,
"eval_r2": -13.23308277130127,
"eval_rmse": 0.35479238629341125,
"eval_runtime": 0.3087,
"eval_samples_per_second": 161.989,
"eval_steps_per_second": 6.48,
"learning_rate": 0.0001,
"step": 56
},
{
"epoch": 29.0,
"eval_explained_variance": -0.1047045886516571,
"eval_loss": 0.6285346746444702,
"eval_mae": 0.3173280954360962,
"eval_r2": -13.162256240844727,
"eval_rmse": 0.35434553027153015,
"eval_runtime": 0.3277,
"eval_samples_per_second": 152.596,
"eval_steps_per_second": 6.104,
"learning_rate": 0.0001,
"step": 58
},
{
"epoch": 30.0,
"eval_explained_variance": -0.09264782071113586,
"eval_loss": 0.6263097524642944,
"eval_mae": 0.31627562642097473,
"eval_r2": -12.713174819946289,
"eval_rmse": 0.3532228171825409,
"eval_runtime": 0.3523,
"eval_samples_per_second": 141.931,
"eval_steps_per_second": 5.677,
"learning_rate": 0.0001,
"step": 60
},
{
"epoch": 31.0,
"eval_explained_variance": -0.08934260159730911,
"eval_loss": 0.6272528767585754,
"eval_mae": 0.316723495721817,
"eval_r2": -12.873921394348145,
"eval_rmse": 0.35376670956611633,
"eval_runtime": 0.3073,
"eval_samples_per_second": 162.723,
"eval_steps_per_second": 6.509,
"learning_rate": 0.0001,
"step": 62
},
{
"epoch": 32.0,
"eval_explained_variance": -0.07898036390542984,
"eval_loss": 0.6294133067131042,
"eval_mae": 0.31807586550712585,
"eval_r2": -12.935453414916992,
"eval_rmse": 0.3550169765949249,
"eval_runtime": 0.3094,
"eval_samples_per_second": 161.626,
"eval_steps_per_second": 6.465,
"learning_rate": 0.0001,
"step": 64
},
{
"epoch": 33.0,
"eval_explained_variance": -0.07519607990980148,
"eval_loss": 0.6299176216125488,
"eval_mae": 0.3185364603996277,
"eval_r2": -12.93520736694336,
"eval_rmse": 0.35538923740386963,
"eval_runtime": 0.3097,
"eval_samples_per_second": 161.472,
"eval_steps_per_second": 6.459,
"learning_rate": 0.0001,
"step": 66
},
{
"epoch": 34.0,
"eval_explained_variance": -0.07019602507352829,
"eval_loss": 0.6320692300796509,
"eval_mae": 0.3193182349205017,
"eval_r2": -13.267191886901855,
"eval_rmse": 0.35644862055778503,
"eval_runtime": 0.3161,
"eval_samples_per_second": 158.177,
"eval_steps_per_second": 6.327,
"learning_rate": 0.0001,
"step": 68
},
{
"epoch": 35.0,
"eval_explained_variance": -0.04873532056808472,
"eval_loss": 0.6279481649398804,
"eval_mae": 0.31752488017082214,
"eval_r2": -12.99951171875,
"eval_rmse": 0.3541102707386017,
"eval_runtime": 0.3124,
"eval_samples_per_second": 160.036,
"eval_steps_per_second": 6.401,
"learning_rate": 0.0001,
"step": 70
},
{
"epoch": 36.0,
"eval_explained_variance": -0.04663123935461044,
"eval_loss": 0.6280075907707214,
"eval_mae": 0.31736499071121216,
"eval_r2": -13.00741195678711,
"eval_rmse": 0.35407301783561707,
"eval_runtime": 0.3095,
"eval_samples_per_second": 161.554,
"eval_steps_per_second": 6.462,
"learning_rate": 0.0001,
"step": 72
},
{
"epoch": 37.0,
"eval_explained_variance": -0.04936327785253525,
"eval_loss": 0.6303659081459045,
"eval_mae": 0.3187006115913391,
"eval_r2": -13.230977058410645,
"eval_rmse": 0.35543760657310486,
"eval_runtime": 0.3251,
"eval_samples_per_second": 153.806,
"eval_steps_per_second": 6.152,
"learning_rate": 1e-05,
"step": 74
},
{
"epoch": 38.0,
"eval_explained_variance": -0.04394898936152458,
"eval_loss": 0.6297122836112976,
"eval_mae": 0.31833118200302124,
"eval_r2": -12.983016967773438,
"eval_rmse": 0.3550592064857483,
"eval_runtime": 0.3087,
"eval_samples_per_second": 161.995,
"eval_steps_per_second": 6.48,
"learning_rate": 1e-05,
"step": 76
},
{
"epoch": 39.0,
"eval_explained_variance": -0.04296223446726799,
"eval_loss": 0.630845308303833,
"eval_mae": 0.3193325996398926,
"eval_r2": -13.159842491149902,
"eval_rmse": 0.35580796003341675,
"eval_runtime": 0.3097,
"eval_samples_per_second": 161.465,
"eval_steps_per_second": 6.459,
"learning_rate": 1e-05,
"step": 78
},
{
"epoch": 40.0,
"eval_explained_variance": -0.04348618537187576,
"eval_loss": 0.6291573643684387,
"eval_mae": 0.3182610869407654,
"eval_r2": -13.069788932800293,
"eval_rmse": 0.3547934889793396,
"eval_runtime": 0.3206,
"eval_samples_per_second": 155.938,
"eval_steps_per_second": 6.238,
"learning_rate": 1e-05,
"step": 80
},
{
"epoch": 40.0,
"learning_rate": 1e-05,
"step": 80,
"total_flos": 2.9601852123168e+17,
"train_loss": 0.64580397605896,
"train_runtime": 275.9938,
"train_samples_per_second": 27.175,
"train_steps_per_second": 1.087
}
],
"logging_steps": 500,
"max_steps": 300,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.9601852123168e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}