|
{ |
|
"best_metric": 0.6263097524642944, |
|
"best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/Ziboiai-large-2024_10_31-prova_batch-size32_freeze_probs/checkpoint-60", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 80, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_explained_variance": 0.03638218343257904, |
|
"eval_loss": 0.7150455713272095, |
|
"eval_mae": 0.3848940134048462, |
|
"eval_r2": -20.29086685180664, |
|
"eval_rmse": 0.40997111797332764, |
|
"eval_runtime": 1.32, |
|
"eval_samples_per_second": 37.88, |
|
"eval_steps_per_second": 1.515, |
|
"learning_rate": 0.001, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_explained_variance": 0.0240942370146513, |
|
"eval_loss": 0.7314126491546631, |
|
"eval_mae": 0.3895121216773987, |
|
"eval_r2": -21.218204498291016, |
|
"eval_rmse": 0.4163060486316681, |
|
"eval_runtime": 0.3125, |
|
"eval_samples_per_second": 160.002, |
|
"eval_steps_per_second": 6.4, |
|
"learning_rate": 0.001, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_explained_variance": -0.04694412648677826, |
|
"eval_loss": 0.7726277112960815, |
|
"eval_mae": 0.40413352847099304, |
|
"eval_r2": -24.822391510009766, |
|
"eval_rmse": 0.4320966601371765, |
|
"eval_runtime": 0.3275, |
|
"eval_samples_per_second": 152.667, |
|
"eval_steps_per_second": 6.107, |
|
"learning_rate": 0.001, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_explained_variance": -0.06671242415904999, |
|
"eval_loss": 0.7917326092720032, |
|
"eval_mae": 0.4094983637332916, |
|
"eval_r2": -26.581586837768555, |
|
"eval_rmse": 0.4379725754261017, |
|
"eval_runtime": 0.3153, |
|
"eval_samples_per_second": 158.574, |
|
"eval_steps_per_second": 6.343, |
|
"learning_rate": 0.001, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_explained_variance": -0.13621382415294647, |
|
"eval_loss": 0.7852649092674255, |
|
"eval_mae": 0.402120441198349, |
|
"eval_r2": -26.95589256286621, |
|
"eval_rmse": 0.43184274435043335, |
|
"eval_runtime": 0.3123, |
|
"eval_samples_per_second": 160.086, |
|
"eval_steps_per_second": 6.403, |
|
"learning_rate": 0.001, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_explained_variance": -0.12974193692207336, |
|
"eval_loss": 0.7647674679756165, |
|
"eval_mae": 0.3905399441719055, |
|
"eval_r2": -24.40153694152832, |
|
"eval_rmse": 0.42244094610214233, |
|
"eval_runtime": 0.3317, |
|
"eval_samples_per_second": 150.759, |
|
"eval_steps_per_second": 6.03, |
|
"learning_rate": 0.001, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_explained_variance": -0.10977767407894135, |
|
"eval_loss": 0.7391812205314636, |
|
"eval_mae": 0.376028835773468, |
|
"eval_r2": -22.557889938354492, |
|
"eval_rmse": 0.41028541326522827, |
|
"eval_runtime": 0.3205, |
|
"eval_samples_per_second": 155.989, |
|
"eval_steps_per_second": 6.24, |
|
"learning_rate": 0.001, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_explained_variance": -0.10544480383396149, |
|
"eval_loss": 0.7115270495414734, |
|
"eval_mae": 0.36385056376457214, |
|
"eval_r2": -20.067392349243164, |
|
"eval_rmse": 0.39825379848480225, |
|
"eval_runtime": 0.3104, |
|
"eval_samples_per_second": 161.103, |
|
"eval_steps_per_second": 6.444, |
|
"learning_rate": 0.0001, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_explained_variance": -0.09249210357666016, |
|
"eval_loss": 0.6896975040435791, |
|
"eval_mae": 0.35347798466682434, |
|
"eval_r2": -18.16646385192871, |
|
"eval_rmse": 0.3878582715988159, |
|
"eval_runtime": 0.3226, |
|
"eval_samples_per_second": 155.006, |
|
"eval_steps_per_second": 6.2, |
|
"learning_rate": 0.0001, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_explained_variance": -0.10285507887601852, |
|
"eval_loss": 0.6777035593986511, |
|
"eval_mae": 0.34683120250701904, |
|
"eval_r2": -16.94469451904297, |
|
"eval_rmse": 0.3818005323410034, |
|
"eval_runtime": 0.3016, |
|
"eval_samples_per_second": 165.76, |
|
"eval_steps_per_second": 6.63, |
|
"learning_rate": 0.0001, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_explained_variance": -0.11687294393777847, |
|
"eval_loss": 0.6701759099960327, |
|
"eval_mae": 0.3423532247543335, |
|
"eval_r2": -16.037521362304688, |
|
"eval_rmse": 0.3779585659503937, |
|
"eval_runtime": 0.3107, |
|
"eval_samples_per_second": 160.916, |
|
"eval_steps_per_second": 6.437, |
|
"learning_rate": 0.0001, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_explained_variance": -0.11208173632621765, |
|
"eval_loss": 0.663905918598175, |
|
"eval_mae": 0.3388546407222748, |
|
"eval_r2": -15.605177879333496, |
|
"eval_rmse": 0.37438222765922546, |
|
"eval_runtime": 0.3308, |
|
"eval_samples_per_second": 151.17, |
|
"eval_steps_per_second": 6.047, |
|
"learning_rate": 0.0001, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_explained_variance": -0.10647904872894287, |
|
"eval_loss": 0.656491219997406, |
|
"eval_mae": 0.3345881700515747, |
|
"eval_r2": -14.805088996887207, |
|
"eval_rmse": 0.3702985942363739, |
|
"eval_runtime": 0.3222, |
|
"eval_samples_per_second": 155.166, |
|
"eval_steps_per_second": 6.207, |
|
"learning_rate": 0.0001, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_explained_variance": -0.0958017110824585, |
|
"eval_loss": 0.6501385569572449, |
|
"eval_mae": 0.33100754022598267, |
|
"eval_r2": -14.231175422668457, |
|
"eval_rmse": 0.3668138384819031, |
|
"eval_runtime": 0.3293, |
|
"eval_samples_per_second": 151.853, |
|
"eval_steps_per_second": 6.074, |
|
"learning_rate": 0.0001, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_explained_variance": -0.08547426015138626, |
|
"eval_loss": 0.6467865705490112, |
|
"eval_mae": 0.32885220646858215, |
|
"eval_r2": -14.07986831665039, |
|
"eval_rmse": 0.36475783586502075, |
|
"eval_runtime": 0.3253, |
|
"eval_samples_per_second": 153.717, |
|
"eval_steps_per_second": 6.149, |
|
"learning_rate": 0.0001, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_explained_variance": -0.08231981098651886, |
|
"eval_loss": 0.6471170783042908, |
|
"eval_mae": 0.3288896679878235, |
|
"eval_r2": -14.255745887756348, |
|
"eval_rmse": 0.3650059998035431, |
|
"eval_runtime": 0.305, |
|
"eval_samples_per_second": 163.945, |
|
"eval_steps_per_second": 6.558, |
|
"learning_rate": 0.0001, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_explained_variance": -0.08097466081380844, |
|
"eval_loss": 0.6435126662254333, |
|
"eval_mae": 0.3268200755119324, |
|
"eval_r2": -14.059813499450684, |
|
"eval_rmse": 0.36310678720474243, |
|
"eval_runtime": 0.3322, |
|
"eval_samples_per_second": 150.492, |
|
"eval_steps_per_second": 6.02, |
|
"learning_rate": 0.0001, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_explained_variance": -0.07994352281093597, |
|
"eval_loss": 0.6437923908233643, |
|
"eval_mae": 0.3269612491130829, |
|
"eval_r2": -14.036934852600098, |
|
"eval_rmse": 0.36342939734458923, |
|
"eval_runtime": 0.3107, |
|
"eval_samples_per_second": 160.922, |
|
"eval_steps_per_second": 6.437, |
|
"learning_rate": 0.0001, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_explained_variance": -0.08883289247751236, |
|
"eval_loss": 0.6399621367454529, |
|
"eval_mae": 0.3249860107898712, |
|
"eval_r2": -13.81522274017334, |
|
"eval_rmse": 0.36136963963508606, |
|
"eval_runtime": 0.3104, |
|
"eval_samples_per_second": 161.092, |
|
"eval_steps_per_second": 6.444, |
|
"learning_rate": 0.0001, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_explained_variance": -0.09353505074977875, |
|
"eval_loss": 0.6391971707344055, |
|
"eval_mae": 0.3246455192565918, |
|
"eval_r2": -13.710391998291016, |
|
"eval_rmse": 0.3608955144882202, |
|
"eval_runtime": 0.3119, |
|
"eval_samples_per_second": 160.306, |
|
"eval_steps_per_second": 6.412, |
|
"learning_rate": 0.0001, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_explained_variance": -0.09930111467838287, |
|
"eval_loss": 0.6386714577674866, |
|
"eval_mae": 0.32462170720100403, |
|
"eval_r2": -13.809860229492188, |
|
"eval_rmse": 0.3606450855731964, |
|
"eval_runtime": 0.3149, |
|
"eval_samples_per_second": 158.8, |
|
"eval_steps_per_second": 6.352, |
|
"learning_rate": 0.0001, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_explained_variance": -0.10561199486255646, |
|
"eval_loss": 0.6388444304466248, |
|
"eval_mae": 0.3243348002433777, |
|
"eval_r2": -13.849721908569336, |
|
"eval_rmse": 0.36056435108184814, |
|
"eval_runtime": 0.3094, |
|
"eval_samples_per_second": 161.607, |
|
"eval_steps_per_second": 6.464, |
|
"learning_rate": 0.0001, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_explained_variance": -0.1035044863820076, |
|
"eval_loss": 0.6361631155014038, |
|
"eval_mae": 0.3227779269218445, |
|
"eval_r2": -13.562189102172852, |
|
"eval_rmse": 0.35895633697509766, |
|
"eval_runtime": 0.3094, |
|
"eval_samples_per_second": 161.581, |
|
"eval_steps_per_second": 6.463, |
|
"learning_rate": 0.0001, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_explained_variance": -0.10584529489278793, |
|
"eval_loss": 0.635435163974762, |
|
"eval_mae": 0.3223152160644531, |
|
"eval_r2": -13.645319938659668, |
|
"eval_rmse": 0.35847193002700806, |
|
"eval_runtime": 0.3094, |
|
"eval_samples_per_second": 161.602, |
|
"eval_steps_per_second": 6.464, |
|
"learning_rate": 0.0001, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_explained_variance": -0.1035505086183548, |
|
"eval_loss": 0.6344550848007202, |
|
"eval_mae": 0.32144099473953247, |
|
"eval_r2": -13.602314949035645, |
|
"eval_rmse": 0.35783687233924866, |
|
"eval_runtime": 0.3092, |
|
"eval_samples_per_second": 161.704, |
|
"eval_steps_per_second": 6.468, |
|
"learning_rate": 0.0001, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_explained_variance": -0.11728200316429138, |
|
"eval_loss": 0.6348865628242493, |
|
"eval_mae": 0.3211889863014221, |
|
"eval_r2": -13.630416870117188, |
|
"eval_rmse": 0.3580625355243683, |
|
"eval_runtime": 0.331, |
|
"eval_samples_per_second": 151.064, |
|
"eval_steps_per_second": 6.043, |
|
"learning_rate": 0.0001, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_explained_variance": -0.11483900249004364, |
|
"eval_loss": 0.6332749724388123, |
|
"eval_mae": 0.32009246945381165, |
|
"eval_r2": -13.561347007751465, |
|
"eval_rmse": 0.3570806384086609, |
|
"eval_runtime": 0.3173, |
|
"eval_samples_per_second": 157.565, |
|
"eval_steps_per_second": 6.303, |
|
"learning_rate": 0.0001, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_explained_variance": -0.10828801989555359, |
|
"eval_loss": 0.6295092701911926, |
|
"eval_mae": 0.31767499446868896, |
|
"eval_r2": -13.23308277130127, |
|
"eval_rmse": 0.35479238629341125, |
|
"eval_runtime": 0.3087, |
|
"eval_samples_per_second": 161.989, |
|
"eval_steps_per_second": 6.48, |
|
"learning_rate": 0.0001, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_explained_variance": -0.1047045886516571, |
|
"eval_loss": 0.6285346746444702, |
|
"eval_mae": 0.3173280954360962, |
|
"eval_r2": -13.162256240844727, |
|
"eval_rmse": 0.35434553027153015, |
|
"eval_runtime": 0.3277, |
|
"eval_samples_per_second": 152.596, |
|
"eval_steps_per_second": 6.104, |
|
"learning_rate": 0.0001, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_explained_variance": -0.09264782071113586, |
|
"eval_loss": 0.6263097524642944, |
|
"eval_mae": 0.31627562642097473, |
|
"eval_r2": -12.713174819946289, |
|
"eval_rmse": 0.3532228171825409, |
|
"eval_runtime": 0.3523, |
|
"eval_samples_per_second": 141.931, |
|
"eval_steps_per_second": 5.677, |
|
"learning_rate": 0.0001, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_explained_variance": -0.08934260159730911, |
|
"eval_loss": 0.6272528767585754, |
|
"eval_mae": 0.316723495721817, |
|
"eval_r2": -12.873921394348145, |
|
"eval_rmse": 0.35376670956611633, |
|
"eval_runtime": 0.3073, |
|
"eval_samples_per_second": 162.723, |
|
"eval_steps_per_second": 6.509, |
|
"learning_rate": 0.0001, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_explained_variance": -0.07898036390542984, |
|
"eval_loss": 0.6294133067131042, |
|
"eval_mae": 0.31807586550712585, |
|
"eval_r2": -12.935453414916992, |
|
"eval_rmse": 0.3550169765949249, |
|
"eval_runtime": 0.3094, |
|
"eval_samples_per_second": 161.626, |
|
"eval_steps_per_second": 6.465, |
|
"learning_rate": 0.0001, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_explained_variance": -0.07519607990980148, |
|
"eval_loss": 0.6299176216125488, |
|
"eval_mae": 0.3185364603996277, |
|
"eval_r2": -12.93520736694336, |
|
"eval_rmse": 0.35538923740386963, |
|
"eval_runtime": 0.3097, |
|
"eval_samples_per_second": 161.472, |
|
"eval_steps_per_second": 6.459, |
|
"learning_rate": 0.0001, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_explained_variance": -0.07019602507352829, |
|
"eval_loss": 0.6320692300796509, |
|
"eval_mae": 0.3193182349205017, |
|
"eval_r2": -13.267191886901855, |
|
"eval_rmse": 0.35644862055778503, |
|
"eval_runtime": 0.3161, |
|
"eval_samples_per_second": 158.177, |
|
"eval_steps_per_second": 6.327, |
|
"learning_rate": 0.0001, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_explained_variance": -0.04873532056808472, |
|
"eval_loss": 0.6279481649398804, |
|
"eval_mae": 0.31752488017082214, |
|
"eval_r2": -12.99951171875, |
|
"eval_rmse": 0.3541102707386017, |
|
"eval_runtime": 0.3124, |
|
"eval_samples_per_second": 160.036, |
|
"eval_steps_per_second": 6.401, |
|
"learning_rate": 0.0001, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_explained_variance": -0.04663123935461044, |
|
"eval_loss": 0.6280075907707214, |
|
"eval_mae": 0.31736499071121216, |
|
"eval_r2": -13.00741195678711, |
|
"eval_rmse": 0.35407301783561707, |
|
"eval_runtime": 0.3095, |
|
"eval_samples_per_second": 161.554, |
|
"eval_steps_per_second": 6.462, |
|
"learning_rate": 0.0001, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_explained_variance": -0.04936327785253525, |
|
"eval_loss": 0.6303659081459045, |
|
"eval_mae": 0.3187006115913391, |
|
"eval_r2": -13.230977058410645, |
|
"eval_rmse": 0.35543760657310486, |
|
"eval_runtime": 0.3251, |
|
"eval_samples_per_second": 153.806, |
|
"eval_steps_per_second": 6.152, |
|
"learning_rate": 1e-05, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_explained_variance": -0.04394898936152458, |
|
"eval_loss": 0.6297122836112976, |
|
"eval_mae": 0.31833118200302124, |
|
"eval_r2": -12.983016967773438, |
|
"eval_rmse": 0.3550592064857483, |
|
"eval_runtime": 0.3087, |
|
"eval_samples_per_second": 161.995, |
|
"eval_steps_per_second": 6.48, |
|
"learning_rate": 1e-05, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_explained_variance": -0.04296223446726799, |
|
"eval_loss": 0.630845308303833, |
|
"eval_mae": 0.3193325996398926, |
|
"eval_r2": -13.159842491149902, |
|
"eval_rmse": 0.35580796003341675, |
|
"eval_runtime": 0.3097, |
|
"eval_samples_per_second": 161.465, |
|
"eval_steps_per_second": 6.459, |
|
"learning_rate": 1e-05, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_explained_variance": -0.04348618537187576, |
|
"eval_loss": 0.6291573643684387, |
|
"eval_mae": 0.3182610869407654, |
|
"eval_r2": -13.069788932800293, |
|
"eval_rmse": 0.3547934889793396, |
|
"eval_runtime": 0.3206, |
|
"eval_samples_per_second": 155.938, |
|
"eval_steps_per_second": 6.238, |
|
"learning_rate": 1e-05, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 1e-05, |
|
"step": 80, |
|
"total_flos": 2.9601852123168e+17, |
|
"train_loss": 0.64580397605896, |
|
"train_runtime": 275.9938, |
|
"train_samples_per_second": 27.175, |
|
"train_steps_per_second": 1.087 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.9601852123168e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|