{ "best_metric": 0.7834388131116452, "best_model_checkpoint": "nuc_arg_short_read/checkpoint-1000", "epoch": 0.6024244914731868, "eval_steps": 100, "global_step": 1466, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04109307581672488, "grad_norm": 31.502859115600586, "learning_rate": 3.2612551159618005e-05, "loss": 1.082, "step": 100 }, { "epoch": 0.04109307581672488, "eval_loss": 1.110230803489685, "eval_macro_f1_score": 0.231015523052847, "eval_micro_f1_score": 0.6498612395929695, "eval_runtime": 65.9015, "eval_samples_per_second": 131.302, "eval_steps_per_second": 16.418, "eval_weighted_f1_score": 0.635148001778528, "step": 100 }, { "epoch": 0.08218615163344976, "grad_norm": 20.506277084350586, "learning_rate": 3.0225102319236013e-05, "loss": 0.9583, "step": 200 }, { "epoch": 0.08218615163344976, "eval_loss": 1.0097736120224, "eval_macro_f1_score": 0.2658630277675259, "eval_micro_f1_score": 0.6996993524514339, "eval_runtime": 65.8265, "eval_samples_per_second": 131.452, "eval_steps_per_second": 16.437, "eval_weighted_f1_score": 0.6867369276159828, "step": 200 }, { "epoch": 0.12327922745017464, "grad_norm": 38.54680252075195, "learning_rate": 2.7837653478854025e-05, "loss": 0.8617, "step": 300 }, { "epoch": 0.12327922745017464, "eval_loss": 0.9992199540138245, "eval_macro_f1_score": 0.2775563277122497, "eval_micro_f1_score": 0.7024745605920444, "eval_runtime": 65.7273, "eval_samples_per_second": 131.65, "eval_steps_per_second": 16.462, "eval_weighted_f1_score": 0.6940687878434678, "step": 300 }, { "epoch": 0.16437230326689953, "grad_norm": 31.71833038330078, "learning_rate": 2.545020463847203e-05, "loss": 0.8274, "step": 400 }, { "epoch": 0.16437230326689953, "eval_loss": 0.9398019909858704, "eval_macro_f1_score": 0.2873778821158207, "eval_micro_f1_score": 0.7252543940795559, "eval_runtime": 65.782, "eval_samples_per_second": 131.541, "eval_steps_per_second": 16.448, "eval_weighted_f1_score": 0.7146058315338393, "step": 400 }, { "epoch": 0.20546537908362442, "grad_norm": 35.85011291503906, "learning_rate": 2.306275579809004e-05, "loss": 0.8531, "step": 500 }, { "epoch": 0.20546537908362442, "eval_loss": 0.8932695388793945, "eval_macro_f1_score": 0.2954866176285143, "eval_micro_f1_score": 0.741327474560592, "eval_runtime": 65.7129, "eval_samples_per_second": 131.679, "eval_steps_per_second": 16.466, "eval_weighted_f1_score": 0.7279810893615292, "step": 500 }, { "epoch": 0.24655845490034928, "grad_norm": 20.54347801208496, "learning_rate": 2.0675306957708046e-05, "loss": 0.9093, "step": 600 }, { "epoch": 0.24655845490034928, "eval_loss": 0.8818169236183167, "eval_macro_f1_score": 0.30056528194912985, "eval_micro_f1_score": 0.7472247918593895, "eval_runtime": 65.7581, "eval_samples_per_second": 131.588, "eval_steps_per_second": 16.454, "eval_weighted_f1_score": 0.7317018335209305, "step": 600 }, { "epoch": 0.28765153071707417, "grad_norm": 32.46955108642578, "learning_rate": 1.8287858117326057e-05, "loss": 0.8248, "step": 700 }, { "epoch": 0.28765153071707417, "eval_loss": 0.8534889817237854, "eval_macro_f1_score": 0.32642004813212766, "eval_micro_f1_score": 0.7561285846438482, "eval_runtime": 65.8232, "eval_samples_per_second": 131.458, "eval_steps_per_second": 16.438, "eval_weighted_f1_score": 0.7458471048255579, "step": 700 }, { "epoch": 0.32874460653379906, "grad_norm": 25.37872886657715, "learning_rate": 1.5900409276944065e-05, "loss": 0.8564, "step": 800 }, { "epoch": 0.32874460653379906, "eval_loss": 0.8019793033599854, "eval_macro_f1_score": 0.3347865283632992, "eval_micro_f1_score": 0.7738205365402405, "eval_runtime": 65.8491, "eval_samples_per_second": 131.407, "eval_steps_per_second": 16.432, "eval_weighted_f1_score": 0.7628080012858742, "step": 800 }, { "epoch": 0.36983768235052394, "grad_norm": 28.011754989624023, "learning_rate": 1.3512960436562072e-05, "loss": 0.8075, "step": 900 }, { "epoch": 0.36983768235052394, "eval_loss": 0.7745345234870911, "eval_macro_f1_score": 0.35349283320361347, "eval_micro_f1_score": 0.7870027752081407, "eval_runtime": 65.798, "eval_samples_per_second": 131.508, "eval_steps_per_second": 16.444, "eval_weighted_f1_score": 0.7762822583099493, "step": 900 }, { "epoch": 0.41093075816724883, "grad_norm": 19.038227081298828, "learning_rate": 1.112551159618008e-05, "loss": 0.7665, "step": 1000 }, { "epoch": 0.41093075816724883, "eval_loss": 0.7573076486587524, "eval_macro_f1_score": 0.34864536541081076, "eval_micro_f1_score": 0.7922062904717854, "eval_runtime": 65.947, "eval_samples_per_second": 131.212, "eval_steps_per_second": 16.407, "eval_weighted_f1_score": 0.7834388131116452, "step": 1000 }, { "epoch": 0.4520238339839737, "grad_norm": 26.79417610168457, "learning_rate": 8.73806275579809e-06, "loss": 0.7955, "step": 1100 }, { "epoch": 0.4520238339839737, "eval_loss": 0.7447549700737, "eval_macro_f1_score": 0.372113449187476, "eval_micro_f1_score": 0.794981498612396, "eval_runtime": 65.765, "eval_samples_per_second": 131.575, "eval_steps_per_second": 16.453, "eval_weighted_f1_score": 0.7855680502155937, "step": 1100 }, { "epoch": 0.49311690980069856, "grad_norm": 31.958662033081055, "learning_rate": 6.350613915416098e-06, "loss": 0.7154, "step": 1200 }, { "epoch": 0.49311690980069856, "eval_loss": 0.7216053009033203, "eval_macro_f1_score": 0.37899321696255506, "eval_micro_f1_score": 0.8055041628122109, "eval_runtime": 65.8483, "eval_samples_per_second": 131.408, "eval_steps_per_second": 16.432, "eval_weighted_f1_score": 0.795726477960455, "step": 1200 }, { "epoch": 0.5342099856174235, "grad_norm": 14.647438049316406, "learning_rate": 3.963165075034106e-06, "loss": 0.7072, "step": 1300 }, { "epoch": 0.5342099856174235, "eval_loss": 0.7128139734268188, "eval_macro_f1_score": 0.38961014388267884, "eval_micro_f1_score": 0.8093200740055504, "eval_runtime": 65.8124, "eval_samples_per_second": 131.48, "eval_steps_per_second": 16.441, "eval_weighted_f1_score": 0.8020259006536976, "step": 1300 }, { "epoch": 0.5753030614341483, "grad_norm": 25.068248748779297, "learning_rate": 1.5757162346521146e-06, "loss": 0.7256, "step": 1400 }, { "epoch": 0.5753030614341483, "eval_loss": 0.7055332064628601, "eval_macro_f1_score": 0.3934822923181016, "eval_micro_f1_score": 0.8125578168362627, "eval_runtime": 65.9442, "eval_samples_per_second": 131.217, "eval_steps_per_second": 16.408, "eval_weighted_f1_score": 0.8039225388312238, "step": 1400 } ], "logging_steps": 100, "max_steps": 1466, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2607458880921600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }