{
  "best_metric": 0.7834388131116452,
  "best_model_checkpoint": "nuc_arg_short_read/checkpoint-1000",
  "epoch": 0.6024244914731868,
  "eval_steps": 100,
  "global_step": 1466,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04109307581672488,
      "grad_norm": 31.502859115600586,
      "learning_rate": 3.2612551159618005e-05,
      "loss": 1.082,
      "step": 100
    },
    {
      "epoch": 0.04109307581672488,
      "eval_loss": 1.110230803489685,
      "eval_macro_f1_score": 0.231015523052847,
      "eval_micro_f1_score": 0.6498612395929695,
      "eval_runtime": 65.9015,
      "eval_samples_per_second": 131.302,
      "eval_steps_per_second": 16.418,
      "eval_weighted_f1_score": 0.635148001778528,
      "step": 100
    },
    {
      "epoch": 0.08218615163344976,
      "grad_norm": 20.506277084350586,
      "learning_rate": 3.0225102319236013e-05,
      "loss": 0.9583,
      "step": 200
    },
    {
      "epoch": 0.08218615163344976,
      "eval_loss": 1.0097736120224,
      "eval_macro_f1_score": 0.2658630277675259,
      "eval_micro_f1_score": 0.6996993524514339,
      "eval_runtime": 65.8265,
      "eval_samples_per_second": 131.452,
      "eval_steps_per_second": 16.437,
      "eval_weighted_f1_score": 0.6867369276159828,
      "step": 200
    },
    {
      "epoch": 0.12327922745017464,
      "grad_norm": 38.54680252075195,
      "learning_rate": 2.7837653478854025e-05,
      "loss": 0.8617,
      "step": 300
    },
    {
      "epoch": 0.12327922745017464,
      "eval_loss": 0.9992199540138245,
      "eval_macro_f1_score": 0.2775563277122497,
      "eval_micro_f1_score": 0.7024745605920444,
      "eval_runtime": 65.7273,
      "eval_samples_per_second": 131.65,
      "eval_steps_per_second": 16.462,
      "eval_weighted_f1_score": 0.6940687878434678,
      "step": 300
    },
    {
      "epoch": 0.16437230326689953,
      "grad_norm": 31.71833038330078,
      "learning_rate": 2.545020463847203e-05,
      "loss": 0.8274,
      "step": 400
    },
    {
      "epoch": 0.16437230326689953,
      "eval_loss": 0.9398019909858704,
      "eval_macro_f1_score": 0.2873778821158207,
      "eval_micro_f1_score": 0.7252543940795559,
      "eval_runtime": 65.782,
      "eval_samples_per_second": 131.541,
      "eval_steps_per_second": 16.448,
      "eval_weighted_f1_score": 0.7146058315338393,
      "step": 400
    },
    {
      "epoch": 0.20546537908362442,
      "grad_norm": 35.85011291503906,
      "learning_rate": 2.306275579809004e-05,
      "loss": 0.8531,
      "step": 500
    },
    {
      "epoch": 0.20546537908362442,
      "eval_loss": 0.8932695388793945,
      "eval_macro_f1_score": 0.2954866176285143,
      "eval_micro_f1_score": 0.741327474560592,
      "eval_runtime": 65.7129,
      "eval_samples_per_second": 131.679,
      "eval_steps_per_second": 16.466,
      "eval_weighted_f1_score": 0.7279810893615292,
      "step": 500
    },
    {
      "epoch": 0.24655845490034928,
      "grad_norm": 20.54347801208496,
      "learning_rate": 2.0675306957708046e-05,
      "loss": 0.9093,
      "step": 600
    },
    {
      "epoch": 0.24655845490034928,
      "eval_loss": 0.8818169236183167,
      "eval_macro_f1_score": 0.30056528194912985,
      "eval_micro_f1_score": 0.7472247918593895,
      "eval_runtime": 65.7581,
      "eval_samples_per_second": 131.588,
      "eval_steps_per_second": 16.454,
      "eval_weighted_f1_score": 0.7317018335209305,
      "step": 600
    },
    {
      "epoch": 0.28765153071707417,
      "grad_norm": 32.46955108642578,
      "learning_rate": 1.8287858117326057e-05,
      "loss": 0.8248,
      "step": 700
    },
    {
      "epoch": 0.28765153071707417,
      "eval_loss": 0.8534889817237854,
      "eval_macro_f1_score": 0.32642004813212766,
      "eval_micro_f1_score": 0.7561285846438482,
      "eval_runtime": 65.8232,
      "eval_samples_per_second": 131.458,
      "eval_steps_per_second": 16.438,
      "eval_weighted_f1_score": 0.7458471048255579,
      "step": 700
    },
    {
      "epoch": 0.32874460653379906,
      "grad_norm": 25.37872886657715,
      "learning_rate": 1.5900409276944065e-05,
      "loss": 0.8564,
      "step": 800
    },
    {
      "epoch": 0.32874460653379906,
      "eval_loss": 0.8019793033599854,
      "eval_macro_f1_score": 0.3347865283632992,
      "eval_micro_f1_score": 0.7738205365402405,
      "eval_runtime": 65.8491,
      "eval_samples_per_second": 131.407,
      "eval_steps_per_second": 16.432,
      "eval_weighted_f1_score": 0.7628080012858742,
      "step": 800
    },
    {
      "epoch": 0.36983768235052394,
      "grad_norm": 28.011754989624023,
      "learning_rate": 1.3512960436562072e-05,
      "loss": 0.8075,
      "step": 900
    },
    {
      "epoch": 0.36983768235052394,
      "eval_loss": 0.7745345234870911,
      "eval_macro_f1_score": 0.35349283320361347,
      "eval_micro_f1_score": 0.7870027752081407,
      "eval_runtime": 65.798,
      "eval_samples_per_second": 131.508,
      "eval_steps_per_second": 16.444,
      "eval_weighted_f1_score": 0.7762822583099493,
      "step": 900
    },
    {
      "epoch": 0.41093075816724883,
      "grad_norm": 19.038227081298828,
      "learning_rate": 1.112551159618008e-05,
      "loss": 0.7665,
      "step": 1000
    },
    {
      "epoch": 0.41093075816724883,
      "eval_loss": 0.7573076486587524,
      "eval_macro_f1_score": 0.34864536541081076,
      "eval_micro_f1_score": 0.7922062904717854,
      "eval_runtime": 65.947,
      "eval_samples_per_second": 131.212,
      "eval_steps_per_second": 16.407,
      "eval_weighted_f1_score": 0.7834388131116452,
      "step": 1000
    },
    {
      "epoch": 0.4520238339839737,
      "grad_norm": 26.79417610168457,
      "learning_rate": 8.73806275579809e-06,
      "loss": 0.7955,
      "step": 1100
    },
    {
      "epoch": 0.4520238339839737,
      "eval_loss": 0.7447549700737,
      "eval_macro_f1_score": 0.372113449187476,
      "eval_micro_f1_score": 0.794981498612396,
      "eval_runtime": 65.765,
      "eval_samples_per_second": 131.575,
      "eval_steps_per_second": 16.453,
      "eval_weighted_f1_score": 0.7855680502155937,
      "step": 1100
    },
    {
      "epoch": 0.49311690980069856,
      "grad_norm": 31.958662033081055,
      "learning_rate": 6.350613915416098e-06,
      "loss": 0.7154,
      "step": 1200
    },
    {
      "epoch": 0.49311690980069856,
      "eval_loss": 0.7216053009033203,
      "eval_macro_f1_score": 0.37899321696255506,
      "eval_micro_f1_score": 0.8055041628122109,
      "eval_runtime": 65.8483,
      "eval_samples_per_second": 131.408,
      "eval_steps_per_second": 16.432,
      "eval_weighted_f1_score": 0.795726477960455,
      "step": 1200
    },
    {
      "epoch": 0.5342099856174235,
      "grad_norm": 14.647438049316406,
      "learning_rate": 3.963165075034106e-06,
      "loss": 0.7072,
      "step": 1300
    },
    {
      "epoch": 0.5342099856174235,
      "eval_loss": 0.7128139734268188,
      "eval_macro_f1_score": 0.38961014388267884,
      "eval_micro_f1_score": 0.8093200740055504,
      "eval_runtime": 65.8124,
      "eval_samples_per_second": 131.48,
      "eval_steps_per_second": 16.441,
      "eval_weighted_f1_score": 0.8020259006536976,
      "step": 1300
    },
    {
      "epoch": 0.5753030614341483,
      "grad_norm": 25.068248748779297,
      "learning_rate": 1.5757162346521146e-06,
      "loss": 0.7256,
      "step": 1400
    },
    {
      "epoch": 0.5753030614341483,
      "eval_loss": 0.7055332064628601,
      "eval_macro_f1_score": 0.3934822923181016,
      "eval_micro_f1_score": 0.8125578168362627,
      "eval_runtime": 65.9442,
      "eval_samples_per_second": 131.217,
      "eval_steps_per_second": 16.408,
      "eval_weighted_f1_score": 0.8039225388312238,
      "step": 1400
    }
  ],
  "logging_steps": 100,
  "max_steps": 1466,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2607458880921600.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}