{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.955033824114604,
  "eval_steps": 200,
  "global_step": 2400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03979307600477517,
      "grad_norm": 19.560626983642578,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.9524,
      "step": 100
    },
    {
      "epoch": 0.07958615200955034,
      "grad_norm": 10.810285568237305,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.7019,
      "step": 200
    },
    {
      "epoch": 0.07958615200955034,
      "eval_accuracy": 0.6237137511693172,
      "eval_loss": 0.6542279124259949,
      "eval_runtime": 24.2722,
      "eval_samples_per_second": 352.337,
      "eval_steps_per_second": 5.521,
      "step": 200
    },
    {
      "epoch": 0.1193792280143255,
      "grad_norm": 6.921683311462402,
      "learning_rate": 1.2e-05,
      "loss": 0.6443,
      "step": 300
    },
    {
      "epoch": 0.15917230401910068,
      "grad_norm": 5.143254280090332,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.6156,
      "step": 400
    },
    {
      "epoch": 0.15917230401910068,
      "eval_accuracy": 0.6449953227315248,
      "eval_loss": 0.6248486638069153,
      "eval_runtime": 24.3077,
      "eval_samples_per_second": 351.823,
      "eval_steps_per_second": 5.513,
      "step": 400
    },
    {
      "epoch": 0.19896538002387584,
      "grad_norm": 4.483047962188721,
      "learning_rate": 2e-05,
      "loss": 0.6307,
      "step": 500
    },
    {
      "epoch": 0.238758456028651,
      "grad_norm": 3.8908305168151855,
      "learning_rate": 1.999004191733529e-05,
      "loss": 0.6263,
      "step": 600
    },
    {
      "epoch": 0.238758456028651,
      "eval_accuracy": 0.641955098222638,
      "eval_loss": 0.6327589750289917,
      "eval_runtime": 24.3113,
      "eval_samples_per_second": 351.771,
      "eval_steps_per_second": 5.512,
      "step": 600
    },
    {
      "epoch": 0.2785515320334262,
      "grad_norm": 2.846820831298828,
      "learning_rate": 1.9960187502023228e-05,
      "loss": 0.6237,
      "step": 700
    },
    {
      "epoch": 0.31834460803820136,
      "grad_norm": 2.4224681854248047,
      "learning_rate": 1.991049621261093e-05,
      "loss": 0.6237,
      "step": 800
    },
    {
      "epoch": 0.31834460803820136,
      "eval_accuracy": 0.6501403180542563,
      "eval_loss": 0.6154850721359253,
      "eval_runtime": 24.2938,
      "eval_samples_per_second": 352.024,
      "eval_steps_per_second": 5.516,
      "step": 800
    },
    {
      "epoch": 0.3581376840429765,
      "grad_norm": 3.673722267150879,
      "learning_rate": 1.9841067015091934e-05,
      "loss": 0.61,
      "step": 900
    },
    {
      "epoch": 0.3979307600477517,
      "grad_norm": 4.1186299324035645,
      "learning_rate": 1.975203818580389e-05,
      "loss": 0.6091,
      "step": 1000
    },
    {
      "epoch": 0.3979307600477517,
      "eval_accuracy": 0.6554022450888681,
      "eval_loss": 0.6121359467506409,
      "eval_runtime": 24.2673,
      "eval_samples_per_second": 352.408,
      "eval_steps_per_second": 5.522,
      "step": 1000
    },
    {
      "epoch": 0.43772383605252685,
      "grad_norm": 3.4261631965637207,
      "learning_rate": 1.964358703603511e-05,
      "loss": 0.6112,
      "step": 1100
    },
    {
      "epoch": 0.477516912057302,
      "grad_norm": 2.712700128555298,
      "learning_rate": 1.9515929558888497e-05,
      "loss": 0.6121,
      "step": 1200
    },
    {
      "epoch": 0.477516912057302,
      "eval_accuracy": 0.659377923292797,
      "eval_loss": 0.606299638748169,
      "eval_runtime": 24.2915,
      "eval_samples_per_second": 352.058,
      "eval_steps_per_second": 5.516,
      "step": 1200
    },
    {
      "epoch": 0.5173099880620772,
      "grad_norm": 4.298662185668945,
      "learning_rate": 1.936931999910609e-05,
      "loss": 0.6026,
      "step": 1300
    },
    {
      "epoch": 0.5571030640668524,
      "grad_norm": 2.0829362869262695,
      "learning_rate": 1.9204050346711034e-05,
      "loss": 0.6022,
      "step": 1400
    },
    {
      "epoch": 0.5571030640668524,
      "eval_accuracy": 0.653999064546305,
      "eval_loss": 0.6064969301223755,
      "eval_runtime": 24.3164,
      "eval_samples_per_second": 351.697,
      "eval_steps_per_second": 5.511,
      "step": 1400
    },
    {
      "epoch": 0.5968961400716275,
      "grad_norm": 3.5935146808624268,
      "learning_rate": 1.9020449755475434e-05,
      "loss": 0.6059,
      "step": 1500
    },
    {
      "epoch": 0.6366892160764027,
      "grad_norm": 2.9716155529022217,
      "learning_rate": 1.881888388737226e-05,
      "loss": 0.5957,
      "step": 1600
    },
    {
      "epoch": 0.6366892160764027,
      "eval_accuracy": 0.6696679139382601,
      "eval_loss": 0.5993675589561462,
      "eval_runtime": 24.2743,
      "eval_samples_per_second": 352.307,
      "eval_steps_per_second": 5.52,
      "step": 1600
    },
    {
      "epoch": 0.6764822920811778,
      "grad_norm": 2.7621397972106934,
      "learning_rate": 1.859975418431689e-05,
      "loss": 0.5926,
      "step": 1700
    },
    {
      "epoch": 0.716275368085953,
      "grad_norm": 2.882694721221924,
      "learning_rate": 1.8363497068648795e-05,
      "loss": 0.6017,
      "step": 1800
    },
    {
      "epoch": 0.716275368085953,
      "eval_accuracy": 0.6667446211412535,
      "eval_loss": 0.6018164157867432,
      "eval_runtime": 24.2955,
      "eval_samples_per_second": 352.0,
      "eval_steps_per_second": 5.515,
      "step": 1800
    },
    {
      "epoch": 0.7560684440907283,
      "grad_norm": 3.4406116008758545,
      "learning_rate": 1.8110583073945566e-05,
      "loss": 0.5994,
      "step": 1900
    },
    {
      "epoch": 0.7958615200955034,
      "grad_norm": 3.2141339778900146,
      "learning_rate": 1.7841515907900467e-05,
      "loss": 0.5902,
      "step": 2000
    },
    {
      "epoch": 0.7958615200955034,
      "eval_accuracy": 0.6710710944808232,
      "eval_loss": 0.6057897210121155,
      "eval_runtime": 24.3126,
      "eval_samples_per_second": 351.752,
      "eval_steps_per_second": 5.512,
      "step": 2000
    },
    {
      "epoch": 0.8356545961002786,
      "grad_norm": 2.75370192527771,
      "learning_rate": 1.755683144912986e-05,
      "loss": 0.6024,
      "step": 2100
    },
    {
      "epoch": 0.8754476721050537,
      "grad_norm": 4.044663429260254,
      "learning_rate": 1.725709667990851e-05,
      "loss": 0.59,
      "step": 2200
    },
    {
      "epoch": 0.8754476721050537,
      "eval_accuracy": 0.6708372310570627,
      "eval_loss": 0.594099760055542,
      "eval_runtime": 24.3163,
      "eval_samples_per_second": 351.699,
      "eval_steps_per_second": 5.511,
      "step": 2200
    },
    {
      "epoch": 0.9152407481098289,
      "grad_norm": 3.4913995265960693,
      "learning_rate": 1.6942908556958297e-05,
      "loss": 0.5919,
      "step": 2300
    },
    {
      "epoch": 0.955033824114604,
      "grad_norm": 3.277291774749756,
      "learning_rate": 1.6614892822539333e-05,
      "loss": 0.5963,
      "step": 2400
    },
    {
      "epoch": 0.955033824114604,
      "eval_accuracy": 0.6722404115996258,
      "eval_loss": 0.5924503207206726,
      "eval_runtime": 24.2851,
      "eval_samples_per_second": 352.151,
      "eval_steps_per_second": 5.518,
      "step": 2400
    }
  ],
  "logging_steps": 100,
  "max_steps": 7539,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}