|
{ |
|
"best_metric": 0.61792588, |
|
"best_model_checkpoint": "/data/project/ys/swift/output/DZJ6B_base/v2-20240821-171924/checkpoint-100", |
|
"epoch": 2.983240223463687, |
|
"eval_steps": 100, |
|
"global_step": 267, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"acc": 0.81204844, |
|
"epoch": 0.0111731843575419, |
|
"grad_norm": 7.0027059641490315, |
|
"learning_rate": 0.0, |
|
"loss": 0.80392802, |
|
"memory(GiB)": 65.61, |
|
"step": 1, |
|
"train_speed(iter/s)": 0.016003 |
|
}, |
|
{ |
|
"acc": 0.80695226, |
|
"epoch": 0.11173184357541899, |
|
"grad_norm": 0.891187038971179, |
|
"learning_rate": 0.0001, |
|
"loss": 0.72480848, |
|
"memory(GiB)": 77.56, |
|
"step": 10, |
|
"train_speed(iter/s)": 0.019435 |
|
}, |
|
{ |
|
"acc": 0.82287941, |
|
"epoch": 0.22346368715083798, |
|
"grad_norm": 0.4166817107409061, |
|
"learning_rate": 9.612403100775195e-05, |
|
"loss": 0.65245237, |
|
"memory(GiB)": 71.22, |
|
"step": 20, |
|
"train_speed(iter/s)": 0.019764 |
|
}, |
|
{ |
|
"acc": 0.82848577, |
|
"epoch": 0.33519553072625696, |
|
"grad_norm": 0.3476093112050191, |
|
"learning_rate": 9.224806201550387e-05, |
|
"loss": 0.62454967, |
|
"memory(GiB)": 55.52, |
|
"step": 30, |
|
"train_speed(iter/s)": 0.019918 |
|
}, |
|
{ |
|
"acc": 0.83262348, |
|
"epoch": 0.44692737430167595, |
|
"grad_norm": 0.3040300336242288, |
|
"learning_rate": 8.837209302325582e-05, |
|
"loss": 0.60746822, |
|
"memory(GiB)": 55.52, |
|
"step": 40, |
|
"train_speed(iter/s)": 0.020099 |
|
}, |
|
{ |
|
"acc": 0.8341733, |
|
"epoch": 0.5586592178770949, |
|
"grad_norm": 0.2927188576333117, |
|
"learning_rate": 8.449612403100775e-05, |
|
"loss": 0.59925413, |
|
"memory(GiB)": 71.5, |
|
"step": 50, |
|
"train_speed(iter/s)": 0.02013 |
|
}, |
|
{ |
|
"acc": 0.83600845, |
|
"epoch": 0.6703910614525139, |
|
"grad_norm": 0.38995740991597366, |
|
"learning_rate": 8.062015503875969e-05, |
|
"loss": 0.59135432, |
|
"memory(GiB)": 55.54, |
|
"step": 60, |
|
"train_speed(iter/s)": 0.02016 |
|
}, |
|
{ |
|
"acc": 0.83653011, |
|
"epoch": 0.7821229050279329, |
|
"grad_norm": 0.3224959350008302, |
|
"learning_rate": 7.674418604651163e-05, |
|
"loss": 0.58722138, |
|
"memory(GiB)": 55.54, |
|
"step": 70, |
|
"train_speed(iter/s)": 0.020203 |
|
}, |
|
{ |
|
"acc": 0.83982677, |
|
"epoch": 0.8938547486033519, |
|
"grad_norm": 0.2717501594592104, |
|
"learning_rate": 7.286821705426357e-05, |
|
"loss": 0.57504473, |
|
"memory(GiB)": 55.54, |
|
"step": 80, |
|
"train_speed(iter/s)": 0.020198 |
|
}, |
|
{ |
|
"acc": 0.84336185, |
|
"epoch": 1.005586592178771, |
|
"grad_norm": 0.38777497021142354, |
|
"learning_rate": 6.89922480620155e-05, |
|
"loss": 0.56000357, |
|
"memory(GiB)": 55.54, |
|
"step": 90, |
|
"train_speed(iter/s)": 0.020243 |
|
}, |
|
{ |
|
"acc": 0.88669682, |
|
"epoch": 1.1173184357541899, |
|
"grad_norm": 0.2800447265610427, |
|
"learning_rate": 6.511627906976745e-05, |
|
"loss": 0.39480281, |
|
"memory(GiB)": 55.54, |
|
"step": 100, |
|
"train_speed(iter/s)": 0.020243 |
|
}, |
|
{ |
|
"epoch": 1.1173184357541899, |
|
"eval_acc": 0.8374920610261495, |
|
"eval_loss": 0.6179258823394775, |
|
"eval_runtime": 14.3303, |
|
"eval_samples_per_second": 31.89, |
|
"eval_steps_per_second": 0.279, |
|
"step": 100 |
|
}, |
|
{ |
|
"acc": 0.88833666, |
|
"epoch": 1.229050279329609, |
|
"grad_norm": 0.2666926991713443, |
|
"learning_rate": 6.124031007751938e-05, |
|
"loss": 0.38764906, |
|
"memory(GiB)": 56.39, |
|
"step": 110, |
|
"train_speed(iter/s)": 0.020132 |
|
}, |
|
{ |
|
"acc": 0.88940392, |
|
"epoch": 1.3407821229050279, |
|
"grad_norm": 0.2666536892955014, |
|
"learning_rate": 5.736434108527132e-05, |
|
"loss": 0.38203318, |
|
"memory(GiB)": 72.39, |
|
"step": 120, |
|
"train_speed(iter/s)": 0.020153 |
|
}, |
|
{ |
|
"acc": 0.88888655, |
|
"epoch": 1.452513966480447, |
|
"grad_norm": 0.2511228236819602, |
|
"learning_rate": 5.348837209302326e-05, |
|
"loss": 0.38319407, |
|
"memory(GiB)": 55.62, |
|
"step": 130, |
|
"train_speed(iter/s)": 0.020142 |
|
}, |
|
{ |
|
"acc": 0.88929482, |
|
"epoch": 1.564245810055866, |
|
"grad_norm": 0.250375456577922, |
|
"learning_rate": 4.96124031007752e-05, |
|
"loss": 0.38362105, |
|
"memory(GiB)": 55.62, |
|
"step": 140, |
|
"train_speed(iter/s)": 0.020157 |
|
}, |
|
{ |
|
"acc": 0.89008141, |
|
"epoch": 1.675977653631285, |
|
"grad_norm": 0.2664008940638054, |
|
"learning_rate": 4.573643410852713e-05, |
|
"loss": 0.37961533, |
|
"memory(GiB)": 63.63, |
|
"step": 150, |
|
"train_speed(iter/s)": 0.02017 |
|
}, |
|
{ |
|
"acc": 0.88946552, |
|
"epoch": 1.7877094972067038, |
|
"grad_norm": 0.2568604002281673, |
|
"learning_rate": 4.186046511627907e-05, |
|
"loss": 0.38261704, |
|
"memory(GiB)": 63.63, |
|
"step": 160, |
|
"train_speed(iter/s)": 0.02016 |
|
}, |
|
{ |
|
"acc": 0.89207363, |
|
"epoch": 1.899441340782123, |
|
"grad_norm": 0.25110691370775395, |
|
"learning_rate": 3.798449612403101e-05, |
|
"loss": 0.37335744, |
|
"memory(GiB)": 63.63, |
|
"step": 170, |
|
"train_speed(iter/s)": 0.020162 |
|
}, |
|
{ |
|
"acc": 0.89511375, |
|
"epoch": 2.011173184357542, |
|
"grad_norm": 0.4578108117725898, |
|
"learning_rate": 3.4108527131782945e-05, |
|
"loss": 0.36452789, |
|
"memory(GiB)": 63.63, |
|
"step": 180, |
|
"train_speed(iter/s)": 0.02019 |
|
}, |
|
{ |
|
"acc": 0.93461123, |
|
"epoch": 2.122905027932961, |
|
"grad_norm": 0.29379733174286393, |
|
"learning_rate": 3.0232558139534883e-05, |
|
"loss": 0.22719576, |
|
"memory(GiB)": 63.63, |
|
"step": 190, |
|
"train_speed(iter/s)": 0.020204 |
|
}, |
|
{ |
|
"acc": 0.93616581, |
|
"epoch": 2.2346368715083798, |
|
"grad_norm": 0.2816020644085949, |
|
"learning_rate": 2.6356589147286826e-05, |
|
"loss": 0.22034373, |
|
"memory(GiB)": 63.63, |
|
"step": 200, |
|
"train_speed(iter/s)": 0.020189 |
|
}, |
|
{ |
|
"epoch": 2.2346368715083798, |
|
"eval_acc": 0.8397126891074994, |
|
"eval_loss": 0.6819891929626465, |
|
"eval_runtime": 14.3111, |
|
"eval_samples_per_second": 31.933, |
|
"eval_steps_per_second": 0.28, |
|
"step": 200 |
|
}, |
|
{ |
|
"acc": 0.93667021, |
|
"epoch": 2.346368715083799, |
|
"grad_norm": 0.27143644801177985, |
|
"learning_rate": 2.2480620155038764e-05, |
|
"loss": 0.21893153, |
|
"memory(GiB)": 63.63, |
|
"step": 210, |
|
"train_speed(iter/s)": 0.020144 |
|
}, |
|
{ |
|
"acc": 0.9375206, |
|
"epoch": 2.458100558659218, |
|
"grad_norm": 0.24768232763167009, |
|
"learning_rate": 1.8604651162790697e-05, |
|
"loss": 0.21632226, |
|
"memory(GiB)": 63.63, |
|
"step": 220, |
|
"train_speed(iter/s)": 0.020144 |
|
}, |
|
{ |
|
"acc": 0.93809719, |
|
"epoch": 2.5698324022346366, |
|
"grad_norm": 0.24790064179599028, |
|
"learning_rate": 1.4728682170542638e-05, |
|
"loss": 0.21427879, |
|
"memory(GiB)": 63.63, |
|
"step": 230, |
|
"train_speed(iter/s)": 0.02015 |
|
}, |
|
{ |
|
"acc": 0.93803339, |
|
"epoch": 2.6815642458100557, |
|
"grad_norm": 0.24686113799484746, |
|
"learning_rate": 1.0852713178294575e-05, |
|
"loss": 0.21534572, |
|
"memory(GiB)": 63.63, |
|
"step": 240, |
|
"train_speed(iter/s)": 0.020161 |
|
}, |
|
{ |
|
"acc": 0.93708591, |
|
"epoch": 2.793296089385475, |
|
"grad_norm": 0.2496609236672877, |
|
"learning_rate": 6.976744186046512e-06, |
|
"loss": 0.21791611, |
|
"memory(GiB)": 63.63, |
|
"step": 250, |
|
"train_speed(iter/s)": 0.020172 |
|
}, |
|
{ |
|
"acc": 0.93700886, |
|
"epoch": 2.905027932960894, |
|
"grad_norm": 0.24396510705233476, |
|
"learning_rate": 3.10077519379845e-06, |
|
"loss": 0.21976945, |
|
"memory(GiB)": 63.63, |
|
"step": 260, |
|
"train_speed(iter/s)": 0.020173 |
|
}, |
|
{ |
|
"epoch": 2.983240223463687, |
|
"eval_acc": 0.8414855362177129, |
|
"eval_loss": 0.6801542639732361, |
|
"eval_runtime": 14.3187, |
|
"eval_samples_per_second": 31.916, |
|
"eval_steps_per_second": 0.279, |
|
"step": 267 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 267, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 267385770803200.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|