Phi-3.5-Chinese / trainer_state.json
qingmai's picture
Init
f8a5226
raw
history blame
8.53 kB
{
"best_metric": 0.61792588,
"best_model_checkpoint": "/data/project/ys/swift/output/DZJ6B_base/v2-20240821-171924/checkpoint-100",
"epoch": 2.983240223463687,
"eval_steps": 100,
"global_step": 267,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.81204844,
"epoch": 0.0111731843575419,
"grad_norm": 7.0027059641490315,
"learning_rate": 0.0,
"loss": 0.80392802,
"memory(GiB)": 65.61,
"step": 1,
"train_speed(iter/s)": 0.016003
},
{
"acc": 0.80695226,
"epoch": 0.11173184357541899,
"grad_norm": 0.891187038971179,
"learning_rate": 0.0001,
"loss": 0.72480848,
"memory(GiB)": 77.56,
"step": 10,
"train_speed(iter/s)": 0.019435
},
{
"acc": 0.82287941,
"epoch": 0.22346368715083798,
"grad_norm": 0.4166817107409061,
"learning_rate": 9.612403100775195e-05,
"loss": 0.65245237,
"memory(GiB)": 71.22,
"step": 20,
"train_speed(iter/s)": 0.019764
},
{
"acc": 0.82848577,
"epoch": 0.33519553072625696,
"grad_norm": 0.3476093112050191,
"learning_rate": 9.224806201550387e-05,
"loss": 0.62454967,
"memory(GiB)": 55.52,
"step": 30,
"train_speed(iter/s)": 0.019918
},
{
"acc": 0.83262348,
"epoch": 0.44692737430167595,
"grad_norm": 0.3040300336242288,
"learning_rate": 8.837209302325582e-05,
"loss": 0.60746822,
"memory(GiB)": 55.52,
"step": 40,
"train_speed(iter/s)": 0.020099
},
{
"acc": 0.8341733,
"epoch": 0.5586592178770949,
"grad_norm": 0.2927188576333117,
"learning_rate": 8.449612403100775e-05,
"loss": 0.59925413,
"memory(GiB)": 71.5,
"step": 50,
"train_speed(iter/s)": 0.02013
},
{
"acc": 0.83600845,
"epoch": 0.6703910614525139,
"grad_norm": 0.38995740991597366,
"learning_rate": 8.062015503875969e-05,
"loss": 0.59135432,
"memory(GiB)": 55.54,
"step": 60,
"train_speed(iter/s)": 0.02016
},
{
"acc": 0.83653011,
"epoch": 0.7821229050279329,
"grad_norm": 0.3224959350008302,
"learning_rate": 7.674418604651163e-05,
"loss": 0.58722138,
"memory(GiB)": 55.54,
"step": 70,
"train_speed(iter/s)": 0.020203
},
{
"acc": 0.83982677,
"epoch": 0.8938547486033519,
"grad_norm": 0.2717501594592104,
"learning_rate": 7.286821705426357e-05,
"loss": 0.57504473,
"memory(GiB)": 55.54,
"step": 80,
"train_speed(iter/s)": 0.020198
},
{
"acc": 0.84336185,
"epoch": 1.005586592178771,
"grad_norm": 0.38777497021142354,
"learning_rate": 6.89922480620155e-05,
"loss": 0.56000357,
"memory(GiB)": 55.54,
"step": 90,
"train_speed(iter/s)": 0.020243
},
{
"acc": 0.88669682,
"epoch": 1.1173184357541899,
"grad_norm": 0.2800447265610427,
"learning_rate": 6.511627906976745e-05,
"loss": 0.39480281,
"memory(GiB)": 55.54,
"step": 100,
"train_speed(iter/s)": 0.020243
},
{
"epoch": 1.1173184357541899,
"eval_acc": 0.8374920610261495,
"eval_loss": 0.6179258823394775,
"eval_runtime": 14.3303,
"eval_samples_per_second": 31.89,
"eval_steps_per_second": 0.279,
"step": 100
},
{
"acc": 0.88833666,
"epoch": 1.229050279329609,
"grad_norm": 0.2666926991713443,
"learning_rate": 6.124031007751938e-05,
"loss": 0.38764906,
"memory(GiB)": 56.39,
"step": 110,
"train_speed(iter/s)": 0.020132
},
{
"acc": 0.88940392,
"epoch": 1.3407821229050279,
"grad_norm": 0.2666536892955014,
"learning_rate": 5.736434108527132e-05,
"loss": 0.38203318,
"memory(GiB)": 72.39,
"step": 120,
"train_speed(iter/s)": 0.020153
},
{
"acc": 0.88888655,
"epoch": 1.452513966480447,
"grad_norm": 0.2511228236819602,
"learning_rate": 5.348837209302326e-05,
"loss": 0.38319407,
"memory(GiB)": 55.62,
"step": 130,
"train_speed(iter/s)": 0.020142
},
{
"acc": 0.88929482,
"epoch": 1.564245810055866,
"grad_norm": 0.250375456577922,
"learning_rate": 4.96124031007752e-05,
"loss": 0.38362105,
"memory(GiB)": 55.62,
"step": 140,
"train_speed(iter/s)": 0.020157
},
{
"acc": 0.89008141,
"epoch": 1.675977653631285,
"grad_norm": 0.2664008940638054,
"learning_rate": 4.573643410852713e-05,
"loss": 0.37961533,
"memory(GiB)": 63.63,
"step": 150,
"train_speed(iter/s)": 0.02017
},
{
"acc": 0.88946552,
"epoch": 1.7877094972067038,
"grad_norm": 0.2568604002281673,
"learning_rate": 4.186046511627907e-05,
"loss": 0.38261704,
"memory(GiB)": 63.63,
"step": 160,
"train_speed(iter/s)": 0.02016
},
{
"acc": 0.89207363,
"epoch": 1.899441340782123,
"grad_norm": 0.25110691370775395,
"learning_rate": 3.798449612403101e-05,
"loss": 0.37335744,
"memory(GiB)": 63.63,
"step": 170,
"train_speed(iter/s)": 0.020162
},
{
"acc": 0.89511375,
"epoch": 2.011173184357542,
"grad_norm": 0.4578108117725898,
"learning_rate": 3.4108527131782945e-05,
"loss": 0.36452789,
"memory(GiB)": 63.63,
"step": 180,
"train_speed(iter/s)": 0.02019
},
{
"acc": 0.93461123,
"epoch": 2.122905027932961,
"grad_norm": 0.29379733174286393,
"learning_rate": 3.0232558139534883e-05,
"loss": 0.22719576,
"memory(GiB)": 63.63,
"step": 190,
"train_speed(iter/s)": 0.020204
},
{
"acc": 0.93616581,
"epoch": 2.2346368715083798,
"grad_norm": 0.2816020644085949,
"learning_rate": 2.6356589147286826e-05,
"loss": 0.22034373,
"memory(GiB)": 63.63,
"step": 200,
"train_speed(iter/s)": 0.020189
},
{
"epoch": 2.2346368715083798,
"eval_acc": 0.8397126891074994,
"eval_loss": 0.6819891929626465,
"eval_runtime": 14.3111,
"eval_samples_per_second": 31.933,
"eval_steps_per_second": 0.28,
"step": 200
},
{
"acc": 0.93667021,
"epoch": 2.346368715083799,
"grad_norm": 0.27143644801177985,
"learning_rate": 2.2480620155038764e-05,
"loss": 0.21893153,
"memory(GiB)": 63.63,
"step": 210,
"train_speed(iter/s)": 0.020144
},
{
"acc": 0.9375206,
"epoch": 2.458100558659218,
"grad_norm": 0.24768232763167009,
"learning_rate": 1.8604651162790697e-05,
"loss": 0.21632226,
"memory(GiB)": 63.63,
"step": 220,
"train_speed(iter/s)": 0.020144
},
{
"acc": 0.93809719,
"epoch": 2.5698324022346366,
"grad_norm": 0.24790064179599028,
"learning_rate": 1.4728682170542638e-05,
"loss": 0.21427879,
"memory(GiB)": 63.63,
"step": 230,
"train_speed(iter/s)": 0.02015
},
{
"acc": 0.93803339,
"epoch": 2.6815642458100557,
"grad_norm": 0.24686113799484746,
"learning_rate": 1.0852713178294575e-05,
"loss": 0.21534572,
"memory(GiB)": 63.63,
"step": 240,
"train_speed(iter/s)": 0.020161
},
{
"acc": 0.93708591,
"epoch": 2.793296089385475,
"grad_norm": 0.2496609236672877,
"learning_rate": 6.976744186046512e-06,
"loss": 0.21791611,
"memory(GiB)": 63.63,
"step": 250,
"train_speed(iter/s)": 0.020172
},
{
"acc": 0.93700886,
"epoch": 2.905027932960894,
"grad_norm": 0.24396510705233476,
"learning_rate": 3.10077519379845e-06,
"loss": 0.21976945,
"memory(GiB)": 63.63,
"step": 260,
"train_speed(iter/s)": 0.020173
},
{
"epoch": 2.983240223463687,
"eval_acc": 0.8414855362177129,
"eval_loss": 0.6801542639732361,
"eval_runtime": 14.3187,
"eval_samples_per_second": 31.916,
"eval_steps_per_second": 0.279,
"step": 267
}
],
"logging_steps": 10,
"max_steps": 267,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 267385770803200.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}