{
"best_metric": 1.52509904,
"best_model_checkpoint": "D:\\_____NEW_NN\\LLM\\MiniCPM-V\\finetune\\output\\phi3-vision-128k-instruct\\v0-20240629-080216\\checkpoint-300",
"epoch": 3.5225048923679063,
"eval_steps": 50,
"global_step": 900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.49833804,
"epoch": 0.003913894324853229,
"grad_norm": 0.77734375,
"learning_rate": 2.745098039215686e-06,
"loss": 2.37747383,
"memory(GiB)": 17.35,
"step": 1,
"train_speed(iter/s)": 0.076826
},
{
|
|
"acc": 0.50652587,
|
|
"epoch": 0.019569471624266144,
|
|
"grad_norm": 1.140625,
|
|
"learning_rate": 1.372549019607843e-05,
|
|
"loss": 2.29183841,
|
|
"memory(GiB)": 19.33,
|
|
"step": 5,
|
|
"train_speed(iter/s)": 0.082188
|
|
},
|
|
{
|
|
"acc": 0.52587533,
|
|
"epoch": 0.03913894324853229,
|
|
"grad_norm": 0.68359375,
|
|
"learning_rate": 2.745098039215686e-05,
|
|
"loss": 2.22724895,
|
|
"memory(GiB)": 19.89,
|
|
"step": 10,
|
|
"train_speed(iter/s)": 0.082805
|
|
},
|
|
{
|
|
"acc": 0.52128973,
|
|
"epoch": 0.05870841487279843,
|
|
"grad_norm": 0.8359375,
|
|
"learning_rate": 4.117647058823529e-05,
|
|
"loss": 2.27491264,
|
|
"memory(GiB)": 19.24,
|
|
"step": 15,
|
|
"train_speed(iter/s)": 0.082482
|
|
},
|
|
{
|
|
"acc": 0.51135335,
|
|
"epoch": 0.07827788649706457,
|
|
"grad_norm": 0.66015625,
|
|
"learning_rate": 5.490196078431372e-05,
|
|
"loss": 2.32762127,
|
|
"memory(GiB)": 19.86,
|
|
"step": 20,
|
|
"train_speed(iter/s)": 0.082557
|
|
},
|
|
{
|
|
"acc": 0.54442377,
|
|
"epoch": 0.09784735812133072,
|
|
"grad_norm": 0.65625,
|
|
"learning_rate": 6.862745098039214e-05,
|
|
"loss": 2.09772224,
|
|
"memory(GiB)": 19.05,
|
|
"step": 25,
|
|
"train_speed(iter/s)": 0.082348
|
|
},
|
|
{
|
|
"acc": 0.5545311,
|
|
"epoch": 0.11741682974559686,
|
|
"grad_norm": 0.62109375,
|
|
"learning_rate": 8.235294117647058e-05,
|
|
"loss": 2.00072975,
|
|
"memory(GiB)": 19.89,
|
|
"step": 30,
|
|
"train_speed(iter/s)": 0.082166
|
|
},
|
|
{
|
|
"acc": 0.57092514,
|
|
"epoch": 0.136986301369863,
|
|
"grad_norm": 0.9296875,
|
|
"learning_rate": 9.6078431372549e-05,
|
|
"loss": 1.94450474,
|
|
"memory(GiB)": 19.16,
|
|
"step": 35,
|
|
"train_speed(iter/s)": 0.081966
|
|
},
|
|
{
|
|
"acc": 0.56716595,
|
|
"epoch": 0.15655577299412915,
|
|
"grad_norm": 0.7734375,
|
|
"learning_rate": 0.00010980392156862745,
|
|
"loss": 1.90242462,
|
|
"memory(GiB)": 19.62,
|
|
"step": 40,
|
|
"train_speed(iter/s)": 0.081987
|
|
},
|
|
{
|
|
"acc": 0.57822714,
|
|
"epoch": 0.1761252446183953,
|
|
"grad_norm": 0.74609375,
|
|
"learning_rate": 0.00012352941176470587,
|
|
"loss": 1.83147659,
|
|
"memory(GiB)": 19.99,
|
|
"step": 45,
|
|
"train_speed(iter/s)": 0.081878
|
|
},
|
|
{
|
|
"acc": 0.57696896,
|
|
"epoch": 0.19569471624266144,
|
|
"grad_norm": 0.85546875,
|
|
"learning_rate": 0.00013725490196078428,
|
|
"loss": 1.82299595,
|
|
"memory(GiB)": 19.11,
|
|
"step": 50,
|
|
"train_speed(iter/s)": 0.081843
|
|
},
|
|
{
|
|
"epoch": 0.19569471624266144,
|
|
"eval_acc": 0.583503534956795,
|
|
"eval_loss": 1.8029242753982544,
|
|
"eval_runtime": 85.1254,
|
|
"eval_samples_per_second": 0.893,
|
|
"eval_steps_per_second": 0.446,
|
|
"step": 50
|
|
},
|
|
{
|
|
"acc": 0.59343066,
|
|
"epoch": 0.21526418786692758,
|
|
"grad_norm": 1.0,
|
|
"learning_rate": 0.0001399941138119636,
|
|
"loss": 1.82339039,
|
|
"memory(GiB)": 22.92,
|
|
"step": 55,
|
|
"train_speed(iter/s)": 0.072544
|
|
},
|
|
{
|
|
"acc": 0.58571839,
|
|
"epoch": 0.23483365949119372,
|
|
"grad_norm": 0.7734375,
|
|
"learning_rate": 0.00013997020286964757,
|
|
"loss": 1.80549526,
|
|
"memory(GiB)": 19.43,
|
|
"step": 60,
|
|
"train_speed(iter/s)": 0.073269
|
|
},
|
|
{
|
|
"acc": 0.60369935,
|
|
"epoch": 0.25440313111545987,
|
|
"grad_norm": 0.99609375,
|
|
"learning_rate": 0.0001399279055646442,
|
|
"loss": 1.6768074,
|
|
"memory(GiB)": 19.57,
|
|
"step": 65,
|
|
"train_speed(iter/s)": 0.073897
|
|
},
|
|
{
|
|
"acc": 0.58763909,
|
|
"epoch": 0.273972602739726,
|
|
"grad_norm": 1.1640625,
|
|
"learning_rate": 0.00013986723301159307,
|
|
"loss": 1.79169483,
|
|
"memory(GiB)": 19.48,
|
|
"step": 70,
|
|
"train_speed(iter/s)": 0.074533
|
|
},
|
|
{
|
|
"acc": 0.58979025,
|
|
"epoch": 0.29354207436399216,
|
|
"grad_norm": 0.69140625,
|
|
"learning_rate": 0.00013978820115367462,
|
|
"loss": 1.72388344,
|
|
"memory(GiB)": 19.35,
|
|
"step": 75,
|
|
"train_speed(iter/s)": 0.075045
|
|
},
|
|
{
|
|
"acc": 0.59725327,
|
|
"epoch": 0.3131115459882583,
|
|
"grad_norm": 0.75,
|
|
"learning_rate": 0.00013969083075842048,
|
|
"loss": 1.70864868,
|
|
"memory(GiB)": 19.49,
|
|
"step": 80,
|
|
"train_speed(iter/s)": 0.075523
|
|
},
|
|
{
|
|
"acc": 0.60098982,
|
|
"epoch": 0.33268101761252444,
|
|
"grad_norm": 4.59375,
|
|
"learning_rate": 0.00013957514741225646,
|
|
"loss": 1.67311764,
|
|
"memory(GiB)": 20.01,
|
|
"step": 85,
|
|
"train_speed(iter/s)": 0.075928
|
|
},
|
|
{
|
|
"acc": 0.58315139,
|
|
"epoch": 0.3522504892367906,
|
|
"grad_norm": 0.8359375,
|
|
"learning_rate": 0.00013944118151377894,
|
|
"loss": 1.74437752,
|
|
"memory(GiB)": 20.14,
|
|
"step": 90,
|
|
"train_speed(iter/s)": 0.076154
|
|
},
|
|
{
|
|
"acc": 0.6138227,
|
|
"epoch": 0.37181996086105673,
|
|
"grad_norm": 0.75,
|
|
"learning_rate": 0.0001392889682657671,
|
|
"loss": 1.63750076,
|
|
"memory(GiB)": 19.59,
|
|
"step": 95,
|
|
"train_speed(iter/s)": 0.076253
|
|
},
|
|
{
|
|
"acc": 0.63383026,
|
|
"epoch": 0.3913894324853229,
|
|
"grad_norm": 0.8515625,
|
|
"learning_rate": 0.00013911854766593233,
|
|
"loss": 1.56653557,
|
|
"memory(GiB)": 19.5,
|
|
"step": 100,
|
|
"train_speed(iter/s)": 0.076386
|
|
},
|
|
{
|
|
"epoch": 0.3913894324853229,
|
|
"eval_acc": 0.604241948153967,
|
|
"eval_loss": 1.6681365966796875,
|
|
"eval_runtime": 72.2811,
|
|
"eval_samples_per_second": 1.051,
|
|
"eval_steps_per_second": 0.526,
|
|
"step": 100
|
|
},
|
|
{
|
|
"acc": 0.61646304,
|
|
"epoch": 0.410958904109589,
|
|
"grad_norm": 0.73046875,
|
|
"learning_rate": 0.00013892996449640807,
|
|
"loss": 1.59651537,
|
|
"memory(GiB)": 22.5,
|
|
"step": 105,
|
|
"train_speed(iter/s)": 0.072857
|
|
},
|
|
{
|
|
"acc": 0.60897431,
|
|
"epoch": 0.43052837573385516,
|
|
"grad_norm": 0.83984375,
|
|
"learning_rate": 0.00013872326831198205,
|
|
"loss": 1.70257473,
|
|
"memory(GiB)": 19.42,
|
|
"step": 110,
|
|
"train_speed(iter/s)": 0.073309
|
|
},
|
|
{
|
|
"acc": 0.58328586,
|
|
"epoch": 0.4500978473581213,
|
|
"grad_norm": 0.9453125,
|
|
"learning_rate": 0.00013849851342707462,
|
|
"loss": 1.71216717,
|
|
"memory(GiB)": 19.47,
|
|
"step": 115,
|
|
"train_speed(iter/s)": 0.073753
|
|
},
|
|
{
|
|
"acc": 0.62397904,
|
|
"epoch": 0.46966731898238745,
|
|
"grad_norm": 0.80078125,
|
|
"learning_rate": 0.0001382557589014664,
|
|
"loss": 1.54239073,
|
|
"memory(GiB)": 19.33,
|
|
"step": 120,
|
|
"train_speed(iter/s)": 0.074078
|
|
},
|
|
{
|
|
"acc": 0.60271235,
|
|
"epoch": 0.4892367906066536,
|
|
"grad_norm": 1.171875,
|
|
"learning_rate": 0.0001379950685247788,
|
|
"loss": 1.72333088,
|
|
"memory(GiB)": 19.37,
|
|
"step": 125,
|
|
"train_speed(iter/s)": 0.074428
|
|
},
|
|
{
|
|
"acc": 0.5755064,
|
|
"epoch": 0.5088062622309197,
|
|
"grad_norm": 0.94921875,
|
|
"learning_rate": 0.00013771651079971182,
|
|
"loss": 1.81728477,
|
|
"memory(GiB)": 19.52,
|
|
"step": 130,
|
|
"train_speed(iter/s)": 0.074768
|
|
},
|
|
{
|
|
"acc": 0.5844254,
|
|
"epoch": 0.5283757338551859,
|
|
"grad_norm": 0.8515625,
|
|
"learning_rate": 0.00013742015892404325,
|
|
"loss": 1.77252998,
|
|
"memory(GiB)": 19.51,
|
|
"step": 135,
|
|
"train_speed(iter/s)": 0.075066
|
|
},
|
|
{
|
|
"acc": 0.5998323,
|
|
"epoch": 0.547945205479452,
|
|
"grad_norm": 0.8671875,
|
|
"learning_rate": 0.0001371060907713942,
|
|
"loss": 1.69012871,
|
|
"memory(GiB)": 19.54,
|
|
"step": 140,
|
|
"train_speed(iter/s)": 0.07528
|
|
},
|
|
{
|
|
"acc": 0.62686119,
|
|
"epoch": 0.5675146771037182,
|
|
"grad_norm": 0.68359375,
|
|
"learning_rate": 0.00013677438887076603,
|
|
"loss": 1.66314449,
|
|
"memory(GiB)": 19.54,
|
|
"step": 145,
|
|
"train_speed(iter/s)": 0.075467
|
|
},
|
|
{
|
|
"acc": 0.59954901,
|
|
"epoch": 0.5870841487279843,
|
|
"grad_norm": 0.6328125,
|
|
"learning_rate": 0.00013642514038485367,
|
|
"loss": 1.67525444,
|
|
"memory(GiB)": 19.55,
|
|
"step": 150,
|
|
"train_speed(iter/s)": 0.075722
|
|
},
|
|
{
|
|
"epoch": 0.5870841487279843,
|
|
"eval_acc": 0.6184603299293009,
|
|
"eval_loss": 1.5965631008148193,
|
|
"eval_runtime": 72.3005,
|
|
"eval_samples_per_second": 1.051,
|
|
"eval_steps_per_second": 0.526,
|
|
"step": 150
|
|
},
|
|
{
|
|
"acc": 0.585955,
|
|
"epoch": 0.6066536203522505,
|
|
"grad_norm": 0.9375,
|
|
"learning_rate": 0.00013605843708714162,
|
|
"loss": 1.7486639,
|
|
"memory(GiB)": 23.22,
|
|
"step": 155,
|
|
"train_speed(iter/s)": 0.073368
|
|
},
|
|
{
|
|
"acc": 0.62769904,
|
|
"epoch": 0.6262230919765166,
|
|
"grad_norm": 0.7265625,
|
|
"learning_rate": 0.00013567437533778826,
|
|
"loss": 1.55238762,
|
|
"memory(GiB)": 19.62,
|
|
"step": 160,
|
|
"train_speed(iter/s)": 0.073628
|
|
},
|
|
{
|
|
"acc": 0.63651643,
|
|
"epoch": 0.6457925636007827,
|
|
"grad_norm": 0.80078125,
|
|
"learning_rate": 0.00013527305605830488,
|
|
"loss": 1.54306393,
|
|
"memory(GiB)": 19.88,
|
|
"step": 165,
|
|
"train_speed(iter/s)": 0.073903
|
|
},
|
|
{
|
|
"acc": 0.59288979,
|
|
"epoch": 0.6653620352250489,
|
|
"grad_norm": 0.703125,
|
|
"learning_rate": 0.0001348545847050361,
|
|
"loss": 1.69727612,
|
|
"memory(GiB)": 19.58,
|
|
"step": 170,
|
|
"train_speed(iter/s)": 0.074077
|
|
},
|
|
{
|
|
"acc": 0.61248484,
|
|
"epoch": 0.684931506849315,
|
|
"grad_norm": 0.9140625,
|
|
"learning_rate": 0.00013441907124144866,
|
|
"loss": 1.65900764,
|
|
"memory(GiB)": 19.49,
|
|
"step": 175,
|
|
"train_speed(iter/s)": 0.074329
|
|
},
|
|
{
|
|
"acc": 0.61740661,
|
|
"epoch": 0.7045009784735812,
|
|
"grad_norm": 0.90625,
|
|
"learning_rate": 0.0001339666301092358,
|
|
"loss": 1.6518961,
|
|
"memory(GiB)": 19.68,
|
|
"step": 180,
|
|
"train_speed(iter/s)": 0.074558
|
|
},
|
|
{
|
|
"acc": 0.62250223,
|
|
"epoch": 0.7240704500978473,
|
|
"grad_norm": 0.84765625,
|
|
"learning_rate": 0.00013349738019824512,
|
|
"loss": 1.55100412,
|
|
"memory(GiB)": 19.34,
|
|
"step": 185,
|
|
"train_speed(iter/s)": 0.07477
|
|
},
|
|
{
|
|
"acc": 0.61055808,
|
|
"epoch": 0.7436399217221135,
|
|
"grad_norm": 0.90625,
|
|
"learning_rate": 0.00013301144481523718,
|
|
"loss": 1.67241592,
|
|
"memory(GiB)": 19.56,
|
|
"step": 190,
|
|
"train_speed(iter/s)": 0.075006
|
|
},
|
|
{
|
|
"acc": 0.6389596,
|
|
"epoch": 0.7632093933463796,
|
|
"grad_norm": 0.83203125,
|
|
"learning_rate": 0.00013250895165148384,
|
|
"loss": 1.54227753,
|
|
"memory(GiB)": 19.29,
|
|
"step": 195,
|
|
"train_speed(iter/s)": 0.075192
|
|
},
|
|
{
|
|
"acc": 0.59149747,
|
|
"epoch": 0.7827788649706457,
|
|
"grad_norm": 0.68359375,
|
|
"learning_rate": 0.00013199003274921416,
|
|
"loss": 1.71190453,
|
|
"memory(GiB)": 19.35,
|
|
"step": 200,
|
|
"train_speed(iter/s)": 0.075393
|
|
},
|
|
{
|
|
"epoch": 0.7827788649706457,
|
|
"eval_acc": 0.6241162608012569,
|
|
"eval_loss": 1.5573129653930664,
|
|
"eval_runtime": 69.5471,
|
|
"eval_samples_per_second": 1.093,
|
|
"eval_steps_per_second": 0.546,
|
|
"step": 200
|
|
},
|
|
{
|
|
"acc": 0.62623324,
|
|
"epoch": 0.8023483365949119,
|
|
"grad_norm": 0.81640625,
|
|
"learning_rate": 0.00013145482446691724,
|
|
"loss": 1.55779324,
|
|
"memory(GiB)": 20.56,
|
|
"step": 205,
|
|
"train_speed(iter/s)": 0.073671
|
|
},
|
|
{
|
|
"acc": 0.61495056,
|
|
"epoch": 0.821917808219178,
|
|
"grad_norm": 1.03125,
|
|
"learning_rate": 0.00013090346744351058,
|
|
"loss": 1.56424398,
|
|
"memory(GiB)": 19.48,
|
|
"step": 210,
|
|
"train_speed(iter/s)": 0.073902
|
|
},
|
|
{
|
|
"acc": 0.59643593,
|
|
"epoch": 0.8414872798434442,
|
|
"grad_norm": 1.0703125,
|
|
"learning_rate": 0.00013033610656138395,
|
|
"loss": 1.62190418,
|
|
"memory(GiB)": 19.5,
|
|
"step": 215,
|
|
"train_speed(iter/s)": 0.074133
|
|
},
|
|
{
|
|
"acc": 0.63052382,
|
|
"epoch": 0.8610567514677103,
|
|
"grad_norm": 0.59765625,
|
|
"learning_rate": 0.00012975289090832792,
|
|
"loss": 1.53521852,
|
|
"memory(GiB)": 19.53,
|
|
"step": 220,
|
|
"train_speed(iter/s)": 0.074334
|
|
},
|
|
{
|
|
"acc": 0.61408448,
|
|
"epoch": 0.8806262230919765,
|
|
"grad_norm": 0.7734375,
|
|
"learning_rate": 0.00012915397373835754,
|
|
"loss": 1.59712257,
|
|
"memory(GiB)": 19.52,
|
|
"step": 225,
|
|
"train_speed(iter/s)": 0.074533
|
|
},
|
|
{
|
|
"acc": 0.62307076,
|
|
"epoch": 0.9001956947162426,
|
|
"grad_norm": 0.66796875,
|
|
"learning_rate": 0.00012853951243144105,
|
|
"loss": 1.57903328,
|
|
"memory(GiB)": 19.49,
|
|
"step": 230,
|
|
"train_speed(iter/s)": 0.074719
|
|
},
|
|
{
|
|
"acc": 0.61717134,
|
|
"epoch": 0.9197651663405088,
|
|
"grad_norm": 0.84375,
|
|
"learning_rate": 0.00012790966845214457,
|
|
"loss": 1.61422024,
|
|
"memory(GiB)": 19.25,
|
|
"step": 235,
|
|
"train_speed(iter/s)": 0.074916
|
|
},
|
|
{
|
|
"acc": 0.62549253,
|
|
"epoch": 0.9393346379647749,
|
|
"grad_norm": 0.8125,
|
|
"learning_rate": 0.0001272646073072033,
|
|
"loss": 1.62806015,
|
|
"memory(GiB)": 19.36,
|
|
"step": 240,
|
|
"train_speed(iter/s)": 0.0751
|
|
},
|
|
{
|
|
"acc": 0.61903515,
|
|
"epoch": 0.958904109589041,
|
|
"grad_norm": 0.74609375,
|
|
"learning_rate": 0.0001266044985020307,
|
|
"loss": 1.55927486,
|
|
"memory(GiB)": 19.36,
|
|
"step": 245,
|
|
"train_speed(iter/s)": 0.075266
|
|
},
|
|
{
|
|
"acc": 0.61238952,
|
|
"epoch": 0.9784735812133072,
|
|
"grad_norm": 0.87890625,
|
|
"learning_rate": 0.00012592951549617683,
|
|
"loss": 1.52888412,
|
|
"memory(GiB)": 19.33,
|
|
"step": 250,
|
|
"train_speed(iter/s)": 0.075438
|
|
},
|
|
{
|
|
"epoch": 0.9784735812133072,
|
|
"eval_acc": 0.6267085624509033,
|
|
"eval_loss": 1.5281730890274048,
|
|
"eval_runtime": 69.069,
|
|
"eval_samples_per_second": 1.1,
|
|
"eval_steps_per_second": 0.55,
|
|
"step": 250
|
|
},
|
|
{
|
|
"acc": 0.63230977,
|
|
"epoch": 0.9980430528375733,
|
|
"grad_norm": 0.84765625,
|
|
"learning_rate": 0.00012523983565774753,
|
|
"loss": 1.53058205,
|
|
"memory(GiB)": 19.46,
|
|
"step": 255,
|
|
"train_speed(iter/s)": 0.074081
|
|
},
|
|
{
|
|
"acc": 0.66042156,
|
|
"epoch": 1.0176125244618395,
|
|
"grad_norm": 0.76171875,
|
|
"learning_rate": 0.00012453564021679692,
|
|
"loss": 1.37123928,
|
|
"memory(GiB)": 20.18,
|
|
"step": 260,
|
|
"train_speed(iter/s)": 0.074295
|
|
},
|
|
{
|
|
"acc": 0.67253222,
|
|
"epoch": 1.0371819960861057,
|
|
"grad_norm": 0.76953125,
|
|
"learning_rate": 0.00012381711421770455,
|
|
"loss": 1.28407507,
|
|
"memory(GiB)": 19.7,
|
|
"step": 265,
|
|
"train_speed(iter/s)": 0.074448
|
|
},
|
|
{
|
|
"acc": 0.66850777,
|
|
"epoch": 1.0567514677103718,
|
|
"grad_norm": 0.98046875,
|
|
"learning_rate": 0.0001230844464705507,
|
|
"loss": 1.27961807,
|
|
"memory(GiB)": 19.58,
|
|
"step": 270,
|
|
"train_speed(iter/s)": 0.07459
|
|
},
|
|
{
|
|
"acc": 0.67196817,
|
|
"epoch": 1.076320939334638,
|
|
"grad_norm": 0.9140625,
|
|
"learning_rate": 0.00012233782950150186,
|
|
"loss": 1.28494987,
|
|
"memory(GiB)": 19.61,
|
|
"step": 275,
|
|
"train_speed(iter/s)": 0.074728
|
|
},
|
|
{
|
|
"acc": 0.67708378,
|
|
"epoch": 1.095890410958904,
|
|
"grad_norm": 0.87109375,
|
|
"learning_rate": 0.00012157745950221989,
|
|
"loss": 1.29551096,
|
|
"memory(GiB)": 19.63,
|
|
"step": 280,
|
|
"train_speed(iter/s)": 0.074881
|
|
},
|
|
{
|
|
"acc": 0.66973438,
|
|
"epoch": 1.1154598825831703,
|
|
"grad_norm": 1.0859375,
|
|
"learning_rate": 0.0001208035362783079,
|
|
"loss": 1.27705774,
|
|
"memory(GiB)": 19.49,
|
|
"step": 285,
|
|
"train_speed(iter/s)": 0.075029
|
|
},
|
|
{
|
|
"acc": 0.6750237,
|
|
"epoch": 1.1350293542074363,
|
|
"grad_norm": 1.0859375,
|
|
"learning_rate": 0.00012001626319680648,
|
|
"loss": 1.25660419,
|
|
"memory(GiB)": 19.55,
|
|
"step": 290,
|
|
"train_speed(iter/s)": 0.07515
|
|
},
|
|
{
|
|
"acc": 0.624368,
|
|
"epoch": 1.1545988258317026,
|
|
"grad_norm": 1.1953125,
|
|
"learning_rate": 0.00011921584713275411,
|
|
"loss": 1.5070508,
|
|
"memory(GiB)": 19.52,
|
|
"step": 295,
|
|
"train_speed(iter/s)": 0.075278
|
|
},
|
|
{
|
|
"acc": 0.66252189,
|
|
"epoch": 1.1741682974559686,
|
|
"grad_norm": 0.828125,
|
|
"learning_rate": 0.0001184024984148257,
|
|
"loss": 1.32014723,
|
|
"memory(GiB)": 19.92,
|
|
"step": 300,
|
|
"train_speed(iter/s)": 0.075433
|
|
},
|
|
{
|
|
"epoch": 1.1741682974559686,
|
|
"eval_acc": 0.6282796543597801,
|
|
"eval_loss": 1.5250990390777588,
|
|
"eval_runtime": 70.3986,
|
|
"eval_samples_per_second": 1.08,
|
|
"eval_steps_per_second": 0.54,
|
|
"step": 300
|
|
},
|
|
{
|
|
"acc": 0.67028356,
|
|
"epoch": 1.1937377690802349,
|
|
"grad_norm": 1.7109375,
|
|
"learning_rate": 0.00011757643077006372,
|
|
"loss": 1.28037386,
|
|
"memory(GiB)": 22.6,
|
|
"step": 305,
|
|
"train_speed(iter/s)": 0.074243
|
|
},
|
|
{
|
|
"acc": 0.655305,
|
|
"epoch": 1.213307240704501,
|
|
"grad_norm": 1.1015625,
|
|
"learning_rate": 0.00011673786126771617,
|
|
"loss": 1.31057158,
|
|
"memory(GiB)": 19.72,
|
|
"step": 310,
|
|
"train_speed(iter/s)": 0.074392
|
|
},
|
|
{
|
|
"acc": 0.66528535,
|
|
"epoch": 1.2328767123287672,
|
|
"grad_norm": 1.6171875,
|
|
"learning_rate": 0.0001158870102621965,
|
|
"loss": 1.29698696,
|
|
"memory(GiB)": 19.08,
|
|
"step": 315,
|
|
"train_speed(iter/s)": 0.074534
|
|
},
|
|
{
|
|
"acc": 0.66950455,
|
|
"epoch": 1.2524461839530332,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 0.00011502410133517998,
|
|
"loss": 1.27706356,
|
|
"memory(GiB)": 19.87,
|
|
"step": 320,
|
|
"train_speed(iter/s)": 0.074667
|
|
},
|
|
{
|
|
"acc": 0.65843534,
|
|
"epoch": 1.2720156555772995,
|
|
"grad_norm": 1.2265625,
|
|
"learning_rate": 0.0001141493612368524,
|
|
"loss": 1.30308371,
|
|
"memory(GiB)": 19.87,
|
|
"step": 325,
|
|
"train_speed(iter/s)": 0.0748
|
|
},
|
|
{
|
|
"acc": 0.66441913,
|
|
"epoch": 1.2915851272015655,
|
|
"grad_norm": 1.2578125,
|
|
"learning_rate": 0.00011326301982632583,
|
|
"loss": 1.26109972,
|
|
"memory(GiB)": 19.09,
|
|
"step": 330,
|
|
"train_speed(iter/s)": 0.074935
|
|
},
|
|
{
|
|
"acc": 0.68711085,
|
|
"epoch": 1.3111545988258317,
|
|
"grad_norm": 0.95703125,
|
|
"learning_rate": 0.00011236531001123771,
|
|
"loss": 1.19278584,
|
|
"memory(GiB)": 19.73,
|
|
"step": 335,
|
|
"train_speed(iter/s)": 0.075053
|
|
},
|
|
{
|
|
"acc": 0.66676803,
|
|
"epoch": 1.3307240704500978,
|
|
"grad_norm": 1.96875,
|
|
"learning_rate": 0.0001114564676865486,
|
|
"loss": 1.3068346,
|
|
"memory(GiB)": 19.84,
|
|
"step": 340,
|
|
"train_speed(iter/s)": 0.075151
|
|
},
|
|
{
|
|
"acc": 0.66865935,
|
|
"epoch": 1.350293542074364,
|
|
"grad_norm": 1.2421875,
|
|
"learning_rate": 0.00011053673167255516,
|
|
"loss": 1.30573978,
|
|
"memory(GiB)": 19.66,
|
|
"step": 345,
|
|
"train_speed(iter/s)": 0.075271
|
|
},
|
|
{
|
|
"acc": 0.66606102,
|
|
"epoch": 1.36986301369863,
|
|
"grad_norm": 0.76171875,
|
|
"learning_rate": 0.00010960634365213437,
|
|
"loss": 1.26872787,
|
|
"memory(GiB)": 19.73,
|
|
"step": 350,
|
|
"train_speed(iter/s)": 0.075377
|
|
},
|
|
{
|
|
"epoch": 1.36986301369863,
|
|
"eval_acc": 0.6315003927729772,
|
|
"eval_loss": 1.5066882371902466,
|
|
"eval_runtime": 72.5685,
|
|
"eval_samples_per_second": 1.047,
|
|
"eval_steps_per_second": 0.524,
|
|
"step": 350
|
|
},
|
|
{
|
|
"acc": 0.67307239,
|
|
"epoch": 1.3894324853228963,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 0.0001086655481072354,
|
|
"loss": 1.27917318,
|
|
"memory(GiB)": 22.92,
|
|
"step": 355,
|
|
"train_speed(iter/s)": 0.074318
|
|
},
|
|
{
|
|
"acc": 0.65870218,
|
|
"epoch": 1.4090019569471623,
|
|
"grad_norm": 3.609375,
|
|
"learning_rate": 0.00010771459225463617,
|
|
"loss": 1.33731461,
|
|
"memory(GiB)": 19.67,
|
|
"step": 360,
|
|
"train_speed(iter/s)": 0.074416
|
|
},
|
|
{
|
|
"acc": 0.68150563,
|
|
"epoch": 1.4285714285714286,
|
|
"grad_norm": 0.9296875,
|
|
"learning_rate": 0.00010675372598098113,
|
|
"loss": 1.20515957,
|
|
"memory(GiB)": 19.99,
|
|
"step": 365,
|
|
"train_speed(iter/s)": 0.07451
|
|
},
|
|
{
|
|
"acc": 0.66793504,
|
|
"epoch": 1.4481409001956946,
|
|
"grad_norm": 1.03125,
|
|
"learning_rate": 0.00010578320177711743,
|
|
"loss": 1.31133595,
|
|
"memory(GiB)": 19.9,
|
|
"step": 370,
|
|
"train_speed(iter/s)": 0.074613
|
|
},
|
|
{
|
|
"acc": 0.66840873,
|
|
"epoch": 1.467710371819961,
|
|
"grad_norm": 0.9453125,
|
|
"learning_rate": 0.00010480327467174705,
|
|
"loss": 1.27730675,
|
|
"memory(GiB)": 19.91,
|
|
"step": 375,
|
|
"train_speed(iter/s)": 0.074709
|
|
},
|
|
{
|
|
"acc": 0.6621439,
|
|
"epoch": 1.487279843444227,
|
|
"grad_norm": 0.7890625,
|
|
"learning_rate": 0.00010381420216441152,
|
|
"loss": 1.29670372,
|
|
"memory(GiB)": 19.65,
|
|
"step": 380,
|
|
"train_speed(iter/s)": 0.074824
|
|
},
|
|
{
|
|
"acc": 0.66805882,
|
|
"epoch": 1.5068493150684932,
|
|
"grad_norm": 0.8203125,
|
|
"learning_rate": 0.00010281624415782804,
|
|
"loss": 1.23922901,
|
|
"memory(GiB)": 19.77,
|
|
"step": 385,
|
|
"train_speed(iter/s)": 0.074927
|
|
},
|
|
{
|
|
"acc": 0.66435666,
|
|
"epoch": 1.5264187866927594,
|
|
"grad_norm": 0.82421875,
|
|
"learning_rate": 0.0001018096628895935,
|
|
"loss": 1.27945633,
|
|
"memory(GiB)": 19.79,
|
|
"step": 390,
|
|
"train_speed(iter/s)": 0.075033
|
|
},
|
|
{
|
|
"acc": 0.68444743,
|
|
"epoch": 1.5459882583170255,
|
|
"grad_norm": 0.98046875,
|
|
"learning_rate": 0.00010079472286327533,
|
|
"loss": 1.2325819,
|
|
"memory(GiB)": 19.55,
|
|
"step": 395,
|
|
"train_speed(iter/s)": 0.075133
|
|
},
|
|
{
|
|
"acc": 0.68633671,
|
|
"epoch": 1.5655577299412915,
|
|
"grad_norm": 1.171875,
|
|
"learning_rate": 9.977169077890672e-05,
|
|
"loss": 1.26248102,
|
|
"memory(GiB)": 19.79,
|
|
"step": 400,
|
|
"train_speed(iter/s)": 0.075233
|
|
},
|
|
{
|
|
"epoch": 1.5655577299412915,
|
|
"eval_acc": 0.6297721916732129,
|
|
"eval_loss": 1.5114485025405884,
|
|
"eval_runtime": 70.7985,
|
|
"eval_samples_per_second": 1.073,
|
|
"eval_steps_per_second": 0.537,
|
|
"step": 400
|
|
},
|
|
{
|
|
"acc": 0.67859097,
|
|
"epoch": 1.5851272015655578,
|
|
"grad_norm": 1.046875,
|
|
"learning_rate": 9.874083546290482e-05,
|
|
"loss": 1.2065486,
|
|
"memory(GiB)": 22.72,
|
|
"step": 405,
|
|
"train_speed(iter/s)": 0.074347
|
|
},
|
|
{
|
|
"acc": 0.66178751,
|
|
"epoch": 1.604696673189824,
|
|
"grad_norm": 0.96484375,
|
|
"learning_rate": 9.770242779743008e-05,
|
|
"loss": 1.30969448,
|
|
"memory(GiB)": 20.13,
|
|
"step": 410,
|
|
"train_speed(iter/s)": 0.074453
|
|
},
|
|
{
|
|
"acc": 0.65872512,
|
|
"epoch": 1.62426614481409,
|
|
"grad_norm": 0.74609375,
|
|
"learning_rate": 9.665674064920533e-05,
|
|
"loss": 1.27483397,
|
|
"memory(GiB)": 20.17,
|
|
"step": 415,
|
|
"train_speed(iter/s)": 0.074534
|
|
},
|
|
{
|
|
"acc": 0.66567349,
|
|
"epoch": 1.643835616438356,
|
|
"grad_norm": 0.87109375,
|
|
"learning_rate": 9.560404879781353e-05,
|
|
"loss": 1.31585007,
|
|
"memory(GiB)": 20.07,
|
|
"step": 420,
|
|
"train_speed(iter/s)": 0.074639
|
|
},
|
|
{
|
|
"acc": 0.66216898,
|
|
"epoch": 1.6634050880626223,
|
|
"grad_norm": 0.85546875,
|
|
"learning_rate": 9.454462886349281e-05,
|
|
"loss": 1.32738457,
|
|
"memory(GiB)": 19.43,
|
|
"step": 425,
|
|
"train_speed(iter/s)": 0.074732
|
|
},
|
|
{
|
|
"acc": 0.6608973,
|
|
"epoch": 1.6829745596868886,
|
|
"grad_norm": 1.1328125,
|
|
"learning_rate": 9.347875923444772e-05,
|
|
"loss": 1.2792593,
|
|
"memory(GiB)": 20.05,
|
|
"step": 430,
|
|
"train_speed(iter/s)": 0.074827
|
|
},
|
|
{
|
|
"acc": 0.65830297,
|
|
"epoch": 1.7025440313111546,
|
|
"grad_norm": 0.94921875,
|
|
"learning_rate": 9.240671999369607e-05,
|
|
"loss": 1.34132614,
|
|
"memory(GiB)": 19.82,
|
|
"step": 435,
|
|
"train_speed(iter/s)": 0.074914
|
|
},
|
|
{
|
|
"acc": 0.68926673,
|
|
"epoch": 1.7221135029354206,
|
|
"grad_norm": 0.76953125,
|
|
"learning_rate": 9.132879284547038e-05,
|
|
"loss": 1.15266266,
|
|
"memory(GiB)": 19.28,
|
|
"step": 440,
|
|
"train_speed(iter/s)": 0.074997
|
|
},
|
|
{
|
|
"acc": 0.65699558,
|
|
"epoch": 1.741682974559687,
|
|
"grad_norm": 0.96484375,
|
|
"learning_rate": 9.024526104119312e-05,
|
|
"loss": 1.32417459,
|
|
"memory(GiB)": 19.29,
|
|
"step": 445,
|
|
"train_speed(iter/s)": 0.075079
|
|
},
|
|
{
|
|
"acc": 0.68860197,
|
|
"epoch": 1.7612524461839532,
|
|
"grad_norm": 0.8203125,
|
|
"learning_rate": 8.91564093050458e-05,
|
|
"loss": 1.20134068,
|
|
"memory(GiB)": 19.33,
|
|
"step": 450,
|
|
"train_speed(iter/s)": 0.07515
|
|
},
|
|
{
|
|
"epoch": 1.7612524461839532,
|
|
"eval_acc": 0.6351924587588373,
|
|
"eval_loss": 1.4908838272094727,
|
|
"eval_runtime": 71.5161,
|
|
"eval_samples_per_second": 1.063,
|
|
"eval_steps_per_second": 0.531,
|
|
"step": 450
|
|
},
|
|
{
|
|
"acc": 0.65404687,
|
|
"epoch": 1.7808219178082192,
|
|
"grad_norm": 1.0078125,
|
|
"learning_rate": 8.806252375915052e-05,
|
|
"loss": 1.31502724,
|
|
"memory(GiB)": 19.13,
|
|
"step": 455,
|
|
"train_speed(iter/s)": 0.074358
|
|
},
|
|
{
|
|
"acc": 0.69379678,
|
|
"epoch": 1.8003913894324852,
|
|
"grad_norm": 1.1015625,
|
|
"learning_rate": 8.696389184838471e-05,
|
|
"loss": 1.1870966,
|
|
"memory(GiB)": 20.18,
|
|
"step": 460,
|
|
"train_speed(iter/s)": 0.074437
|
|
},
|
|
{
|
|
"acc": 0.67447538,
|
|
"epoch": 1.8199608610567515,
|
|
"grad_norm": 1.2890625,
|
|
"learning_rate": 8.586080226484789e-05,
|
|
"loss": 1.19511604,
|
|
"memory(GiB)": 20.09,
|
|
"step": 465,
|
|
"train_speed(iter/s)": 0.074531
|
|
},
|
|
{
|
|
"acc": 0.67230067,
|
|
"epoch": 1.8395303326810177,
|
|
"grad_norm": 1.0390625,
|
|
"learning_rate": 8.475354487200092e-05,
|
|
"loss": 1.30591021,
|
|
"memory(GiB)": 19.29,
|
|
"step": 470,
|
|
"train_speed(iter/s)": 0.074608
|
|
},
|
|
{
|
|
"acc": 0.65006552,
|
|
"epoch": 1.8590998043052838,
|
|
"grad_norm": 3.21875,
|
|
"learning_rate": 8.364241062849732e-05,
|
|
"loss": 1.35613279,
|
|
"memory(GiB)": 19.51,
|
|
"step": 475,
|
|
"train_speed(iter/s)": 0.07469
|
|
},
|
|
{
|
|
"acc": 0.66248426,
|
|
"epoch": 1.8786692759295498,
|
|
"grad_norm": 1.0703125,
|
|
"learning_rate": 8.252769151172682e-05,
|
|
"loss": 1.34706697,
|
|
"memory(GiB)": 19.16,
|
|
"step": 480,
|
|
"train_speed(iter/s)": 0.074779
|
|
},
|
|
{
|
|
"acc": 0.66462736,
|
|
"epoch": 1.898238747553816,
|
|
"grad_norm": 0.8515625,
|
|
"learning_rate": 8.140968044109134e-05,
|
|
"loss": 1.31343336,
|
|
"memory(GiB)": 19.17,
|
|
"step": 485,
|
|
"train_speed(iter/s)": 0.07486
|
|
},
|
|
{
|
|
"acc": 0.65373287,
|
|
"epoch": 1.9178082191780823,
|
|
"grad_norm": 1.078125,
|
|
"learning_rate": 8.028867120103326e-05,
|
|
"loss": 1.31145601,
|
|
"memory(GiB)": 19.46,
|
|
"step": 490,
|
|
"train_speed(iter/s)": 0.074941
|
|
},
|
|
{
|
|
"acc": 0.6731041,
|
|
"epoch": 1.9373776908023483,
|
|
"grad_norm": 0.89453125,
|
|
"learning_rate": 7.916495836383648e-05,
|
|
"loss": 1.24272699,
|
|
"memory(GiB)": 19.45,
|
|
"step": 495,
|
|
"train_speed(iter/s)": 0.075011
|
|
},
|
|
{
|
|
"acc": 0.66485052,
|
|
"epoch": 1.9569471624266144,
|
|
"grad_norm": 1.03125,
|
|
"learning_rate": 7.80388372122204e-05,
|
|
"loss": 1.28164721,
|
|
"memory(GiB)": 19.24,
|
|
"step": 500,
|
|
"train_speed(iter/s)": 0.07509
|
|
},
|
|
{
|
|
"epoch": 1.9569471624266144,
|
|
"eval_acc": 0.6349567949725059,
|
|
"eval_loss": 1.483258843421936,
|
|
"eval_runtime": 72.4797,
|
|
"eval_samples_per_second": 1.049,
|
|
"eval_steps_per_second": 0.524,
|
|
"step": 500
|
|
},
|
|
{
|
|
"acc": 0.68325486,
|
|
"epoch": 1.9765166340508806,
|
|
"grad_norm": 1.2890625,
|
|
"learning_rate": 7.691060366174728e-05,
|
|
"loss": 1.2257865,
|
|
"memory(GiB)": 22.98,
|
|
"step": 505,
|
|
"train_speed(iter/s)": 0.074371
|
|
},
|
|
{
|
|
"acc": 0.68977013,
|
|
"epoch": 1.9960861056751469,
|
|
"grad_norm": 1.0234375,
|
|
"learning_rate": 7.578055418306327e-05,
|
|
"loss": 1.25723343,
|
|
"memory(GiB)": 19.56,
|
|
"step": 510,
|
|
"train_speed(iter/s)": 0.074471
|
|
},
|
|
{
|
|
"acc": 0.72185702,
|
|
"epoch": 2.015655577299413,
|
|
"grad_norm": 0.7890625,
|
|
"learning_rate": 7.464898572399353e-05,
|
|
"loss": 1.01715631,
|
|
"memory(GiB)": 20.07,
|
|
"step": 515,
|
|
"train_speed(iter/s)": 0.074591
|
|
},
|
|
{
|
|
"acc": 0.71889682,
|
|
"epoch": 2.035225048923679,
|
|
"grad_norm": 1.0625,
|
|
"learning_rate": 7.351619563151208e-05,
|
|
"loss": 1.03077154,
|
|
"memory(GiB)": 19.92,
|
|
"step": 520,
|
|
"train_speed(iter/s)": 0.074683
|
|
},
|
|
{
|
|
"acc": 0.7505311,
|
|
"epoch": 2.0547945205479454,
|
|
"grad_norm": 1.9609375,
|
|
"learning_rate": 7.238248157360663e-05,
|
|
"loss": 0.93218956,
|
|
"memory(GiB)": 19.85,
|
|
"step": 525,
|
|
"train_speed(iter/s)": 0.07477
|
|
},
|
|
{
|
|
"acc": 0.7315311,
|
|
"epoch": 2.0743639921722115,
|
|
"grad_norm": 1.1875,
|
|
"learning_rate": 7.124814146105921e-05,
|
|
"loss": 0.96330833,
|
|
"memory(GiB)": 19.87,
|
|
"step": 530,
|
|
"train_speed(iter/s)": 0.074853
|
|
},
|
|
{
|
|
"acc": 0.75555606,
|
|
"epoch": 2.0939334637964775,
|
|
"grad_norm": 1.3515625,
|
|
"learning_rate": 7.011347336916277e-05,
|
|
"loss": 0.86877937,
|
|
"memory(GiB)": 18.46,
|
|
"step": 535,
|
|
"train_speed(iter/s)": 0.074938
|
|
},
|
|
{
|
|
"acc": 0.74034052,
|
|
"epoch": 2.1135029354207435,
|
|
"grad_norm": 1.546875,
|
|
"learning_rate": 6.897877545939475e-05,
|
|
"loss": 0.90922012,
|
|
"memory(GiB)": 19.89,
|
|
"step": 540,
|
|
"train_speed(iter/s)": 0.075027
|
|
},
|
|
{
|
|
"acc": 0.72400937,
|
|
"epoch": 2.1330724070450096,
|
|
"grad_norm": 1.90625,
|
|
"learning_rate": 6.784434590106808e-05,
|
|
"loss": 0.98424711,
|
|
"memory(GiB)": 19.11,
|
|
"step": 545,
|
|
"train_speed(iter/s)": 0.075114
|
|
},
|
|
{
|
|
"acc": 0.77706275,
|
|
"epoch": 2.152641878669276,
|
|
"grad_norm": 1.359375,
|
|
"learning_rate": 6.671048279297972e-05,
|
|
"loss": 0.80820856,
|
|
"memory(GiB)": 19.86,
|
|
"step": 550,
|
|
"train_speed(iter/s)": 0.075193
|
|
},
|
|
{
|
|
"epoch": 2.152641878669276,
|
|
"eval_acc": 0.6260015710919089,
|
|
"eval_loss": 1.6081812381744385,
|
|
"eval_runtime": 68.6973,
|
|
"eval_samples_per_second": 1.106,
|
|
"eval_steps_per_second": 0.553,
|
|
"step": 550
|
|
},
|
|
{
|
|
"acc": 0.75351696,
|
|
"epoch": 2.172211350293542,
|
|
"grad_norm": 2.015625,
|
|
"learning_rate": 6.55774840850782e-05,
|
|
"loss": 0.86192131,
|
|
"memory(GiB)": 22.21,
|
|
"step": 555,
|
|
"train_speed(iter/s)": 0.074578
|
|
},
|
|
{
|
|
"acc": 0.74249997,
|
|
"epoch": 2.191780821917808,
|
|
"grad_norm": 1.4609375,
|
|
"learning_rate": 6.444564750017003e-05,
|
|
"loss": 0.91982813,
|
|
"memory(GiB)": 19.87,
|
|
"step": 560,
|
|
"train_speed(iter/s)": 0.074665
|
|
},
|
|
{
|
|
"acc": 0.73636398,
|
|
"epoch": 2.2113502935420746,
|
|
"grad_norm": 1.9375,
|
|
"learning_rate": 6.331527045568573e-05,
|
|
"loss": 0.93448582,
|
|
"memory(GiB)": 19.33,
|
|
"step": 565,
|
|
"train_speed(iter/s)": 0.074752
|
|
},
|
|
{
|
|
"acc": 0.74081583,
|
|
"epoch": 2.2309197651663406,
|
|
"grad_norm": 2.21875,
|
|
"learning_rate": 6.218664998552634e-05,
|
|
"loss": 0.94956303,
|
|
"memory(GiB)": 19.8,
|
|
"step": 570,
|
|
"train_speed(iter/s)": 0.074842
|
|
},
|
|
{
|
|
"acc": 0.74573116,
|
|
"epoch": 2.2504892367906066,
|
|
"grad_norm": 2.546875,
|
|
"learning_rate": 6.106008266201046e-05,
|
|
"loss": 0.88486786,
|
|
"memory(GiB)": 19.92,
|
|
"step": 575,
|
|
"train_speed(iter/s)": 0.074925
|
|
},
|
|
{
|
|
"acc": 0.75495067,
|
|
"epoch": 2.2700587084148727,
|
|
"grad_norm": 2.09375,
|
|
"learning_rate": 5.9935864517942844e-05,
|
|
"loss": 0.84776802,
|
|
"memory(GiB)": 19.89,
|
|
"step": 580,
|
|
"train_speed(iter/s)": 0.075
|
|
},
|
|
{
|
|
"acc": 0.74743519,
|
|
"epoch": 2.2896281800391387,
|
|
"grad_norm": 1.5859375,
|
|
"learning_rate": 5.881429096882449e-05,
|
|
"loss": 0.92330503,
|
|
"memory(GiB)": 19.03,
|
|
"step": 585,
|
|
"train_speed(iter/s)": 0.075076
|
|
},
|
|
{
|
|
"acc": 0.74913769,
|
|
"epoch": 2.309197651663405,
|
|
"grad_norm": 1.6640625,
|
|
"learning_rate": 5.769565673522515e-05,
|
|
"loss": 0.92942295,
|
|
"memory(GiB)": 20.04,
|
|
"step": 590,
|
|
"train_speed(iter/s)": 0.075149
|
|
},
|
|
{
|
|
"acc": 0.74875064,
|
|
"epoch": 2.328767123287671,
|
|
"grad_norm": 1.25,
|
|
"learning_rate": 5.658025576533832e-05,
|
|
"loss": 0.90142069,
|
|
"memory(GiB)": 19.96,
|
|
"step": 595,
|
|
"train_speed(iter/s)": 0.075215
|
|
},
|
|
{
|
|
"acc": 0.74648356,
|
|
"epoch": 2.3483365949119372,
|
|
"grad_norm": 1.65625,
|
|
"learning_rate": 5.546838115773929e-05,
|
|
"loss": 0.91528139,
|
|
"memory(GiB)": 19.84,
|
|
"step": 600,
|
|
"train_speed(iter/s)": 0.075292
|
|
},
|
|
{
|
|
"epoch": 2.3483365949119372,
|
|
"eval_acc": 0.6284367635506677,
|
|
"eval_loss": 1.593437910079956,
|
|
"eval_runtime": 68.9856,
|
|
"eval_samples_per_second": 1.102,
|
|
"eval_steps_per_second": 0.551,
|
|
"step": 600
|
|
},
|
|
{
|
|
"acc": 0.75246172,
|
|
"epoch": 2.3679060665362037,
|
|
"grad_norm": 1.2109375,
|
|
"learning_rate": 5.4360325084366416e-05,
|
|
"loss": 0.87402363,
|
|
"memory(GiB)": 22.69,
|
|
"step": 605,
|
|
"train_speed(iter/s)": 0.074706
|
|
},
|
|
{
|
|
"acc": 0.74078665,
|
|
"epoch": 2.3874755381604698,
|
|
"grad_norm": 1.0390625,
|
|
"learning_rate": 5.3256378713745815e-05,
|
|
"loss": 0.91142588,
|
|
"memory(GiB)": 20.15,
|
|
"step": 610,
|
|
"train_speed(iter/s)": 0.074788
|
|
},
|
|
{
|
|
"acc": 0.75772052,
|
|
"epoch": 2.407045009784736,
|
|
"grad_norm": 2.03125,
|
|
"learning_rate": 5.21568321344799e-05,
|
|
"loss": 0.85517597,
|
|
"memory(GiB)": 19.37,
|
|
"step": 615,
|
|
"train_speed(iter/s)": 0.074857
|
|
},
|
|
{
|
|
"acc": 0.75341692,
|
|
"epoch": 2.426614481409002,
|
|
"grad_norm": 1.40625,
|
|
"learning_rate": 5.10619742790194e-05,
|
|
"loss": 0.87981377,
|
|
"memory(GiB)": 18.91,
|
|
"step": 620,
|
|
"train_speed(iter/s)": 0.074925
|
|
},
|
|
{
|
|
"acc": 0.76221485,
|
|
"epoch": 2.446183953033268,
|
|
"grad_norm": 5.5625,
|
|
"learning_rate": 4.9972092847739603e-05,
|
|
"loss": 0.89623175,
|
|
"memory(GiB)": 20.27,
|
|
"step": 625,
|
|
"train_speed(iter/s)": 0.074994
|
|
},
|
|
{
|
|
"acc": 0.74322577,
|
|
"epoch": 2.4657534246575343,
|
|
"grad_norm": 1.6796875,
|
|
"learning_rate": 4.8887474233339963e-05,
|
|
"loss": 0.89493027,
|
|
"memory(GiB)": 19.38,
|
|
"step": 630,
|
|
"train_speed(iter/s)": 0.075068
|
|
},
|
|
{
|
|
"acc": 0.74455509,
|
|
"epoch": 2.4853228962818004,
|
|
"grad_norm": 1.3046875,
|
|
"learning_rate": 4.780840344558753e-05,
|
|
"loss": 0.92399101,
|
|
"memory(GiB)": 19.32,
|
|
"step": 635,
|
|
"train_speed(iter/s)": 0.075143
|
|
},
|
|
{
|
|
"acc": 0.75597148,
|
|
"epoch": 2.5048923679060664,
|
|
"grad_norm": 1.65625,
|
|
"learning_rate": 4.673516403642383e-05,
|
|
"loss": 0.86396818,
|
|
"memory(GiB)": 19.52,
|
|
"step": 640,
|
|
"train_speed(iter/s)": 0.075214
|
|
},
|
|
{
|
|
"acc": 0.75100412,
|
|
"epoch": 2.524461839530333,
|
|
"grad_norm": 1.5390625,
|
|
"learning_rate": 4.5668038025454554e-05,
|
|
"loss": 0.89630232,
|
|
"memory(GiB)": 19.54,
|
|
"step": 645,
|
|
"train_speed(iter/s)": 0.07528
|
|
},
|
|
{
|
|
"acc": 0.74814,
|
|
"epoch": 2.544031311154599,
|
|
"grad_norm": 1.7265625,
|
|
"learning_rate": 4.460730582584228e-05,
|
|
"loss": 0.90660105,
|
|
"memory(GiB)": 19.46,
|
|
"step": 650,
|
|
"train_speed(iter/s)": 0.075343
|
|
},
|
|
{
|
|
"epoch": 2.544031311154599,
|
|
"eval_acc": 0.6304006284367636,
|
|
"eval_loss": 1.6207610368728638,
|
|
"eval_runtime": 68.9365,
|
|
"eval_samples_per_second": 1.102,
|
|
"eval_steps_per_second": 0.551,
|
|
"step": 650
|
|
},
|
|
{
|
|
"acc": 0.74153934,
|
|
"epoch": 2.563600782778865,
|
|
"grad_norm": 2.328125,
|
|
"learning_rate": 4.3553246170621e-05,
|
|
"loss": 0.90404129,
|
|
"memory(GiB)": 19.38,
|
|
"step": 655,
|
|
"train_speed(iter/s)": 0.074813
|
|
},
|
|
{
|
|
"acc": 0.76082869,
|
|
"epoch": 2.583170254403131,
|
|
"grad_norm": 1.5390625,
|
|
"learning_rate": 4.2506136039452357e-05,
|
|
"loss": 0.90251627,
|
|
"memory(GiB)": 20.24,
|
|
"step": 660,
|
|
"train_speed(iter/s)": 0.074877
|
|
},
|
|
{
|
|
"acc": 0.76424356,
|
|
"epoch": 2.602739726027397,
|
|
"grad_norm": 1.109375,
|
|
"learning_rate": 4.146625058584251e-05,
|
|
"loss": 0.85076065,
|
|
"memory(GiB)": 19.4,
|
|
"step": 665,
|
|
"train_speed(iter/s)": 0.07494
|
|
},
|
|
{
|
|
"acc": 0.75788155,
|
|
"epoch": 2.6223091976516635,
|
|
"grad_norm": 1.828125,
|
|
"learning_rate": 4.043386306483886e-05,
|
|
"loss": 0.8638917,
|
|
"memory(GiB)": 18.71,
|
|
"step": 670,
|
|
"train_speed(iter/s)": 0.075
|
|
},
|
|
{
|
|
"acc": 0.74567804,
|
|
"epoch": 2.6418786692759295,
|
|
"grad_norm": 1.5078125,
|
|
"learning_rate": 3.940924476122573e-05,
|
|
"loss": 0.91406345,
|
|
"memory(GiB)": 19.53,
|
|
"step": 675,
|
|
"train_speed(iter/s)": 0.075062
|
|
},
|
|
{
|
|
"acc": 0.77229648,
|
|
"epoch": 2.6614481409001955,
|
|
"grad_norm": 1.3984375,
|
|
"learning_rate": 3.839266491823776e-05,
|
|
"loss": 0.79556112,
|
|
"memory(GiB)": 19.59,
|
|
"step": 680,
|
|
"train_speed(iter/s)": 0.075125
|
|
},
|
|
{
|
|
"acc": 0.7331708,
|
|
"epoch": 2.681017612524462,
|
|
"grad_norm": 1.6015625,
|
|
"learning_rate": 3.73843906668096e-05,
|
|
"loss": 0.95133247,
|
|
"memory(GiB)": 19.69,
|
|
"step": 685,
|
|
"train_speed(iter/s)": 0.075185
|
|
},
|
|
{
|
|
"acc": 0.76955137,
|
|
"epoch": 2.700587084148728,
|
|
"grad_norm": 1.4140625,
|
|
"learning_rate": 3.6384686955380996e-05,
|
|
"loss": 0.82770052,
|
|
"memory(GiB)": 19.53,
|
|
"step": 690,
|
|
"train_speed(iter/s)": 0.075245
|
|
},
|
|
{
|
|
"acc": 0.73245583,
|
|
"epoch": 2.720156555772994,
|
|
"grad_norm": 1.59375,
|
|
"learning_rate": 3.539381648027495e-05,
|
|
"loss": 0.93347349,
|
|
"memory(GiB)": 19.38,
|
|
"step": 695,
|
|
"train_speed(iter/s)": 0.075313
|
|
},
|
|
{
|
|
"acc": 0.7664053,
|
|
"epoch": 2.73972602739726,
|
|
"grad_norm": 1.4296875,
|
|
"learning_rate": 3.441203961666818e-05,
|
|
"loss": 0.84118309,
|
|
"memory(GiB)": 19.55,
|
|
"step": 700,
|
|
"train_speed(iter/s)": 0.075373
|
|
},
|
|
{
|
|
"epoch": 2.73972602739726,
|
|
"eval_acc": 0.628750981932443,
|
|
"eval_loss": 1.5982366800308228,
|
|
"eval_runtime": 69.1268,
|
|
"eval_samples_per_second": 1.099,
|
|
"eval_steps_per_second": 0.55,
|
|
"step": 700
|
|
},
|
|
{
|
|
"acc": 0.74386759,
|
|
"epoch": 2.759295499021526,
|
|
"grad_norm": 2.21875,
|
|
"learning_rate": 3.343961435017094e-05,
|
|
"loss": 0.92712116,
|
|
"memory(GiB)": 23.1,
|
|
"step": 705,
|
|
"train_speed(iter/s)": 0.074881
|
|
},
|
|
{
|
|
"acc": 0.75352135,
|
|
"epoch": 2.7788649706457926,
|
|
"grad_norm": 1.5625,
|
|
"learning_rate": 3.247679620903533e-05,
|
|
"loss": 0.90610752,
|
|
"memory(GiB)": 19.56,
|
|
"step": 710,
|
|
"train_speed(iter/s)": 0.074934
|
|
},
|
|
{
|
|
"acc": 0.75765467,
|
|
"epoch": 2.7984344422700587,
|
|
"grad_norm": 4.4375,
|
|
"learning_rate": 3.1523838197008956e-05,
|
|
"loss": 0.88628139,
|
|
"memory(GiB)": 19.44,
|
|
"step": 715,
|
|
"train_speed(iter/s)": 0.074999
|
|
},
|
|
{
|
|
"acc": 0.763375,
|
|
"epoch": 2.8180039138943247,
|
|
"grad_norm": 1.1640625,
|
|
"learning_rate": 3.058099072685204e-05,
|
|
"loss": 0.86159172,
|
|
"memory(GiB)": 19.5,
|
|
"step": 720,
|
|
"train_speed(iter/s)": 0.075059
|
|
},
|
|
{
|
|
"acc": 0.75694184,
|
|
"epoch": 2.837573385518591,
|
|
"grad_norm": 1.6171875,
|
|
"learning_rate": 2.964850155453543e-05,
|
|
"loss": 0.85433092,
|
|
"memory(GiB)": 19.38,
|
|
"step": 725,
|
|
"train_speed(iter/s)": 0.075121
|
|
},
|
|
{
|
|
"acc": 0.76086893,
|
|
"epoch": 2.857142857142857,
|
|
"grad_norm": 1.5859375,
|
|
"learning_rate": 2.8726615714136827e-05,
|
|
"loss": 0.8608798,
|
|
"memory(GiB)": 19.58,
|
|
"step": 730,
|
|
"train_speed(iter/s)": 0.075181
|
|
},
|
|
{
|
|
"acc": 0.74008894,
|
|
"epoch": 2.8767123287671232,
|
|
"grad_norm": 1.4375,
|
|
"learning_rate": 2.7815575453452058e-05,
|
|
"loss": 0.98413734,
|
|
"memory(GiB)": 19.59,
|
|
"step": 735,
|
|
"train_speed(iter/s)": 0.075242
|
|
},
|
|
{
|
|
"acc": 0.75941825,
|
|
"epoch": 2.8962818003913893,
|
|
"grad_norm": 1.7734375,
|
|
"learning_rate": 2.6915620170338612e-05,
|
|
"loss": 0.85438929,
|
|
"memory(GiB)": 19.39,
|
|
"step": 740,
|
|
"train_speed(iter/s)": 0.075307
|
|
},
|
|
{
|
|
"acc": 0.77891464,
|
|
"epoch": 2.9158512720156553,
|
|
"grad_norm": 1.7265625,
|
|
"learning_rate": 2.6026986349808058e-05,
|
|
"loss": 0.79716868,
|
|
"memory(GiB)": 19.61,
|
|
"step": 745,
|
|
"train_speed(iter/s)": 0.075361
|
|
},
|
|
{
|
|
"acc": 0.75023217,
|
|
"epoch": 2.935420743639922,
|
|
"grad_norm": 1.28125,
|
|
"learning_rate": 2.514990750188399e-05,
|
|
"loss": 0.85774508,
|
|
"memory(GiB)": 18.86,
|
|
"step": 750,
|
|
"train_speed(iter/s)": 0.075417
|
|
},
|
|
{
|
|
"epoch": 2.935420743639922,
|
|
"eval_acc": 0.6324430479183032,
|
|
"eval_loss": 1.5986852645874023,
|
|
"eval_runtime": 69.3348,
|
|
"eval_samples_per_second": 1.096,
|
|
"eval_steps_per_second": 0.548,
|
|
"step": 750
|
|
},
|
|
{
|
|
"acc": 0.74531512,
|
|
"epoch": 2.954990215264188,
|
|
"grad_norm": 1.5625,
|
|
"learning_rate": 2.4284614100241538e-05,
|
|
"loss": 0.93483381,
|
|
"memory(GiB)": 23.14,
|
|
"step": 755,
|
|
"train_speed(iter/s)": 0.074953
|
|
},
|
|
{
|
|
"acc": 0.76761031,
|
|
"epoch": 2.974559686888454,
|
|
"grad_norm": 1.6171875,
|
|
"learning_rate": 2.343133352164477e-05,
|
|
"loss": 0.84630623,
|
|
"memory(GiB)": 19.36,
|
|
"step": 760,
|
|
"train_speed(iter/s)": 0.075015
|
|
},
|
|
{
|
|
"acc": 0.75018072,
|
|
"epoch": 2.9941291585127203,
|
|
"grad_norm": 1.5703125,
|
|
"learning_rate": 2.2590289986198136e-05,
|
|
"loss": 0.89352074,
|
|
"memory(GiB)": 19.6,
|
|
"step": 765,
|
|
"train_speed(iter/s)": 0.075072
|
|
},
|
|
{
|
|
"acc": 0.80383377,
|
|
"epoch": 3.0136986301369864,
|
|
"grad_norm": 1.453125,
|
|
"learning_rate": 2.1761704498427003e-05,
|
|
"loss": 0.68276234,
|
|
"memory(GiB)": 19.62,
|
|
"step": 770,
|
|
"train_speed(iter/s)": 0.075153
|
|
},
|
|
{
|
|
"acc": 0.82252359,
|
|
"epoch": 3.0332681017612524,
|
|
"grad_norm": 1.328125,
|
|
"learning_rate": 2.094579478920358e-05,
|
|
"loss": 0.64008789,
|
|
"memory(GiB)": 19.76,
|
|
"step": 775,
|
|
"train_speed(iter/s)": 0.075213
|
|
},
|
|
{
|
|
"acc": 0.83448801,
|
|
"epoch": 3.0528375733855184,
|
|
"grad_norm": 1.8828125,
|
|
"learning_rate": 2.0142775258532654e-05,
|
|
"loss": 0.61610913,
|
|
"memory(GiB)": 19.59,
|
|
"step": 780,
|
|
"train_speed(iter/s)": 0.075271
|
|
},
|
|
{
|
|
"acc": 0.83116817,
|
|
"epoch": 3.072407045009785,
|
|
"grad_norm": 1.5546875,
|
|
"learning_rate": 1.9352856919212994e-05,
|
|
"loss": 0.58688097,
|
|
"memory(GiB)": 19.53,
|
|
"step": 785,
|
|
"train_speed(iter/s)": 0.075323
|
|
},
|
|
{
|
|
"acc": 0.82525949,
|
|
"epoch": 3.091976516634051,
|
|
"grad_norm": 1.4375,
|
|
"learning_rate": 1.8576247341388544e-05,
|
|
"loss": 0.62312498,
|
|
"memory(GiB)": 19.85,
|
|
"step": 790,
|
|
"train_speed(iter/s)": 0.07537
|
|
},
|
|
{
|
|
"acc": 0.81645441,
|
|
"epoch": 3.111545988258317,
|
|
"grad_norm": 1.65625,
|
|
"learning_rate": 1.7813150598004313e-05,
|
|
"loss": 0.62203112,
|
|
"memory(GiB)": 19.79,
|
|
"step": 795,
|
|
"train_speed(iter/s)": 0.075423
|
|
},
|
|
{
|
|
"acc": 0.83432789,
|
|
"epoch": 3.131115459882583,
|
|
"grad_norm": 1.5859375,
|
|
"learning_rate": 1.7063767211181333e-05,
|
|
"loss": 0.60077624,
|
|
"memory(GiB)": 19.52,
|
|
"step": 800,
|
|
"train_speed(iter/s)": 0.07548
|
|
},
|
|
{
|
|
"epoch": 3.131115459882583,
|
|
"eval_acc": 0.6209740769835035,
|
|
"eval_loss": 1.7955598831176758,
|
|
"eval_runtime": 69.0109,
|
|
"eval_samples_per_second": 1.101,
|
|
"eval_steps_per_second": 0.551,
|
|
"step": 800
|
|
},
|
|
{
|
|
"acc": 0.82124023,
|
|
"epoch": 3.1506849315068495,
|
|
"grad_norm": 1.7578125,
|
|
"learning_rate": 1.6328294099524644e-05,
|
|
"loss": 0.60847788,
|
|
"memory(GiB)": 22.65,
|
|
"step": 805,
|
|
"train_speed(iter/s)": 0.075043
|
|
},
|
|
{
|
|
"acc": 0.83265171,
|
|
"epoch": 3.1702544031311155,
|
|
"grad_norm": 4.09375,
|
|
"learning_rate": 1.5606924526378136e-05,
|
|
"loss": 0.57863126,
|
|
"memory(GiB)": 18.89,
|
|
"step": 810,
|
|
"train_speed(iter/s)": 0.07509
|
|
},
|
|
{
|
|
"acc": 0.8407362,
|
|
"epoch": 3.1898238747553815,
|
|
"grad_norm": 1.1796875,
|
|
"learning_rate": 1.4899848049039881e-05,
|
|
"loss": 0.53706379,
|
|
"memory(GiB)": 19.37,
|
|
"step": 815,
|
|
"train_speed(iter/s)": 0.075142
|
|
},
|
|
{
|
|
"acc": 0.82116756,
|
|
"epoch": 3.2093933463796476,
|
|
"grad_norm": 1.859375,
|
|
"learning_rate": 1.4207250468951426e-05,
|
|
"loss": 0.64039102,
|
|
"memory(GiB)": 19.52,
|
|
"step": 820,
|
|
"train_speed(iter/s)": 0.075197
|
|
},
|
|
{
|
|
"acc": 0.85004549,
|
|
"epoch": 3.228962818003914,
|
|
"grad_norm": 1.0390625,
|
|
"learning_rate": 1.3529313782874023e-05,
|
|
"loss": 0.53315983,
|
|
"memory(GiB)": 19.52,
|
|
"step": 825,
|
|
"train_speed(iter/s)": 0.07525
|
|
},
|
|
{
|
|
"acc": 0.83273296,
|
|
"epoch": 3.24853228962818,
|
|
"grad_norm": 1.578125,
|
|
"learning_rate": 1.2866216135064487e-05,
|
|
"loss": 0.58545351,
|
|
"memory(GiB)": 19.36,
|
|
"step": 830,
|
|
"train_speed(iter/s)": 0.075303
|
|
},
|
|
{
|
|
"acc": 0.80788403,
|
|
"epoch": 3.268101761252446,
|
|
"grad_norm": 2.296875,
|
|
"learning_rate": 1.2218131770463487e-05,
|
|
"loss": 0.67468171,
|
|
"memory(GiB)": 19.28,
|
|
"step": 835,
|
|
"train_speed(iter/s)": 0.075356
|
|
},
|
|
{
|
|
"acc": 0.8440134,
|
|
"epoch": 3.287671232876712,
|
|
"grad_norm": 1.21875,
|
|
"learning_rate": 1.1585230988908576e-05,
|
|
"loss": 0.55293651,
|
|
"memory(GiB)": 19.37,
|
|
"step": 840,
|
|
"train_speed(iter/s)": 0.07541
|
|
},
|
|
{
|
|
"acc": 0.81569691,
|
|
"epoch": 3.3072407045009786,
|
|
"grad_norm": 1.671875,
|
|
"learning_rate": 1.0967680100383645e-05,
|
|
"loss": 0.61190109,
|
|
"memory(GiB)": 18.09,
|
|
"step": 845,
|
|
"train_speed(iter/s)": 0.075466
|
|
},
|
|
{
|
|
"acc": 0.84766483,
|
|
"epoch": 3.3268101761252447,
|
|
"grad_norm": 1.8046875,
|
|
"learning_rate": 1.0365641381317113e-05,
|
|
"loss": 0.52525816,
|
|
"memory(GiB)": 19.31,
|
|
"step": 850,
|
|
"train_speed(iter/s)": 0.075523
|
|
},
|
|
{
|
|
"epoch": 3.3268101761252447,
|
|
"eval_acc": 0.6203456402199529,
|
|
"eval_loss": 1.7881730794906616,
|
|
"eval_runtime": 69.1552,
|
|
"eval_samples_per_second": 1.099,
|
|
"eval_steps_per_second": 0.549,
|
|
"step": 850
|
|
},
|
|
{
|
|
"acc": 0.84491625,
|
|
"epoch": 3.3463796477495107,
|
|
"grad_norm": 1.8046875,
|
|
"learning_rate": 9.779273031939692e-06,
|
|
"loss": 0.56272998,
|
|
"memory(GiB)": 23.04,
|
|
"step": 855,
|
|
"train_speed(iter/s)": 0.07511
|
|
},
|
|
{
|
|
"acc": 0.84104662,
|
|
"epoch": 3.3659491193737767,
|
|
"grad_norm": 1.796875,
|
|
"learning_rate": 9.20872913471363e-06,
|
|
"loss": 0.57019663,
|
|
"memory(GiB)": 19.42,
|
|
"step": 860,
|
|
"train_speed(iter/s)": 0.075157
|
|
},
|
|
{
|
|
"acc": 0.84433002,
|
|
"epoch": 3.385518590998043,
|
|
"grad_norm": 1.6484375,
|
|
"learning_rate": 8.654159613843715e-06,
|
|
"loss": 0.55449514,
|
|
"memory(GiB)": 19.59,
|
|
"step": 865,
|
|
"train_speed(iter/s)": 0.07521
|
|
},
|
|
{
|
|
"acc": 0.80005312,
|
|
"epoch": 3.4050880626223092,
|
|
"grad_norm": 1.46875,
|
|
"learning_rate": 8.115710195881068e-06,
|
|
"loss": 0.73595409,
|
|
"memory(GiB)": 19.36,
|
|
"step": 870,
|
|
"train_speed(iter/s)": 0.075258
|
|
},
|
|
{
|
|
"acc": 0.83217945,
|
|
"epoch": 3.4246575342465753,
|
|
"grad_norm": 3.328125,
|
|
"learning_rate": 7.593522371429972e-06,
|
|
"loss": 0.58270836,
|
|
"memory(GiB)": 19.58,
|
|
"step": 875,
|
|
"train_speed(iter/s)": 0.075306
|
|
},
|
|
{
|
|
"acc": 0.82742786,
|
|
"epoch": 3.4442270058708413,
|
|
"grad_norm": 1.234375,
|
|
"learning_rate": 7.0877333579678585e-06,
|
|
"loss": 0.59052157,
|
|
"memory(GiB)": 19.6,
|
|
"step": 880,
|
|
"train_speed(iter/s)": 0.075358
|
|
},
|
|
{
|
|
"acc": 0.81994705,
|
|
"epoch": 3.4637964774951078,
|
|
"grad_norm": 1.7578125,
|
|
"learning_rate": 6.598476063788036e-06,
|
|
"loss": 0.62256751,
|
|
"memory(GiB)": 19.56,
|
|
"step": 885,
|
|
"train_speed(iter/s)": 0.075405
|
|
},
|
|
{
|
|
"acc": 0.8157341,
|
|
"epoch": 3.483365949119374,
|
|
"grad_norm": 1.8203125,
|
|
"learning_rate": 6.12587905307477e-06,
|
|
"loss": 0.66806622,
|
|
"memory(GiB)": 19.49,
|
|
"step": 890,
|
|
"train_speed(iter/s)": 0.075454
|
|
},
|
|
{
|
|
"acc": 0.82838688,
|
|
"epoch": 3.50293542074364,
|
|
"grad_norm": 1.515625,
|
|
"learning_rate": 5.67006651212008e-06,
|
|
"loss": 0.63044977,
|
|
"memory(GiB)": 19.54,
|
|
"step": 895,
|
|
"train_speed(iter/s)": 0.075497
|
|
},
|
|
{
|
|
"acc": 0.79130597,
|
|
"epoch": 3.5225048923679063,
|
|
"grad_norm": 1.640625,
|
|
"learning_rate": 5.2311582166906605e-06,
|
|
"loss": 0.7558567,
|
|
"memory(GiB)": 19.28,
|
|
"step": 900,
|
|
"train_speed(iter/s)": 0.07555
},
{
"epoch": 3.5225048923679063,
"eval_acc": 0.6211311861743912,
"eval_loss": 1.7854998111724854,
"eval_runtime": 69.2434,
"eval_samples_per_second": 1.098,
"eval_steps_per_second": 0.549,
"step": 900
}
],
"logging_steps": 5,
"max_steps": 1020,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 300,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.605539502350213e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}