cima_joint_model / trainer_state.json
ndaheim's picture
initial model
ad6c1a6
raw
history blame
18.8 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 16.99889502762431,
"global_step": 5763,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.38,
"gpu_memory": 2987030016,
"learning_rate": 8.32e-06,
"loss": 4.0407,
"step": 128
},
{
"epoch": 0.75,
"gpu_memory": 3076460544,
"learning_rate": 1.664e-05,
"loss": 2.405,
"step": 256
},
{
"epoch": 1.0,
"eval_bp": 0.035349686560536234,
"eval_counts": [
505,
125,
50,
11
],
"eval_loss": 1.9292821884155273,
"eval_precisions": [
46.118721461187214,
15.723270440251572,
9.861932938856016,
4.471544715447155
],
"eval_ref_len": 4755,
"eval_runtime": 44.1807,
"eval_samples_per_second": 6.79,
"eval_score": 0.47271078280719403,
"eval_steps_per_second": 6.79,
"eval_sys_len": 1095,
"eval_totals": [
1095,
795,
507,
246
],
"gpu_memory": 3076460544,
"step": 339
},
{
"epoch": 1.13,
"gpu_memory": 3076460544,
"learning_rate": 2.4959999999999998e-05,
"loss": 2.0089,
"step": 384
},
{
"epoch": 1.51,
"gpu_memory": 3076460544,
"learning_rate": 3.2437898089171974e-05,
"loss": 1.8155,
"step": 512
},
{
"epoch": 1.89,
"gpu_memory": 3076460544,
"learning_rate": 3.1775477707006364e-05,
"loss": 1.7234,
"step": 640
},
{
"epoch": 2.0,
"eval_bp": 0.0840891954437523,
"eval_counts": [
492,
189,
85,
29
],
"eval_loss": 1.6681220531463623,
"eval_precisions": [
35.96491228070175,
17.696629213483146,
10.303030303030303,
4.833333333333333
],
"eval_ref_len": 4755,
"eval_runtime": 53.1682,
"eval_samples_per_second": 5.642,
"eval_score": 1.1219810390322362,
"eval_steps_per_second": 5.642,
"eval_sys_len": 1368,
"eval_totals": [
1368,
1068,
825,
600
],
"gpu_memory": 3076460544,
"step": 678
},
{
"epoch": 2.27,
"gpu_memory": 3076460544,
"learning_rate": 3.111305732484076e-05,
"loss": 1.6058,
"step": 768
},
{
"epoch": 2.64,
"gpu_memory": 3076460544,
"learning_rate": 3.0450636942675155e-05,
"loss": 1.5189,
"step": 896
},
{
"epoch": 3.0,
"eval_bp": 0.09192776836698148,
"eval_counts": [
571,
192,
93,
40
],
"eval_loss": 1.5985139608383179,
"eval_precisions": [
40.66951566951567,
17.391304347826086,
11.03202846975089,
6.734006734006734
],
"eval_ref_len": 4755,
"eval_runtime": 44.5545,
"eval_samples_per_second": 6.733,
"eval_score": 1.391807704814939,
"eval_steps_per_second": 6.733,
"eval_sys_len": 1404,
"eval_totals": [
1404,
1104,
843,
594
],
"gpu_memory": 3076460544,
"step": 1017
},
{
"epoch": 3.02,
"gpu_memory": 3076460544,
"learning_rate": 2.9788216560509553e-05,
"loss": 1.4885,
"step": 1024
},
{
"epoch": 3.4,
"gpu_memory": 3076460544,
"learning_rate": 2.9125796178343946e-05,
"loss": 1.334,
"step": 1152
},
{
"epoch": 3.77,
"gpu_memory": 3076460544,
"learning_rate": 2.8463375796178344e-05,
"loss": 1.3861,
"step": 1280
},
{
"epoch": 4.0,
"eval_bp": 0.034513967404432855,
"eval_counts": [
432,
173,
84,
35
],
"eval_loss": 1.6043497323989868,
"eval_precisions": [
39.66942148760331,
21.926489226869457,
16.184971098265898,
9.48509485094851
],
"eval_ref_len": 4755,
"eval_runtime": 42.8527,
"eval_samples_per_second": 7.001,
"eval_score": 0.6597653875525311,
"eval_steps_per_second": 7.001,
"eval_sys_len": 1089,
"eval_totals": [
1089,
789,
519,
369
],
"gpu_memory": 3076460544,
"step": 1356
},
{
"epoch": 4.15,
"gpu_memory": 3076460544,
"learning_rate": 2.7800955414012737e-05,
"loss": 1.3367,
"step": 1408
},
{
"epoch": 4.53,
"gpu_memory": 3076460544,
"learning_rate": 2.713853503184713e-05,
"loss": 1.2828,
"step": 1536
},
{
"epoch": 4.91,
"gpu_memory": 3076460544,
"learning_rate": 2.647611464968153e-05,
"loss": 1.2571,
"step": 1664
},
{
"epoch": 5.0,
"eval_bp": 0.17929973112718744,
"eval_counts": [
671,
230,
102,
43
],
"eval_loss": 1.5908681154251099,
"eval_precisions": [
38.36477987421384,
15.873015873015873,
8.695652173913043,
4.699453551912568
],
"eval_ref_len": 4755,
"eval_runtime": 55.6404,
"eval_samples_per_second": 5.392,
"eval_score": 2.2519827467510987,
"eval_steps_per_second": 5.392,
"eval_sys_len": 1749,
"eval_totals": [
1749,
1449,
1173,
915
],
"gpu_memory": 3076460544,
"step": 1695
},
{
"epoch": 5.29,
"gpu_memory": 3076460544,
"learning_rate": 2.5813694267515922e-05,
"loss": 1.2035,
"step": 1792
},
{
"epoch": 5.66,
"gpu_memory": 3076460544,
"learning_rate": 2.515127388535032e-05,
"loss": 1.183,
"step": 1920
},
{
"epoch": 6.0,
"eval_bp": 0.07050485313640832,
"eval_counts": [
615,
257,
141,
80
],
"eval_loss": 1.5943706035614014,
"eval_precisions": [
47.235023041474655,
25.64870259481038,
19.502074688796682,
14.109347442680775
],
"eval_ref_len": 4755,
"eval_runtime": 47.1825,
"eval_samples_per_second": 6.358,
"eval_score": 1.6941362350992444,
"eval_steps_per_second": 6.358,
"eval_sys_len": 1302,
"eval_totals": [
1302,
1002,
723,
567
],
"gpu_memory": 3076460544,
"step": 2034
},
{
"epoch": 6.04,
"gpu_memory": 3076460544,
"learning_rate": 2.4488853503184713e-05,
"loss": 1.1964,
"step": 2048
},
{
"epoch": 6.42,
"gpu_memory": 3076460544,
"learning_rate": 2.3826433121019104e-05,
"loss": 1.1073,
"step": 2176
},
{
"epoch": 6.8,
"gpu_memory": 3076460544,
"learning_rate": 2.31640127388535e-05,
"loss": 1.1316,
"step": 2304
},
{
"epoch": 7.0,
"eval_bp": 0.10421315891869368,
"eval_counts": [
649,
197,
79,
22
],
"eval_loss": 1.6070951223373413,
"eval_precisions": [
44.51303155006859,
17.012089810017272,
9.111880046136102,
3.559870550161812
],
"eval_ref_len": 4755,
"eval_runtime": 47.6479,
"eval_samples_per_second": 6.296,
"eval_score": 1.3046509061748794,
"eval_steps_per_second": 6.296,
"eval_sys_len": 1458,
"eval_totals": [
1458,
1158,
867,
618
],
"gpu_memory": 3076460544,
"step": 2373
},
{
"epoch": 7.17,
"gpu_memory": 3076460544,
"learning_rate": 2.2501592356687895e-05,
"loss": 1.0398,
"step": 2432
},
{
"epoch": 7.55,
"gpu_memory": 3076460544,
"learning_rate": 2.183917197452229e-05,
"loss": 1.0349,
"step": 2560
},
{
"epoch": 7.93,
"gpu_memory": 3076460544,
"learning_rate": 2.1176751592356686e-05,
"loss": 1.0816,
"step": 2688
},
{
"epoch": 8.0,
"eval_bp": 0.21001389512353258,
"eval_counts": [
846,
344,
187,
105
],
"eval_loss": 1.6298103332519531,
"eval_precisions": [
45.55735056542811,
22.093770070648684,
14.597970335675253,
10.294117647058824
],
"eval_ref_len": 4755,
"eval_runtime": 54.6716,
"eval_samples_per_second": 5.487,
"eval_score": 4.141670104799348,
"eval_steps_per_second": 5.487,
"eval_sys_len": 1857,
"eval_totals": [
1857,
1557,
1281,
1020
],
"gpu_memory": 3076460544,
"step": 2712
},
{
"epoch": 8.31,
"gpu_memory": 3076460544,
"learning_rate": 2.051433121019108e-05,
"loss": 0.987,
"step": 2816
},
{
"epoch": 8.68,
"gpu_memory": 3076460544,
"learning_rate": 1.9851910828025477e-05,
"loss": 0.9829,
"step": 2944
},
{
"epoch": 9.0,
"eval_bp": 0.06525766524199453,
"eval_counts": [
577,
216,
100,
37
],
"eval_loss": 1.6366333961486816,
"eval_precisions": [
45.254901960784316,
22.153846153846153,
14.367816091954023,
7.297830374753452
],
"eval_ref_len": 4755,
"eval_runtime": 49.3567,
"eval_samples_per_second": 6.078,
"eval_score": 1.1750500193614282,
"eval_steps_per_second": 6.078,
"eval_sys_len": 1275,
"eval_totals": [
1275,
975,
696,
507
],
"gpu_memory": 3076460544,
"step": 3051
},
{
"epoch": 9.06,
"gpu_memory": 3076460544,
"learning_rate": 1.918949044585987e-05,
"loss": 1.003,
"step": 3072
},
{
"epoch": 9.44,
"gpu_memory": 3076460544,
"learning_rate": 1.8527070063694264e-05,
"loss": 0.9337,
"step": 3200
},
{
"epoch": 9.82,
"gpu_memory": 3076460544,
"learning_rate": 1.786464968152866e-05,
"loss": 0.9325,
"step": 3328
},
{
"epoch": 10.0,
"eval_bp": 0.16851984622310243,
"eval_counts": [
667,
248,
121,
62
],
"eval_loss": 1.67235267162323,
"eval_precisions": [
39.005847953216374,
17.588652482269502,
10.503472222222221,
6.68824163969795
],
"eval_ref_len": 4755,
"eval_runtime": 50.0923,
"eval_samples_per_second": 5.989,
"eval_score": 2.4969097127652855,
"eval_steps_per_second": 5.989,
"eval_sys_len": 1710,
"eval_totals": [
1710,
1410,
1152,
927
],
"gpu_memory": 3076460544,
"step": 3390
},
{
"epoch": 10.19,
"gpu_memory": 3076460544,
"learning_rate": 1.7202229299363055e-05,
"loss": 0.9075,
"step": 3456
},
{
"epoch": 10.57,
"gpu_memory": 3076460544,
"learning_rate": 1.6539808917197452e-05,
"loss": 0.8753,
"step": 3584
},
{
"epoch": 10.95,
"gpu_memory": 3076460544,
"learning_rate": 1.5877388535031846e-05,
"loss": 0.9098,
"step": 3712
},
{
"epoch": 11.0,
"eval_bp": 0.1483387334695538,
"eval_counts": [
735,
268,
134,
67
],
"eval_loss": 1.6972090005874634,
"eval_precisions": [
44.95412844036697,
20.074906367041198,
12.725546058879392,
8.18070818070818
],
"eval_ref_len": 4755,
"eval_runtime": 46.9235,
"eval_samples_per_second": 6.393,
"eval_score": 2.5970312545681904,
"eval_steps_per_second": 6.393,
"eval_sys_len": 1635,
"eval_totals": [
1635,
1335,
1053,
819
],
"gpu_memory": 3076460544,
"step": 3729
},
{
"epoch": 11.33,
"gpu_memory": 3076460544,
"learning_rate": 1.5214968152866242e-05,
"loss": 0.839,
"step": 3840
},
{
"epoch": 11.7,
"gpu_memory": 3076460544,
"learning_rate": 1.4552547770700635e-05,
"loss": 0.8643,
"step": 3968
},
{
"epoch": 12.0,
"eval_bp": 0.1320190352563076,
"eval_counts": [
715,
285,
143,
70
],
"eval_loss": 1.713928461074829,
"eval_precisions": [
45.48346055979644,
22.40566037735849,
14.357429718875501,
9.25925925925926
],
"eval_ref_len": 4755,
"eval_runtime": 46.2792,
"eval_samples_per_second": 6.482,
"eval_score": 2.532809945547002,
"eval_steps_per_second": 6.482,
"eval_sys_len": 1572,
"eval_totals": [
1572,
1272,
996,
756
],
"gpu_memory": 3076460544,
"step": 4068
},
{
"epoch": 12.08,
"gpu_memory": 3076460544,
"learning_rate": 1.3890127388535031e-05,
"loss": 0.8264,
"step": 4096
},
{
"epoch": 12.46,
"gpu_memory": 3076460544,
"learning_rate": 1.3227707006369426e-05,
"loss": 0.8008,
"step": 4224
},
{
"epoch": 12.84,
"gpu_memory": 3076460544,
"learning_rate": 1.2565286624203822e-05,
"loss": 0.7963,
"step": 4352
},
{
"epoch": 13.0,
"eval_bp": 0.18517745860640325,
"eval_counts": [
782,
310,
160,
79
],
"eval_loss": 1.7276182174682617,
"eval_precisions": [
44.18079096045198,
21.08843537414966,
13.43408900083963,
8.44017094017094
],
"eval_ref_len": 4755,
"eval_runtime": 49.531,
"eval_samples_per_second": 6.057,
"eval_score": 3.3384697611529055,
"eval_steps_per_second": 6.057,
"eval_sys_len": 1770,
"eval_totals": [
1770,
1470,
1191,
936
],
"gpu_memory": 3076460544,
"step": 4407
},
{
"epoch": 13.22,
"gpu_memory": 3076460544,
"learning_rate": 1.1902866242038214e-05,
"loss": 0.791,
"step": 4480
},
{
"epoch": 13.59,
"gpu_memory": 3076460544,
"learning_rate": 1.124044585987261e-05,
"loss": 0.7591,
"step": 4608
},
{
"epoch": 13.97,
"gpu_memory": 3076460544,
"learning_rate": 1.0578025477707005e-05,
"loss": 0.7651,
"step": 4736
},
{
"epoch": 14.0,
"eval_bp": 0.17762954994257873,
"eval_counts": [
784,
310,
160,
81
],
"eval_loss": 1.788110375404358,
"eval_precisions": [
44.97991967871486,
21.48302148302148,
13.605442176870747,
8.653846153846153
],
"eval_ref_len": 4755,
"eval_runtime": 48.2995,
"eval_samples_per_second": 6.211,
"eval_score": 3.262302153360586,
"eval_steps_per_second": 6.211,
"eval_sys_len": 1743,
"eval_totals": [
1743,
1443,
1176,
936
],
"gpu_memory": 3076460544,
"step": 4746
},
{
"epoch": 14.35,
"gpu_memory": 3076460544,
"learning_rate": 9.9156050955414e-06,
"loss": 0.7389,
"step": 4864
},
{
"epoch": 14.72,
"gpu_memory": 3076460544,
"learning_rate": 9.253184713375794e-06,
"loss": 0.7292,
"step": 4992
},
{
"epoch": 15.0,
"eval_bp": 0.19451009506119815,
"eval_counts": [
756,
286,
139,
66
],
"eval_loss": 1.8334678411483765,
"eval_precisions": [
41.930116472545755,
19.028609447771125,
11.356209150326798,
6.790123456790123
],
"eval_ref_len": 4755,
"eval_runtime": 50.9389,
"eval_samples_per_second": 5.889,
"eval_score": 3.063396343878355,
"eval_steps_per_second": 5.889,
"eval_sys_len": 1803,
"eval_totals": [
1803,
1503,
1224,
972
],
"gpu_memory": 3076460544,
"step": 5085
},
{
"epoch": 15.1,
"gpu_memory": 3076460544,
"learning_rate": 8.59076433121019e-06,
"loss": 0.7051,
"step": 5120
},
{
"epoch": 15.48,
"gpu_memory": 3076460544,
"learning_rate": 7.928343949044585e-06,
"loss": 0.6872,
"step": 5248
},
{
"epoch": 15.86,
"gpu_memory": 3076460544,
"learning_rate": 7.265923566878981e-06,
"loss": 0.6935,
"step": 5376
},
{
"epoch": 16.0,
"eval_bp": 0.2204937574447589,
"eval_counts": [
792,
311,
160,
80
],
"eval_loss": 1.8358988761901855,
"eval_precisions": [
41.83835182250396,
19.522912743251727,
12.121212121212121,
7.469654528478058
],
"eval_ref_len": 4755,
"eval_runtime": 51.7295,
"eval_samples_per_second": 5.799,
"eval_score": 3.6361160482722528,
"eval_steps_per_second": 5.799,
"eval_sys_len": 1893,
"eval_totals": [
1893,
1593,
1320,
1071
],
"gpu_memory": 3076460544,
"step": 5424
},
{
"epoch": 16.24,
"gpu_memory": 3076460544,
"learning_rate": 6.6035031847133755e-06,
"loss": 0.6808,
"step": 5504
},
{
"epoch": 16.61,
"gpu_memory": 3076460544,
"learning_rate": 5.94108280254777e-06,
"loss": 0.6649,
"step": 5632
},
{
"epoch": 16.99,
"gpu_memory": 3076460544,
"learning_rate": 5.278662420382165e-06,
"loss": 0.6902,
"step": 5760
},
{
"epoch": 17.0,
"eval_bp": 0.27059488659440983,
"eval_counts": [
875,
346,
196,
113
],
"eval_loss": 1.8474984169006348,
"eval_precisions": [
42.45511887433285,
19.64792731402612,
13.198653198653199,
9.254709254709255
],
"eval_ref_len": 4755,
"eval_runtime": 53.1106,
"eval_samples_per_second": 5.649,
"eval_score": 4.834531406134382,
"eval_steps_per_second": 5.649,
"eval_sys_len": 2061,
"eval_totals": [
2061,
1761,
1485,
1221
],
"gpu_memory": 3076460544,
"step": 5763
}
],
"max_steps": 6780,
"num_train_epochs": 20,
"total_flos": 5005888091043840.0,
"trial_name": null,
"trial_params": null
}