{
  "best_metric": 1.5042036771774292,
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
  "epoch": 3.009009009009009,
  "eval_steps": 50,
  "global_step": 167,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018018018018018018,
      "grad_norm": 0.20094779133796692,
      "learning_rate": 1e-05,
      "loss": 1.7238,
      "step": 1
    },
    {
      "epoch": 0.018018018018018018,
      "eval_loss": 2.4445245265960693,
      "eval_runtime": 1.9421,
      "eval_samples_per_second": 48.402,
      "eval_steps_per_second": 12.358,
      "step": 1
    },
    {
      "epoch": 0.036036036036036036,
      "grad_norm": 0.23159636557102203,
      "learning_rate": 2e-05,
      "loss": 2.3157,
      "step": 2
    },
    {
      "epoch": 0.05405405405405406,
      "grad_norm": 0.17576082050800323,
      "learning_rate": 3e-05,
      "loss": 1.889,
      "step": 3
    },
    {
      "epoch": 0.07207207207207207,
      "grad_norm": 0.18563885986804962,
      "learning_rate": 4e-05,
      "loss": 1.8117,
      "step": 4
    },
    {
      "epoch": 0.09009009009009009,
      "grad_norm": 0.2837836742401123,
      "learning_rate": 5e-05,
      "loss": 2.5249,
      "step": 5
    },
    {
      "epoch": 0.10810810810810811,
      "grad_norm": 0.5448350310325623,
      "learning_rate": 6e-05,
      "loss": 2.5175,
      "step": 6
    },
    {
      "epoch": 0.12612612612612611,
      "grad_norm": 0.5622051358222961,
      "learning_rate": 7e-05,
      "loss": 2.6268,
      "step": 7
    },
    {
      "epoch": 0.14414414414414414,
      "grad_norm": 0.5050106048583984,
      "learning_rate": 8e-05,
      "loss": 2.5345,
      "step": 8
    },
    {
      "epoch": 0.16216216216216217,
      "grad_norm": 0.3459974229335785,
      "learning_rate": 9e-05,
      "loss": 2.6653,
      "step": 9
    },
    {
      "epoch": 0.18018018018018017,
      "grad_norm": 0.2534170150756836,
      "learning_rate": 0.0001,
      "loss": 1.9698,
      "step": 10
    },
    {
      "epoch": 0.1981981981981982,
      "grad_norm": 0.2756398916244507,
      "learning_rate": 9.998999018714263e-05,
      "loss": 1.7193,
      "step": 11
    },
    {
      "epoch": 0.21621621621621623,
      "grad_norm": 0.3350951075553894,
      "learning_rate": 9.995996475642466e-05,
      "loss": 2.133,
      "step": 12
    },
    {
      "epoch": 0.23423423423423423,
      "grad_norm": 0.5594307780265808,
      "learning_rate": 9.990993572980378e-05,
      "loss": 3.0643,
      "step": 13
    },
    {
      "epoch": 0.25225225225225223,
      "grad_norm": 0.13822466135025024,
      "learning_rate": 9.983992313852774e-05,
      "loss": 1.7842,
      "step": 14
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 0.14877331256866455,
      "learning_rate": 9.974995501511404e-05,
      "loss": 2.2152,
      "step": 15
    },
    {
      "epoch": 0.2882882882882883,
      "grad_norm": 0.13884077966213226,
      "learning_rate": 9.964006738212575e-05,
      "loss": 1.8158,
      "step": 16
    },
    {
      "epoch": 0.3063063063063063,
      "grad_norm": 0.17055541276931763,
      "learning_rate": 9.951030423774859e-05,
      "loss": 2.0037,
      "step": 17
    },
    {
      "epoch": 0.32432432432432434,
      "grad_norm": 0.44902870059013367,
      "learning_rate": 9.936071753817415e-05,
      "loss": 2.1393,
      "step": 18
    },
    {
      "epoch": 0.34234234234234234,
      "grad_norm": 0.4291950762271881,
      "learning_rate": 9.919136717679722e-05,
      "loss": 2.232,
      "step": 19
    },
    {
      "epoch": 0.36036036036036034,
      "grad_norm": 0.46815672516822815,
      "learning_rate": 9.900232096023477e-05,
      "loss": 2.1575,
      "step": 20
    },
    {
      "epoch": 0.3783783783783784,
      "grad_norm": 0.35431531071662903,
      "learning_rate": 9.879365458117678e-05,
      "loss": 2.1213,
      "step": 21
    },
    {
      "epoch": 0.3963963963963964,
      "grad_norm": 0.2607969045639038,
      "learning_rate": 9.856545158807938e-05,
      "loss": 2.4743,
      "step": 22
    },
    {
      "epoch": 0.4144144144144144,
      "grad_norm": 0.20863136649131775,
      "learning_rate": 9.831780335171279e-05,
      "loss": 1.7214,
      "step": 23
    },
    {
      "epoch": 0.43243243243243246,
      "grad_norm": 0.31846967339515686,
      "learning_rate": 9.805080902857699e-05,
      "loss": 2.2591,
      "step": 24
    },
    {
      "epoch": 0.45045045045045046,
      "grad_norm": 0.34938788414001465,
      "learning_rate": 9.776457552120033e-05,
      "loss": 2.2996,
      "step": 25
    },
    {
      "epoch": 0.46846846846846846,
      "grad_norm": 0.6246058940887451,
      "learning_rate": 9.745921743533653e-05,
      "loss": 2.1906,
      "step": 26
    },
    {
      "epoch": 0.4864864864864865,
      "grad_norm": 0.14733730256557465,
      "learning_rate": 9.713485703407731e-05,
      "loss": 1.6917,
      "step": 27
    },
    {
      "epoch": 0.5045045045045045,
      "grad_norm": 0.2119920402765274,
      "learning_rate": 9.679162418889931e-05,
      "loss": 2.1286,
      "step": 28
    },
    {
      "epoch": 0.5225225225225225,
      "grad_norm": 0.20382221043109894,
      "learning_rate": 9.642965632766436e-05,
      "loss": 1.889,
      "step": 29
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 0.22297801077365875,
      "learning_rate": 9.604909837959455e-05,
      "loss": 2.1375,
      "step": 30
    },
    {
      "epoch": 0.5585585585585585,
      "grad_norm": 0.29066169261932373,
      "learning_rate": 9.565010271724352e-05,
      "loss": 1.6588,
      "step": 31
    },
    {
      "epoch": 0.5765765765765766,
      "grad_norm": 0.4789801836013794,
      "learning_rate": 9.523282909548773e-05,
      "loss": 1.7155,
      "step": 32
    },
    {
      "epoch": 0.5945945945945946,
      "grad_norm": 0.5113940834999084,
      "learning_rate": 9.47974445875617e-05,
      "loss": 1.68,
      "step": 33
    },
    {
      "epoch": 0.6126126126126126,
      "grad_norm": 0.5598991513252258,
      "learning_rate": 9.434412351816328e-05,
      "loss": 1.5835,
      "step": 34
    },
    {
      "epoch": 0.6306306306306306,
      "grad_norm": 0.23446883261203766,
      "learning_rate": 9.387304739365523e-05,
      "loss": 2.0415,
      "step": 35
    },
    {
      "epoch": 0.6486486486486487,
      "grad_norm": 0.23417960107326508,
      "learning_rate": 9.338440482939146e-05,
      "loss": 1.9709,
      "step": 36
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.2393627017736435,
      "learning_rate": 9.287839147419686e-05,
      "loss": 1.809,
      "step": 37
    },
    {
      "epoch": 0.6846846846846847,
      "grad_norm": 0.37448129057884216,
      "learning_rate": 9.2355209932031e-05,
      "loss": 2.3969,
      "step": 38
    },
    {
      "epoch": 0.7027027027027027,
      "grad_norm": 0.5388697385787964,
      "learning_rate": 9.181506968086697e-05,
      "loss": 2.8039,
      "step": 39
    },
    {
      "epoch": 0.7207207207207207,
      "grad_norm": 0.13199251890182495,
      "learning_rate": 9.125818698881798e-05,
      "loss": 1.6472,
      "step": 40
    },
    {
      "epoch": 0.7387387387387387,
      "grad_norm": 0.21171270310878754,
      "learning_rate": 9.068478482754532e-05,
      "loss": 2.5439,
      "step": 41
    },
    {
      "epoch": 0.7567567567567568,
      "grad_norm": 0.19285422563552856,
      "learning_rate": 9.0095092782982e-05,
      "loss": 1.7294,
      "step": 42
    },
    {
      "epoch": 0.7747747747747747,
      "grad_norm": 0.19233770668506622,
      "learning_rate": 8.948934696340843e-05,
      "loss": 1.8547,
      "step": 43
    },
    {
      "epoch": 0.7927927927927928,
      "grad_norm": 2.065861225128174,
      "learning_rate": 8.886778990491631e-05,
      "loss": 1.4244,
      "step": 44
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 1.3515313863754272,
      "learning_rate": 8.823067047429907e-05,
      "loss": 1.3373,
      "step": 45
    },
    {
      "epoch": 0.8288288288288288,
      "grad_norm": 0.7247907519340515,
      "learning_rate": 8.757824376940746e-05,
      "loss": 1.1633,
      "step": 46
    },
    {
      "epoch": 0.8468468468468469,
      "grad_norm": 0.5490639209747314,
      "learning_rate": 8.691077101701024e-05,
      "loss": 1.3324,
      "step": 47
    },
    {
      "epoch": 0.8648648648648649,
      "grad_norm": 0.2311074584722519,
      "learning_rate": 8.622851946820095e-05,
      "loss": 2.2242,
      "step": 48
    },
    {
      "epoch": 0.8828828828828829,
      "grad_norm": 0.20029251277446747,
      "learning_rate": 8.553176229139261e-05,
      "loss": 1.7394,
      "step": 49
    },
    {
      "epoch": 0.9009009009009009,
      "grad_norm": 0.27592265605926514,
      "learning_rate": 8.482077846294308e-05,
      "loss": 2.1438,
      "step": 50
    },
    {
      "epoch": 0.9009009009009009,
      "eval_loss": 1.783769965171814,
      "eval_runtime": 1.9447,
      "eval_samples_per_second": 48.337,
      "eval_steps_per_second": 12.341,
      "step": 50
    },
    {
      "epoch": 0.918918918918919,
      "grad_norm": 0.3021310269832611,
      "learning_rate": 8.409585265545509e-05,
      "loss": 2.0702,
      "step": 51
    },
    {
      "epoch": 0.9369369369369369,
      "grad_norm": 0.5802788734436035,
      "learning_rate": 8.335727512379534e-05,
      "loss": 2.8902,
      "step": 52
    },
    {
      "epoch": 0.954954954954955,
      "grad_norm": 0.1312423199415207,
      "learning_rate": 8.260534158887876e-05,
      "loss": 1.7881,
      "step": 53
    },
    {
      "epoch": 0.972972972972973,
      "grad_norm": 0.742950439453125,
      "learning_rate": 8.184035311926396e-05,
      "loss": 1.1818,
      "step": 54
    },
    {
      "epoch": 0.990990990990991,
      "grad_norm": 0.3108900189399719,
      "learning_rate": 8.106261601060772e-05,
      "loss": 1.7755,
      "step": 55
    },
    {
      "epoch": 1.009009009009009,
      "grad_norm": 0.5375937223434448,
      "learning_rate": 8.027244166302642e-05,
      "loss": 3.1039,
      "step": 56
    },
    {
      "epoch": 1.027027027027027,
      "grad_norm": 0.21149364113807678,
      "learning_rate": 7.947014645641379e-05,
      "loss": 2.5015,
      "step": 57
    },
    {
      "epoch": 1.045045045045045,
      "grad_norm": 0.1894480586051941,
      "learning_rate": 7.865605162376486e-05,
      "loss": 1.9007,
      "step": 58
    },
    {
      "epoch": 1.063063063063063,
      "grad_norm": 0.20430035889148712,
      "learning_rate": 7.783048312255653e-05,
      "loss": 1.7878,
      "step": 59
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 0.2826579511165619,
      "learning_rate": 7.699377150423672e-05,
      "loss": 1.5133,
      "step": 60
    },
    {
      "epoch": 1.0990990990990992,
      "grad_norm": 0.7269498705863953,
      "learning_rate": 7.614625178187402e-05,
      "loss": 0.8433,
      "step": 61
    },
    {
      "epoch": 1.117117117117117,
      "grad_norm": 0.6260727047920227,
      "learning_rate": 7.528826329602099e-05,
      "loss": 0.8916,
      "step": 62
    },
    {
      "epoch": 1.135135135135135,
      "grad_norm": 0.5703617334365845,
      "learning_rate": 7.442014957884472e-05,
      "loss": 0.8438,
      "step": 63
    },
    {
      "epoch": 1.1531531531531531,
      "grad_norm": 0.27178001403808594,
      "learning_rate": 7.354225821657914e-05,
      "loss": 2.1417,
      "step": 64
    },
    {
      "epoch": 1.1711711711711712,
      "grad_norm": 0.23363694548606873,
      "learning_rate": 7.265494071035401e-05,
      "loss": 1.7944,
      "step": 65
    },
    {
      "epoch": 1.1891891891891893,
      "grad_norm": 0.2847091257572174,
      "learning_rate": 7.175855233545668e-05,
      "loss": 1.751,
      "step": 66
    },
    {
      "epoch": 1.2072072072072073,
      "grad_norm": 0.3543185293674469,
      "learning_rate": 7.085345199908235e-05,
      "loss": 2.0152,
      "step": 67
    },
    {
      "epoch": 1.2252252252252251,
      "grad_norm": 0.5327386856079102,
      "learning_rate": 6.994000209663036e-05,
      "loss": 2.6897,
      "step": 68
    },
    {
      "epoch": 1.2432432432432432,
      "grad_norm": 0.2581247091293335,
      "learning_rate": 6.901856836660386e-05,
      "loss": 1.2071,
      "step": 69
    },
    {
      "epoch": 1.2612612612612613,
      "grad_norm": 0.18185599148273468,
      "learning_rate": 6.808951974417078e-05,
      "loss": 2.24,
      "step": 70
    },
    {
      "epoch": 1.2792792792792793,
      "grad_norm": 0.21230140328407288,
      "learning_rate": 6.715322821344494e-05,
      "loss": 2.0791,
      "step": 71
    },
    {
      "epoch": 1.2972972972972974,
      "grad_norm": 0.20122762024402618,
      "learning_rate": 6.621006865854644e-05,
      "loss": 1.6265,
      "step": 72
    },
    {
      "epoch": 1.3153153153153152,
      "grad_norm": 0.22769460082054138,
      "learning_rate": 6.526041871350086e-05,
      "loss": 1.5291,
      "step": 73
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 1.8807597160339355,
      "learning_rate": 6.43046586110374e-05,
      "loss": 0.7391,
      "step": 74
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 1.10353422164917,
      "learning_rate": 6.334317103034652e-05,
      "loss": 0.6971,
      "step": 75
    },
    {
      "epoch": 1.3693693693693694,
      "grad_norm": 0.6892307996749878,
      "learning_rate": 6.237634094385813e-05,
      "loss": 0.7755,
      "step": 76
    },
    {
      "epoch": 1.3873873873873874,
      "grad_norm": 0.28367915749549866,
      "learning_rate": 6.140455546310148e-05,
      "loss": 2.0102,
      "step": 77
    },
    {
      "epoch": 1.4054054054054055,
      "grad_norm": 0.354563444852829,
      "learning_rate": 6.042820368370854e-05,
      "loss": 2.0273,
      "step": 78
    },
    {
      "epoch": 1.4234234234234235,
      "grad_norm": 0.2600463926792145,
      "learning_rate": 5.944767652962309e-05,
      "loss": 1.6145,
      "step": 79
    },
    {
      "epoch": 1.4414414414414414,
      "grad_norm": 0.29232466220855713,
      "learning_rate": 5.8463366596577706e-05,
      "loss": 2.1392,
      "step": 80
    },
    {
      "epoch": 1.4594594594594594,
      "grad_norm": 0.3904821276664734,
      "learning_rate": 5.747566799490132e-05,
      "loss": 2.3455,
      "step": 81
    },
    {
      "epoch": 1.4774774774774775,
      "grad_norm": 0.3034067153930664,
      "learning_rate": 5.648497619172042e-05,
      "loss": 1.6627,
      "step": 82
    },
    {
      "epoch": 1.4954954954954955,
      "grad_norm": 0.18810759484767914,
      "learning_rate": 5.549168785261698e-05,
      "loss": 2.3614,
      "step": 83
    },
    {
      "epoch": 1.5135135135135136,
      "grad_norm": 0.15777312219142914,
      "learning_rate": 5.4496200682806495e-05,
      "loss": 1.697,
      "step": 84
    },
    {
      "epoch": 1.5315315315315314,
      "grad_norm": 0.19415871798992157,
      "learning_rate": 5.3498913267899864e-05,
      "loss": 1.9134,
      "step": 85
    },
    {
      "epoch": 1.5495495495495497,
      "grad_norm": 0.17604829370975494,
      "learning_rate": 5.250022491431259e-05,
      "loss": 1.8209,
      "step": 86
    },
    {
      "epoch": 1.5675675675675675,
      "grad_norm": 1.056093692779541,
      "learning_rate": 5.150053548938557e-05,
      "loss": 0.6133,
      "step": 87
    },
    {
      "epoch": 1.5855855855855856,
      "grad_norm": 0.8092790246009827,
      "learning_rate": 5.0500245261281175e-05,
      "loss": 0.7028,
      "step": 88
    },
    {
      "epoch": 1.6036036036036037,
      "grad_norm": 0.6385818123817444,
      "learning_rate": 4.949975473871884e-05,
      "loss": 0.4955,
      "step": 89
    },
    {
      "epoch": 1.6216216216216215,
      "grad_norm": 0.2813832759857178,
      "learning_rate": 4.849946451061443e-05,
      "loss": 1.7059,
      "step": 90
    },
    {
      "epoch": 1.6396396396396398,
      "grad_norm": 0.25375688076019287,
      "learning_rate": 4.749977508568742e-05,
      "loss": 2.027,
      "step": 91
    },
    {
      "epoch": 1.6576576576576576,
      "grad_norm": 0.28173476457595825,
      "learning_rate": 4.650108673210015e-05,
      "loss": 1.5781,
      "step": 92
    },
    {
      "epoch": 1.6756756756756757,
      "grad_norm": 0.37018927931785583,
      "learning_rate": 4.550379931719351e-05,
      "loss": 1.9611,
      "step": 93
    },
    {
      "epoch": 1.6936936936936937,
      "grad_norm": 0.44017812609672546,
      "learning_rate": 4.4508312147383036e-05,
      "loss": 1.9964,
      "step": 94
    },
    {
      "epoch": 1.7117117117117115,
      "grad_norm": 0.27065154910087585,
      "learning_rate": 4.3515023808279586e-05,
      "loss": 1.7931,
      "step": 95
    },
    {
      "epoch": 1.7297297297297298,
      "grad_norm": 0.1628153771162033,
      "learning_rate": 4.252433200509869e-05,
      "loss": 2.0541,
      "step": 96
    },
    {
      "epoch": 1.7477477477477477,
      "grad_norm": 0.15087255835533142,
      "learning_rate": 4.1536633403422306e-05,
      "loss": 1.5446,
      "step": 97
    },
    {
      "epoch": 1.7657657657657657,
      "grad_norm": 0.19169510900974274,
      "learning_rate": 4.0552323470376916e-05,
      "loss": 2.1779,
      "step": 98
    },
    {
      "epoch": 1.7837837837837838,
      "grad_norm": 0.17723213136196136,
      "learning_rate": 3.9571796316291476e-05,
      "loss": 1.5049,
      "step": 99
    },
    {
      "epoch": 1.8018018018018018,
      "grad_norm": 0.5622679591178894,
      "learning_rate": 3.859544453689853e-05,
      "loss": 0.538,
      "step": 100
    },
    {
      "epoch": 1.8018018018018018,
      "eval_loss": 1.5607163906097412,
      "eval_runtime": 1.8915,
      "eval_samples_per_second": 49.696,
      "eval_steps_per_second": 12.688,
      "step": 100
    },
    {
      "epoch": 1.8198198198198199,
      "grad_norm": 0.459534227848053,
      "learning_rate": 3.762365905614187e-05,
      "loss": 0.6408,
      "step": 101
    },
    {
      "epoch": 1.8378378378378377,
      "grad_norm": 0.45416250824928284,
      "learning_rate": 3.665682896965349e-05,
      "loss": 0.6204,
      "step": 102
    },
    {
      "epoch": 1.855855855855856,
      "grad_norm": 0.3494967818260193,
      "learning_rate": 3.5695341388962614e-05,
      "loss": 0.8394,
      "step": 103
    },
    {
      "epoch": 1.8738738738738738,
      "grad_norm": 0.20583558082580566,
      "learning_rate": 3.473958128649915e-05,
      "loss": 1.7962,
      "step": 104
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 0.2505161762237549,
      "learning_rate": 3.378993134145356e-05,
      "loss": 1.8203,
      "step": 105
    },
    {
      "epoch": 1.90990990990991,
      "grad_norm": 0.2894545793533325,
      "learning_rate": 3.284677178655507e-05,
      "loss": 1.7578,
      "step": 106
    },
    {
      "epoch": 1.9279279279279278,
      "grad_norm": 0.36985740065574646,
      "learning_rate": 3.1910480255829237e-05,
      "loss": 2.3144,
      "step": 107
    },
    {
      "epoch": 1.945945945945946,
      "grad_norm": 0.29496514797210693,
      "learning_rate": 3.098143163339615e-05,
      "loss": 2.2301,
      "step": 108
    },
    {
      "epoch": 1.9639639639639639,
      "grad_norm": 0.13244111835956573,
      "learning_rate": 3.0059997903369656e-05,
      "loss": 1.2973,
      "step": 109
    },
    {
      "epoch": 1.981981981981982,
      "grad_norm": 0.1934548020362854,
      "learning_rate": 2.914654800091768e-05,
      "loss": 1.0715,
      "step": 110
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.7730455994606018,
      "learning_rate": 2.824144766454333e-05,
      "loss": 2.955,
      "step": 111
    },
    {
      "epoch": 2.018018018018018,
      "grad_norm": 0.14186394214630127,
      "learning_rate": 2.7345059289646008e-05,
      "loss": 1.6136,
      "step": 112
    },
    {
      "epoch": 2.036036036036036,
      "grad_norm": 0.1974637508392334,
      "learning_rate": 2.6457741783420886e-05,
      "loss": 2.1953,
      "step": 113
    },
    {
      "epoch": 2.054054054054054,
      "grad_norm": 0.15466494858264923,
      "learning_rate": 2.5579850421155293e-05,
      "loss": 1.5703,
      "step": 114
    },
    {
      "epoch": 2.0720720720720722,
      "grad_norm": 0.17066383361816406,
      "learning_rate": 2.4711736703979018e-05,
      "loss": 1.8892,
      "step": 115
    },
    {
      "epoch": 2.09009009009009,
      "grad_norm": 0.18141521513462067,
      "learning_rate": 2.3853748218126e-05,
      "loss": 0.7031,
      "step": 116
    },
    {
      "epoch": 2.108108108108108,
      "grad_norm": 0.1847659945487976,
      "learning_rate": 2.3006228495763295e-05,
      "loss": 0.425,
      "step": 117
    },
    {
      "epoch": 2.126126126126126,
      "grad_norm": 0.18299931287765503,
      "learning_rate": 2.2169516877443485e-05,
      "loss": 0.4302,
      "step": 118
    },
    {
      "epoch": 2.144144144144144,
      "grad_norm": 0.1804618388414383,
      "learning_rate": 2.1343948376235144e-05,
      "loss": 1.5657,
      "step": 119
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 0.22961750626564026,
      "learning_rate": 2.052985354358622e-05,
      "loss": 1.9153,
      "step": 120
    },
    {
      "epoch": 2.18018018018018,
      "grad_norm": 0.24420714378356934,
      "learning_rate": 1.9727558336973595e-05,
      "loss": 1.726,
      "step": 121
    },
    {
      "epoch": 2.1981981981981984,
      "grad_norm": 0.27104926109313965,
      "learning_rate": 1.8937383989392294e-05,
      "loss": 1.7018,
      "step": 122
    },
    {
      "epoch": 2.2162162162162162,
      "grad_norm": 0.37170183658599854,
      "learning_rate": 1.8159646880736036e-05,
      "loss": 1.9319,
      "step": 123
    },
    {
      "epoch": 2.234234234234234,
      "grad_norm": 0.4491060674190521,
      "learning_rate": 1.739465841112125e-05,
      "loss": 2.0411,
      "step": 124
    },
    {
      "epoch": 2.2522522522522523,
      "grad_norm": 0.1254836916923523,
      "learning_rate": 1.664272487620466e-05,
      "loss": 1.4904,
      "step": 125
    },
    {
      "epoch": 2.27027027027027,
      "grad_norm": 0.15240132808685303,
      "learning_rate": 1.590414734454493e-05,
      "loss": 2.1468,
      "step": 126
    },
    {
      "epoch": 2.2882882882882885,
      "grad_norm": 0.16004954278469086,
      "learning_rate": 1.517922153705692e-05,
      "loss": 1.8539,
      "step": 127
    },
    {
      "epoch": 2.3063063063063063,
      "grad_norm": 0.18988263607025146,
      "learning_rate": 1.4468237708607397e-05,
      "loss": 1.7977,
      "step": 128
    },
    {
      "epoch": 2.3243243243243246,
      "grad_norm": 0.18853111565113068,
      "learning_rate": 1.3771480531799052e-05,
      "loss": 1.6049,
      "step": 129
    },
    {
      "epoch": 2.3423423423423424,
      "grad_norm": 0.1556941568851471,
      "learning_rate": 1.308922898298977e-05,
      "loss": 0.3974,
      "step": 130
    },
    {
      "epoch": 2.3603603603603602,
      "grad_norm": 0.17364652454853058,
      "learning_rate": 1.2421756230592534e-05,
      "loss": 0.4047,
      "step": 131
    },
    {
      "epoch": 2.3783783783783785,
      "grad_norm": 0.15375109016895294,
      "learning_rate": 1.1769329525700935e-05,
      "loss": 0.443,
      "step": 132
    },
    {
      "epoch": 2.3963963963963963,
      "grad_norm": 0.2470708191394806,
      "learning_rate": 1.1132210095083694e-05,
      "loss": 1.5159,
      "step": 133
    },
    {
      "epoch": 2.4144144144144146,
      "grad_norm": 0.21014344692230225,
      "learning_rate": 1.0510653036591583e-05,
      "loss": 1.8404,
      "step": 134
    },
    {
      "epoch": 2.4324324324324325,
      "grad_norm": 0.23266132175922394,
      "learning_rate": 9.904907217018e-06,
      "loss": 1.3737,
      "step": 135
    },
    {
      "epoch": 2.4504504504504503,
      "grad_norm": 0.32797127962112427,
      "learning_rate": 9.31521517245469e-06,
      "loss": 2.1399,
      "step": 136
    },
    {
      "epoch": 2.4684684684684686,
      "grad_norm": 0.4745734930038452,
      "learning_rate": 8.741813011182014e-06,
      "loss": 2.2424,
      "step": 137
    },
    {
      "epoch": 2.4864864864864864,
      "grad_norm": 0.13079120218753815,
      "learning_rate": 8.18493031913305e-06,
      "loss": 1.5224,
      "step": 138
    },
    {
      "epoch": 2.5045045045045047,
      "grad_norm": 0.1728202998638153,
      "learning_rate": 7.644790067969005e-06,
      "loss": 2.0656,
      "step": 139
    },
    {
      "epoch": 2.5225225225225225,
      "grad_norm": 0.20250830054283142,
      "learning_rate": 7.1216085258031414e-06,
      "loss": 2.0862,
      "step": 140
    },
    {
      "epoch": 2.5405405405405403,
      "grad_norm": 0.16197475790977478,
      "learning_rate": 6.6155951706085405e-06,
      "loss": 1.4935,
      "step": 141
    },
    {
      "epoch": 2.5585585585585586,
      "grad_norm": 0.15914876759052277,
      "learning_rate": 6.1269526063447765e-06,
      "loss": 0.5048,
      "step": 142
    },
    {
      "epoch": 2.5765765765765765,
      "grad_norm": 0.18777939677238464,
      "learning_rate": 5.6558764818367195e-06,
      "loss": 0.6094,
      "step": 143
    },
    {
      "epoch": 2.5945945945945947,
      "grad_norm": 0.14933425188064575,
      "learning_rate": 5.2025554124383095e-06,
      "loss": 0.4805,
      "step": 144
    },
    {
      "epoch": 2.6126126126126126,
      "grad_norm": 0.1998123824596405,
      "learning_rate": 4.767170904512292e-06,
      "loss": 1.3946,
      "step": 145
    },
    {
      "epoch": 2.6306306306306304,
      "grad_norm": 0.270035982131958,
      "learning_rate": 4.349897282756487e-06,
      "loss": 2.0762,
      "step": 146
    },
    {
      "epoch": 2.6486486486486487,
      "grad_norm": 0.22045235335826874,
      "learning_rate": 3.95090162040545e-06,
      "loss": 1.8445,
      "step": 147
    },
    {
      "epoch": 2.6666666666666665,
      "grad_norm": 0.25569671392440796,
      "learning_rate": 3.570343672335641e-06,
      "loss": 1.7808,
      "step": 148
    },
    {
      "epoch": 2.684684684684685,
      "grad_norm": 0.3833853602409363,
      "learning_rate": 3.2083758111006945e-06,
      "loss": 1.7097,
      "step": 149
    },
    {
      "epoch": 2.7027027027027026,
      "grad_norm": 0.46845442056655884,
      "learning_rate": 2.86514296592269e-06,
      "loss": 2.2938,
      "step": 150
    },
    {
      "epoch": 2.7027027027027026,
      "eval_loss": 1.5042036771774292,
      "eval_runtime": 1.8931,
      "eval_samples_per_second": 49.654,
      "eval_steps_per_second": 12.678,
      "step": 150
    },
    {
      "epoch": 2.7207207207207205,
      "grad_norm": 0.1713583618402481,
      "learning_rate": 2.54078256466348e-06,
      "loss": 1.8336,
      "step": 151
    },
    {
      "epoch": 2.7387387387387387,
      "grad_norm": 0.17491984367370605,
      "learning_rate": 2.2354244787996748e-06,
      "loss": 2.1273,
      "step": 152
    },
    {
      "epoch": 2.756756756756757,
      "grad_norm": 0.1518978625535965,
      "learning_rate": 1.9491909714230204e-06,
      "loss": 1.8808,
      "step": 153
    },
    {
      "epoch": 2.774774774774775,
      "grad_norm": 0.16476936638355255,
      "learning_rate": 1.6821966482872264e-06,
      "loss": 1.7178,
      "step": 154
    },
    {
      "epoch": 2.7927927927927927,
      "grad_norm": 0.16660767793655396,
      "learning_rate": 1.434548411920622e-06,
      "loss": 1.1728,
      "step": 155
    },
    {
      "epoch": 2.810810810810811,
      "grad_norm": 0.15310387313365936,
      "learning_rate": 1.206345418823235e-06,
      "loss": 0.4533,
      "step": 156
    },
    {
      "epoch": 2.828828828828829,
      "grad_norm": 0.16932179033756256,
      "learning_rate": 9.976790397652315e-07,
      "loss": 0.4293,
      "step": 157
    },
    {
      "epoch": 2.846846846846847,
      "grad_norm": 0.2117478847503662,
      "learning_rate": 8.086328232027873e-07,
      "loss": 0.5155,
      "step": 158
    },
    {
      "epoch": 2.864864864864865,
      "grad_norm": 0.22566819190979004,
      "learning_rate": 6.392824618258519e-07,
      "loss": 1.8127,
      "step": 159
    },
    {
      "epoch": 2.8828828828828827,
      "grad_norm": 0.2261633276939392,
      "learning_rate": 4.896957622514298e-07,
      "loss": 1.6093,
      "step": 160
    },
    {
      "epoch": 2.900900900900901,
      "grad_norm": 0.2446056753396988,
      "learning_rate": 3.599326178742535e-07,
      "loss": 1.5191,
      "step": 161
    },
    {
      "epoch": 2.918918918918919,
      "grad_norm": 0.3451971411705017,
      "learning_rate": 2.500449848859776e-07,
      "loss": 1.9343,
      "step": 162
    },
    {
      "epoch": 2.936936936936937,
      "grad_norm": 0.46758314967155457,
      "learning_rate": 1.6007686147225254e-07,
      "loss": 2.3823,
      "step": 163
    },
    {
      "epoch": 2.954954954954955,
      "grad_norm": 0.15292246639728546,
      "learning_rate": 9.006427019622176e-08,
      "loss": 1.9701,
      "step": 164
    },
    {
      "epoch": 2.972972972972973,
      "grad_norm": 0.1602296382188797,
      "learning_rate": 4.0035243575342605e-08,
      "loss": 0.7246,
      "step": 165
    },
    {
      "epoch": 2.990990990990991,
      "grad_norm": 0.2040543258190155,
      "learning_rate": 1.0009812857370016e-08,
      "loss": 1.3962,
      "step": 166
    },
    {
      "epoch": 3.009009009009009,
      "grad_norm": 0.4154304265975952,
      "learning_rate": 0.0,
      "loss": 2.7024,
      "step": 167
    }
  ],
  "logging_steps": 1,
  "max_steps": 167,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.268381537599488e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}