|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 2000, |
|
"global_step": 21900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00684931506849315, |
|
"grad_norm": 55.08233642578125, |
|
"learning_rate": 2.9943835616438356e-05, |
|
"loss": 4.9626, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0136986301369863, |
|
"grad_norm": 61.599609375, |
|
"learning_rate": 2.987808219178082e-05, |
|
"loss": 3.2637, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02054794520547945, |
|
"grad_norm": 54.39605712890625, |
|
"learning_rate": 2.980958904109589e-05, |
|
"loss": 2.5155, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0273972602739726, |
|
"grad_norm": 35.411338806152344, |
|
"learning_rate": 2.974109589041096e-05, |
|
"loss": 2.3836, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03424657534246575, |
|
"grad_norm": 38.87581253051758, |
|
"learning_rate": 2.9672602739726026e-05, |
|
"loss": 2.1592, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0410958904109589, |
|
"grad_norm": 43.26409149169922, |
|
"learning_rate": 2.9604109589041095e-05, |
|
"loss": 2.0851, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04794520547945205, |
|
"grad_norm": 32.2227897644043, |
|
"learning_rate": 2.9535616438356165e-05, |
|
"loss": 1.833, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0547945205479452, |
|
"grad_norm": 34.12604904174805, |
|
"learning_rate": 2.9467123287671234e-05, |
|
"loss": 1.7324, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06164383561643835, |
|
"grad_norm": 802.5269165039062, |
|
"learning_rate": 2.93986301369863e-05, |
|
"loss": 1.715, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 21.16217803955078, |
|
"learning_rate": 2.933013698630137e-05, |
|
"loss": 1.7966, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07534246575342465, |
|
"grad_norm": 53.79206466674805, |
|
"learning_rate": 2.926164383561644e-05, |
|
"loss": 1.7756, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0821917808219178, |
|
"grad_norm": 69.78266906738281, |
|
"learning_rate": 2.919315068493151e-05, |
|
"loss": 1.6611, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.08904109589041095, |
|
"grad_norm": 19.355026245117188, |
|
"learning_rate": 2.9124657534246575e-05, |
|
"loss": 1.5454, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.0958904109589041, |
|
"grad_norm": 32.797298431396484, |
|
"learning_rate": 2.9056164383561644e-05, |
|
"loss": 1.533, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.10273972602739725, |
|
"grad_norm": 22.2159423828125, |
|
"learning_rate": 2.8987671232876714e-05, |
|
"loss": 1.555, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.1095890410958904, |
|
"grad_norm": 41.200904846191406, |
|
"learning_rate": 2.8919178082191783e-05, |
|
"loss": 1.5322, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.11643835616438356, |
|
"grad_norm": 22.64232635498047, |
|
"learning_rate": 2.885068493150685e-05, |
|
"loss": 1.5337, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1232876712328767, |
|
"grad_norm": 38.510257720947266, |
|
"learning_rate": 2.878219178082192e-05, |
|
"loss": 1.5001, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.13013698630136986, |
|
"grad_norm": 23.04859733581543, |
|
"learning_rate": 2.871369863013699e-05, |
|
"loss": 1.4488, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 23.516109466552734, |
|
"learning_rate": 2.8645205479452058e-05, |
|
"loss": 1.3247, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14383561643835616, |
|
"grad_norm": 24.6121883392334, |
|
"learning_rate": 2.8576712328767124e-05, |
|
"loss": 1.3537, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.1506849315068493, |
|
"grad_norm": 29.713895797729492, |
|
"learning_rate": 2.8508219178082194e-05, |
|
"loss": 1.4116, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.15753424657534246, |
|
"grad_norm": 102.77496337890625, |
|
"learning_rate": 2.8439726027397263e-05, |
|
"loss": 1.3196, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.1643835616438356, |
|
"grad_norm": 46.55476379394531, |
|
"learning_rate": 2.8371232876712332e-05, |
|
"loss": 1.3987, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.17123287671232876, |
|
"grad_norm": 22.365116119384766, |
|
"learning_rate": 2.8302739726027395e-05, |
|
"loss": 1.3027, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1780821917808219, |
|
"grad_norm": 48.17926788330078, |
|
"learning_rate": 2.8234246575342465e-05, |
|
"loss": 1.3698, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.18493150684931506, |
|
"grad_norm": 29.022974014282227, |
|
"learning_rate": 2.8165753424657534e-05, |
|
"loss": 1.2422, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1917808219178082, |
|
"grad_norm": 23.812314987182617, |
|
"learning_rate": 2.8097260273972604e-05, |
|
"loss": 1.2691, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.19863013698630136, |
|
"grad_norm": 18.73370361328125, |
|
"learning_rate": 2.802876712328767e-05, |
|
"loss": 1.3611, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"grad_norm": 23.951034545898438, |
|
"learning_rate": 2.796027397260274e-05, |
|
"loss": 1.3148, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.21232876712328766, |
|
"grad_norm": 33.09256362915039, |
|
"learning_rate": 2.789178082191781e-05, |
|
"loss": 1.3978, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.2191780821917808, |
|
"grad_norm": 23.944395065307617, |
|
"learning_rate": 2.7823287671232878e-05, |
|
"loss": 1.2173, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.22602739726027396, |
|
"grad_norm": 16.93844223022461, |
|
"learning_rate": 2.7754794520547944e-05, |
|
"loss": 1.3082, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2328767123287671, |
|
"grad_norm": 21.014097213745117, |
|
"learning_rate": 2.7686301369863014e-05, |
|
"loss": 1.3363, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.23972602739726026, |
|
"grad_norm": 34.768096923828125, |
|
"learning_rate": 2.7617808219178083e-05, |
|
"loss": 1.1646, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2465753424657534, |
|
"grad_norm": 19.525144577026367, |
|
"learning_rate": 2.7549315068493153e-05, |
|
"loss": 1.2566, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2534246575342466, |
|
"grad_norm": 17.319896697998047, |
|
"learning_rate": 2.748082191780822e-05, |
|
"loss": 1.2123, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.2602739726027397, |
|
"grad_norm": 21.581501007080078, |
|
"learning_rate": 2.7412328767123288e-05, |
|
"loss": 1.4083, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2671232876712329, |
|
"grad_norm": 38.267120361328125, |
|
"learning_rate": 2.7343835616438358e-05, |
|
"loss": 1.1987, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 64.0147933959961, |
|
"learning_rate": 2.7275342465753427e-05, |
|
"loss": 1.1474, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"eval_exact_match": 71.11636707663197, |
|
"eval_f1": 81.37469585294956, |
|
"eval_runtime": 406.8449, |
|
"eval_samples_per_second": 25.98, |
|
"eval_steps_per_second": 1.625, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2808219178082192, |
|
"grad_norm": 22.60133171081543, |
|
"learning_rate": 2.7206849315068493e-05, |
|
"loss": 1.3887, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.2876712328767123, |
|
"grad_norm": 51.95814514160156, |
|
"learning_rate": 2.7138356164383563e-05, |
|
"loss": 1.2049, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.2945205479452055, |
|
"grad_norm": 11.237629890441895, |
|
"learning_rate": 2.7069863013698632e-05, |
|
"loss": 1.1684, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.3013698630136986, |
|
"grad_norm": 23.52985954284668, |
|
"learning_rate": 2.70013698630137e-05, |
|
"loss": 1.2162, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.3082191780821918, |
|
"grad_norm": 19.681074142456055, |
|
"learning_rate": 2.6932876712328768e-05, |
|
"loss": 1.0853, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.3150684931506849, |
|
"grad_norm": 13.592674255371094, |
|
"learning_rate": 2.6864383561643837e-05, |
|
"loss": 1.2977, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.3219178082191781, |
|
"grad_norm": 30.62900733947754, |
|
"learning_rate": 2.6795890410958907e-05, |
|
"loss": 1.1385, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.3287671232876712, |
|
"grad_norm": 15.438138961791992, |
|
"learning_rate": 2.6727397260273976e-05, |
|
"loss": 1.1983, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3356164383561644, |
|
"grad_norm": 21.04355812072754, |
|
"learning_rate": 2.6658904109589042e-05, |
|
"loss": 1.2473, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 101.09291076660156, |
|
"learning_rate": 2.659041095890411e-05, |
|
"loss": 1.2827, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3493150684931507, |
|
"grad_norm": 24.547096252441406, |
|
"learning_rate": 2.6521917808219178e-05, |
|
"loss": 1.2088, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.3561643835616438, |
|
"grad_norm": 17.362468719482422, |
|
"learning_rate": 2.6453424657534247e-05, |
|
"loss": 1.1586, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.363013698630137, |
|
"grad_norm": 22.94930076599121, |
|
"learning_rate": 2.6384931506849313e-05, |
|
"loss": 1.1435, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.3698630136986301, |
|
"grad_norm": 9.875961303710938, |
|
"learning_rate": 2.6316438356164383e-05, |
|
"loss": 1.2004, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.3767123287671233, |
|
"grad_norm": 39.17120361328125, |
|
"learning_rate": 2.6247945205479452e-05, |
|
"loss": 1.1009, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.3835616438356164, |
|
"grad_norm": 29.799320220947266, |
|
"learning_rate": 2.6179452054794522e-05, |
|
"loss": 1.1917, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.3904109589041096, |
|
"grad_norm": 15.327347755432129, |
|
"learning_rate": 2.6110958904109588e-05, |
|
"loss": 1.1141, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.3972602739726027, |
|
"grad_norm": 63.62096405029297, |
|
"learning_rate": 2.6042465753424657e-05, |
|
"loss": 1.1083, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.4041095890410959, |
|
"grad_norm": 77.25872802734375, |
|
"learning_rate": 2.5973972602739727e-05, |
|
"loss": 1.0962, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 26.37015724182129, |
|
"learning_rate": 2.5905479452054796e-05, |
|
"loss": 1.1158, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4178082191780822, |
|
"grad_norm": 30.89618682861328, |
|
"learning_rate": 2.5836986301369862e-05, |
|
"loss": 1.1356, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.4246575342465753, |
|
"grad_norm": 26.392322540283203, |
|
"learning_rate": 2.5768493150684932e-05, |
|
"loss": 0.9842, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.4315068493150685, |
|
"grad_norm": 19.746112823486328, |
|
"learning_rate": 2.57e-05, |
|
"loss": 1.0477, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.4383561643835616, |
|
"grad_norm": 21.304296493530273, |
|
"learning_rate": 2.563150684931507e-05, |
|
"loss": 1.1522, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.4452054794520548, |
|
"grad_norm": 21.67245864868164, |
|
"learning_rate": 2.5563013698630137e-05, |
|
"loss": 1.0677, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.4520547945205479, |
|
"grad_norm": 17.96895980834961, |
|
"learning_rate": 2.5494520547945206e-05, |
|
"loss": 1.0657, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.4589041095890411, |
|
"grad_norm": 19.414440155029297, |
|
"learning_rate": 2.5426027397260276e-05, |
|
"loss": 1.1109, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.4657534246575342, |
|
"grad_norm": 20.33131217956543, |
|
"learning_rate": 2.5357534246575345e-05, |
|
"loss": 1.0958, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.4726027397260274, |
|
"grad_norm": 35.86481475830078, |
|
"learning_rate": 2.528904109589041e-05, |
|
"loss": 1.1134, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"grad_norm": 20.007892608642578, |
|
"learning_rate": 2.522054794520548e-05, |
|
"loss": 1.0236, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4863013698630137, |
|
"grad_norm": 8.771307945251465, |
|
"learning_rate": 2.515205479452055e-05, |
|
"loss": 1.0518, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.4931506849315068, |
|
"grad_norm": 31.425060272216797, |
|
"learning_rate": 2.508356164383562e-05, |
|
"loss": 1.0397, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 33.32624816894531, |
|
"learning_rate": 2.5015068493150686e-05, |
|
"loss": 1.0374, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.5068493150684932, |
|
"grad_norm": 15.538783073425293, |
|
"learning_rate": 2.4946575342465755e-05, |
|
"loss": 1.2677, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.5136986301369864, |
|
"grad_norm": 32.81166076660156, |
|
"learning_rate": 2.4878082191780825e-05, |
|
"loss": 1.0274, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.5205479452054794, |
|
"grad_norm": 40.134246826171875, |
|
"learning_rate": 2.480958904109589e-05, |
|
"loss": 1.0493, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5273972602739726, |
|
"grad_norm": 19.939796447753906, |
|
"learning_rate": 2.4741095890410957e-05, |
|
"loss": 1.1492, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.5342465753424658, |
|
"grad_norm": 23.643762588500977, |
|
"learning_rate": 2.4672602739726026e-05, |
|
"loss": 1.0892, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.541095890410959, |
|
"grad_norm": 28.48002052307129, |
|
"learning_rate": 2.4604109589041096e-05, |
|
"loss": 1.144, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 15.827131271362305, |
|
"learning_rate": 2.4535616438356165e-05, |
|
"loss": 1.117, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"eval_exact_match": 78.61873226111636, |
|
"eval_f1": 86.50538268692839, |
|
"eval_runtime": 407.223, |
|
"eval_samples_per_second": 25.956, |
|
"eval_steps_per_second": 1.623, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5547945205479452, |
|
"grad_norm": 17.803165435791016, |
|
"learning_rate": 2.446712328767123e-05, |
|
"loss": 1.1428, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.5616438356164384, |
|
"grad_norm": 15.47031307220459, |
|
"learning_rate": 2.43986301369863e-05, |
|
"loss": 1.1178, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.5684931506849316, |
|
"grad_norm": 20.780733108520508, |
|
"learning_rate": 2.433013698630137e-05, |
|
"loss": 1.0105, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.5753424657534246, |
|
"grad_norm": 33.10541915893555, |
|
"learning_rate": 2.426164383561644e-05, |
|
"loss": 1.0502, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5821917808219178, |
|
"grad_norm": 19.134233474731445, |
|
"learning_rate": 2.4193150684931506e-05, |
|
"loss": 1.1227, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.589041095890411, |
|
"grad_norm": 10.8368558883667, |
|
"learning_rate": 2.4124657534246575e-05, |
|
"loss": 1.0697, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.5958904109589042, |
|
"grad_norm": 18.496990203857422, |
|
"learning_rate": 2.4057534246575344e-05, |
|
"loss": 1.0705, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.6027397260273972, |
|
"grad_norm": 16.98480987548828, |
|
"learning_rate": 2.398904109589041e-05, |
|
"loss": 1.135, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.6095890410958904, |
|
"grad_norm": 35.130393981933594, |
|
"learning_rate": 2.392054794520548e-05, |
|
"loss": 1.1933, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"grad_norm": 71.43773651123047, |
|
"learning_rate": 2.385205479452055e-05, |
|
"loss": 1.0783, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6232876712328768, |
|
"grad_norm": 21.982946395874023, |
|
"learning_rate": 2.378356164383562e-05, |
|
"loss": 1.0517, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.6301369863013698, |
|
"grad_norm": 21.33681297302246, |
|
"learning_rate": 2.3715068493150685e-05, |
|
"loss": 1.0829, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.636986301369863, |
|
"grad_norm": 14.706864356994629, |
|
"learning_rate": 2.3646575342465754e-05, |
|
"loss": 1.0452, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.6438356164383562, |
|
"grad_norm": 19.40340805053711, |
|
"learning_rate": 2.3578082191780824e-05, |
|
"loss": 1.0843, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.6506849315068494, |
|
"grad_norm": 66.0240478515625, |
|
"learning_rate": 2.3509589041095893e-05, |
|
"loss": 0.9722, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.6575342465753424, |
|
"grad_norm": 30.19057273864746, |
|
"learning_rate": 2.344109589041096e-05, |
|
"loss": 0.8738, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.6643835616438356, |
|
"grad_norm": 24.212846755981445, |
|
"learning_rate": 2.337260273972603e-05, |
|
"loss": 0.9865, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.6712328767123288, |
|
"grad_norm": 24.953773498535156, |
|
"learning_rate": 2.3304109589041098e-05, |
|
"loss": 0.9409, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.678082191780822, |
|
"grad_norm": 32.05400466918945, |
|
"learning_rate": 2.3235616438356168e-05, |
|
"loss": 0.9674, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 20.69804573059082, |
|
"learning_rate": 2.3167123287671234e-05, |
|
"loss": 0.9547, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6917808219178082, |
|
"grad_norm": 9.982364654541016, |
|
"learning_rate": 2.3098630136986303e-05, |
|
"loss": 0.9724, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.6986301369863014, |
|
"grad_norm": 24.489803314208984, |
|
"learning_rate": 2.3030136986301373e-05, |
|
"loss": 1.0141, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.7054794520547946, |
|
"grad_norm": 22.05426597595215, |
|
"learning_rate": 2.296164383561644e-05, |
|
"loss": 1.0011, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.7123287671232876, |
|
"grad_norm": 11.475118637084961, |
|
"learning_rate": 2.2893150684931505e-05, |
|
"loss": 1.0293, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.7191780821917808, |
|
"grad_norm": 24.616731643676758, |
|
"learning_rate": 2.2824657534246574e-05, |
|
"loss": 1.0347, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.726027397260274, |
|
"grad_norm": 13.638339042663574, |
|
"learning_rate": 2.2756164383561644e-05, |
|
"loss": 0.9843, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.7328767123287672, |
|
"grad_norm": 26.214601516723633, |
|
"learning_rate": 2.2687671232876713e-05, |
|
"loss": 0.9909, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.7397260273972602, |
|
"grad_norm": 18.321882247924805, |
|
"learning_rate": 2.261917808219178e-05, |
|
"loss": 0.9893, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.7465753424657534, |
|
"grad_norm": 14.493846893310547, |
|
"learning_rate": 2.255068493150685e-05, |
|
"loss": 1.0089, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"grad_norm": 29.463472366333008, |
|
"learning_rate": 2.248219178082192e-05, |
|
"loss": 1.052, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7602739726027398, |
|
"grad_norm": 16.674697875976562, |
|
"learning_rate": 2.2413698630136988e-05, |
|
"loss": 0.977, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.7671232876712328, |
|
"grad_norm": 20.96110725402832, |
|
"learning_rate": 2.2345205479452054e-05, |
|
"loss": 0.9393, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.773972602739726, |
|
"grad_norm": 10.15765380859375, |
|
"learning_rate": 2.2276712328767123e-05, |
|
"loss": 0.9647, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.7808219178082192, |
|
"grad_norm": 18.204395294189453, |
|
"learning_rate": 2.2208219178082193e-05, |
|
"loss": 1.0376, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.7876712328767124, |
|
"grad_norm": 12.090936660766602, |
|
"learning_rate": 2.2139726027397262e-05, |
|
"loss": 1.0562, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.7945205479452054, |
|
"grad_norm": 23.062904357910156, |
|
"learning_rate": 2.207123287671233e-05, |
|
"loss": 1.0397, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.8013698630136986, |
|
"grad_norm": 13.404748916625977, |
|
"learning_rate": 2.2002739726027398e-05, |
|
"loss": 1.0177, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.8082191780821918, |
|
"grad_norm": 16.615171432495117, |
|
"learning_rate": 2.1934246575342467e-05, |
|
"loss": 0.9627, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.815068493150685, |
|
"grad_norm": 24.662452697753906, |
|
"learning_rate": 2.1865753424657537e-05, |
|
"loss": 0.9447, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"grad_norm": 9.534379959106445, |
|
"learning_rate": 2.1797260273972603e-05, |
|
"loss": 1.0024, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.821917808219178, |
|
"eval_exact_match": 80.7379375591296, |
|
"eval_f1": 88.15133262306199, |
|
"eval_runtime": 405.8112, |
|
"eval_samples_per_second": 26.047, |
|
"eval_steps_per_second": 1.629, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.8287671232876712, |
|
"grad_norm": 31.16766929626465, |
|
"learning_rate": 2.1728767123287672e-05, |
|
"loss": 0.9153, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.8356164383561644, |
|
"grad_norm": 21.178213119506836, |
|
"learning_rate": 2.1660273972602742e-05, |
|
"loss": 0.919, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.8424657534246576, |
|
"grad_norm": 16.18219566345215, |
|
"learning_rate": 2.159178082191781e-05, |
|
"loss": 1.0232, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.8493150684931506, |
|
"grad_norm": 10.178057670593262, |
|
"learning_rate": 2.1523287671232877e-05, |
|
"loss": 1.0113, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.8561643835616438, |
|
"grad_norm": 32.234249114990234, |
|
"learning_rate": 2.1454794520547947e-05, |
|
"loss": 0.9115, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.863013698630137, |
|
"grad_norm": 10.931483268737793, |
|
"learning_rate": 2.1386301369863016e-05, |
|
"loss": 0.9493, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.8698630136986302, |
|
"grad_norm": 18.961946487426758, |
|
"learning_rate": 2.1317808219178086e-05, |
|
"loss": 0.9662, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.8767123287671232, |
|
"grad_norm": 32.1358642578125, |
|
"learning_rate": 2.125068493150685e-05, |
|
"loss": 0.9122, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.8835616438356164, |
|
"grad_norm": 27.434032440185547, |
|
"learning_rate": 2.118219178082192e-05, |
|
"loss": 0.9665, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.8904109589041096, |
|
"grad_norm": 13.604903221130371, |
|
"learning_rate": 2.1113698630136987e-05, |
|
"loss": 0.9752, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.8972602739726028, |
|
"grad_norm": 13.965779304504395, |
|
"learning_rate": 2.1045205479452053e-05, |
|
"loss": 0.8704, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.9041095890410958, |
|
"grad_norm": 18.716514587402344, |
|
"learning_rate": 2.0976712328767122e-05, |
|
"loss": 0.9159, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.910958904109589, |
|
"grad_norm": 19.964460372924805, |
|
"learning_rate": 2.0908219178082192e-05, |
|
"loss": 0.9359, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.9178082191780822, |
|
"grad_norm": 13.847062110900879, |
|
"learning_rate": 2.083972602739726e-05, |
|
"loss": 0.8623, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.9246575342465754, |
|
"grad_norm": 25.339975357055664, |
|
"learning_rate": 2.0771232876712327e-05, |
|
"loss": 0.9804, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.9315068493150684, |
|
"grad_norm": 16.102794647216797, |
|
"learning_rate": 2.0702739726027397e-05, |
|
"loss": 0.8721, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.9383561643835616, |
|
"grad_norm": 15.555779457092285, |
|
"learning_rate": 2.0634246575342466e-05, |
|
"loss": 0.9942, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.9452054794520548, |
|
"grad_norm": 14.534811019897461, |
|
"learning_rate": 2.0565753424657536e-05, |
|
"loss": 0.9333, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.952054794520548, |
|
"grad_norm": 24.963340759277344, |
|
"learning_rate": 2.0497260273972602e-05, |
|
"loss": 0.966, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.958904109589041, |
|
"grad_norm": 42.927734375, |
|
"learning_rate": 2.042876712328767e-05, |
|
"loss": 0.913, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9657534246575342, |
|
"grad_norm": 14.816558837890625, |
|
"learning_rate": 2.036027397260274e-05, |
|
"loss": 0.9717, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.9726027397260274, |
|
"grad_norm": 27.83946418762207, |
|
"learning_rate": 2.029178082191781e-05, |
|
"loss": 0.9855, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.9794520547945206, |
|
"grad_norm": 23.87200164794922, |
|
"learning_rate": 2.0223287671232876e-05, |
|
"loss": 0.9934, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.9863013698630136, |
|
"grad_norm": 30.313343048095703, |
|
"learning_rate": 2.0154794520547946e-05, |
|
"loss": 1.0148, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.9931506849315068, |
|
"grad_norm": 23.50067138671875, |
|
"learning_rate": 2.0086301369863015e-05, |
|
"loss": 0.9578, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 17.968570709228516, |
|
"learning_rate": 2.0017808219178085e-05, |
|
"loss": 0.821, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.0068493150684932, |
|
"grad_norm": 44.593414306640625, |
|
"learning_rate": 1.994931506849315e-05, |
|
"loss": 0.6823, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.0136986301369864, |
|
"grad_norm": 12.959754943847656, |
|
"learning_rate": 1.988082191780822e-05, |
|
"loss": 0.6937, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.0205479452054795, |
|
"grad_norm": 20.291378021240234, |
|
"learning_rate": 1.981232876712329e-05, |
|
"loss": 0.6457, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.0273972602739727, |
|
"grad_norm": 5.460036277770996, |
|
"learning_rate": 1.974383561643836e-05, |
|
"loss": 0.729, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.0342465753424657, |
|
"grad_norm": 17.41657257080078, |
|
"learning_rate": 1.9675342465753425e-05, |
|
"loss": 0.7032, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.0410958904109588, |
|
"grad_norm": 10.595620155334473, |
|
"learning_rate": 1.9606849315068495e-05, |
|
"loss": 0.6788, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.047945205479452, |
|
"grad_norm": 32.301509857177734, |
|
"learning_rate": 1.9538356164383564e-05, |
|
"loss": 0.669, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.0547945205479452, |
|
"grad_norm": 5.369728088378906, |
|
"learning_rate": 1.9469863013698634e-05, |
|
"loss": 0.6169, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.0616438356164384, |
|
"grad_norm": 38.604183197021484, |
|
"learning_rate": 1.94013698630137e-05, |
|
"loss": 0.6585, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.0684931506849316, |
|
"grad_norm": 12.535406112670898, |
|
"learning_rate": 1.9332876712328766e-05, |
|
"loss": 0.6448, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.0753424657534247, |
|
"grad_norm": 11.292516708374023, |
|
"learning_rate": 1.9264383561643835e-05, |
|
"loss": 0.6429, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.0821917808219177, |
|
"grad_norm": 15.400158882141113, |
|
"learning_rate": 1.9195890410958905e-05, |
|
"loss": 0.6553, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.0890410958904109, |
|
"grad_norm": 28.782424926757812, |
|
"learning_rate": 1.912739726027397e-05, |
|
"loss": 0.7737, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.095890410958904, |
|
"grad_norm": 18.407026290893555, |
|
"learning_rate": 1.905890410958904e-05, |
|
"loss": 0.5717, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.095890410958904, |
|
"eval_exact_match": 81.46641438032167, |
|
"eval_f1": 88.73804254841154, |
|
"eval_runtime": 407.6234, |
|
"eval_samples_per_second": 25.931, |
|
"eval_steps_per_second": 1.622, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.1027397260273972, |
|
"grad_norm": 54.460880279541016, |
|
"learning_rate": 1.899041095890411e-05, |
|
"loss": 0.6957, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.1095890410958904, |
|
"grad_norm": 10.459880828857422, |
|
"learning_rate": 1.892191780821918e-05, |
|
"loss": 0.6871, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.1164383561643836, |
|
"grad_norm": 21.884292602539062, |
|
"learning_rate": 1.8853424657534245e-05, |
|
"loss": 0.7586, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.1232876712328768, |
|
"grad_norm": 18.18182373046875, |
|
"learning_rate": 1.8784931506849315e-05, |
|
"loss": 0.7294, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.13013698630137, |
|
"grad_norm": 14.117548942565918, |
|
"learning_rate": 1.8716438356164384e-05, |
|
"loss": 0.6837, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.1369863013698631, |
|
"grad_norm": 20.044261932373047, |
|
"learning_rate": 1.8647945205479454e-05, |
|
"loss": 0.7353, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.143835616438356, |
|
"grad_norm": 16.077611923217773, |
|
"learning_rate": 1.857945205479452e-05, |
|
"loss": 0.6038, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.1506849315068493, |
|
"grad_norm": 28.85369300842285, |
|
"learning_rate": 1.851095890410959e-05, |
|
"loss": 0.6414, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.1575342465753424, |
|
"grad_norm": 14.509927749633789, |
|
"learning_rate": 1.844246575342466e-05, |
|
"loss": 0.7068, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.1643835616438356, |
|
"grad_norm": 14.299630165100098, |
|
"learning_rate": 1.837397260273973e-05, |
|
"loss": 0.7094, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.1712328767123288, |
|
"grad_norm": 11.768505096435547, |
|
"learning_rate": 1.8305479452054794e-05, |
|
"loss": 0.6914, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.178082191780822, |
|
"grad_norm": 42.37126922607422, |
|
"learning_rate": 1.8236986301369864e-05, |
|
"loss": 0.6539, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.1849315068493151, |
|
"grad_norm": 14.21442985534668, |
|
"learning_rate": 1.8168493150684933e-05, |
|
"loss": 0.7302, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.191780821917808, |
|
"grad_norm": 19.04937171936035, |
|
"learning_rate": 1.8100000000000003e-05, |
|
"loss": 0.7685, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.1986301369863013, |
|
"grad_norm": 17.215967178344727, |
|
"learning_rate": 1.803150684931507e-05, |
|
"loss": 0.6888, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.2054794520547945, |
|
"grad_norm": 16.23516082763672, |
|
"learning_rate": 1.796301369863014e-05, |
|
"loss": 0.6825, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.2123287671232876, |
|
"grad_norm": 39.78145217895508, |
|
"learning_rate": 1.7894520547945208e-05, |
|
"loss": 0.7799, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.2191780821917808, |
|
"grad_norm": 26.684986114501953, |
|
"learning_rate": 1.7826027397260277e-05, |
|
"loss": 0.7425, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.226027397260274, |
|
"grad_norm": 35.67079544067383, |
|
"learning_rate": 1.7757534246575343e-05, |
|
"loss": 0.7405, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.2328767123287672, |
|
"grad_norm": 18.228994369506836, |
|
"learning_rate": 1.7689041095890413e-05, |
|
"loss": 0.5968, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.2397260273972603, |
|
"grad_norm": 15.768519401550293, |
|
"learning_rate": 1.7620547945205482e-05, |
|
"loss": 0.6791, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.2465753424657535, |
|
"grad_norm": 26.350936889648438, |
|
"learning_rate": 1.7552054794520545e-05, |
|
"loss": 0.7312, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.2534246575342465, |
|
"grad_norm": 22.138206481933594, |
|
"learning_rate": 1.7483561643835615e-05, |
|
"loss": 0.6496, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.2602739726027397, |
|
"grad_norm": 20.530515670776367, |
|
"learning_rate": 1.7415068493150684e-05, |
|
"loss": 0.6592, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.2671232876712328, |
|
"grad_norm": 18.60872459411621, |
|
"learning_rate": 1.7346575342465754e-05, |
|
"loss": 0.7109, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.273972602739726, |
|
"grad_norm": 10.934627532958984, |
|
"learning_rate": 1.727808219178082e-05, |
|
"loss": 0.6364, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.2808219178082192, |
|
"grad_norm": 24.400938034057617, |
|
"learning_rate": 1.720958904109589e-05, |
|
"loss": 0.6624, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.2876712328767124, |
|
"grad_norm": 33.15473556518555, |
|
"learning_rate": 1.714109589041096e-05, |
|
"loss": 0.7526, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.2945205479452055, |
|
"grad_norm": 4.9253339767456055, |
|
"learning_rate": 1.7072602739726028e-05, |
|
"loss": 0.6094, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.3013698630136985, |
|
"grad_norm": 26.45025634765625, |
|
"learning_rate": 1.7004109589041094e-05, |
|
"loss": 0.6779, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.308219178082192, |
|
"grad_norm": 12.181562423706055, |
|
"learning_rate": 1.6935616438356164e-05, |
|
"loss": 0.7728, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.3150684931506849, |
|
"grad_norm": 6.9998040199279785, |
|
"learning_rate": 1.6867123287671233e-05, |
|
"loss": 0.6221, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.321917808219178, |
|
"grad_norm": 40.702369689941406, |
|
"learning_rate": 1.6798630136986303e-05, |
|
"loss": 0.7124, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.3287671232876712, |
|
"grad_norm": 18.84299659729004, |
|
"learning_rate": 1.673013698630137e-05, |
|
"loss": 0.6579, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.3356164383561644, |
|
"grad_norm": 24.911535263061523, |
|
"learning_rate": 1.6661643835616438e-05, |
|
"loss": 0.7197, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.3424657534246576, |
|
"grad_norm": 24.64927101135254, |
|
"learning_rate": 1.6593150684931508e-05, |
|
"loss": 0.6767, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.3493150684931507, |
|
"grad_norm": 11.854528427124023, |
|
"learning_rate": 1.6524657534246577e-05, |
|
"loss": 0.6726, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.356164383561644, |
|
"grad_norm": 38.699310302734375, |
|
"learning_rate": 1.6456164383561643e-05, |
|
"loss": 0.6484, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.3630136986301369, |
|
"grad_norm": 21.76763916015625, |
|
"learning_rate": 1.6387671232876713e-05, |
|
"loss": 0.6959, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.36986301369863, |
|
"grad_norm": 15.432331085205078, |
|
"learning_rate": 1.6319178082191782e-05, |
|
"loss": 0.6585, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.36986301369863, |
|
"eval_exact_match": 80.78524124881741, |
|
"eval_f1": 88.39324523394289, |
|
"eval_runtime": 407.758, |
|
"eval_samples_per_second": 25.922, |
|
"eval_steps_per_second": 1.621, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.3767123287671232, |
|
"grad_norm": 21.714828491210938, |
|
"learning_rate": 1.625068493150685e-05, |
|
"loss": 0.6976, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.3835616438356164, |
|
"grad_norm": 6.602792739868164, |
|
"learning_rate": 1.6182191780821918e-05, |
|
"loss": 0.6094, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.3904109589041096, |
|
"grad_norm": 42.471412658691406, |
|
"learning_rate": 1.6113698630136987e-05, |
|
"loss": 0.7151, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.3972602739726028, |
|
"grad_norm": 11.658584594726562, |
|
"learning_rate": 1.6045205479452057e-05, |
|
"loss": 0.639, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.404109589041096, |
|
"grad_norm": 37.821659088134766, |
|
"learning_rate": 1.5976712328767126e-05, |
|
"loss": 0.6079, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.410958904109589, |
|
"grad_norm": 5.837065696716309, |
|
"learning_rate": 1.5908219178082192e-05, |
|
"loss": 0.5878, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.4178082191780823, |
|
"grad_norm": 67.67562866210938, |
|
"learning_rate": 1.5839726027397258e-05, |
|
"loss": 0.7724, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.4246575342465753, |
|
"grad_norm": 2.624040365219116, |
|
"learning_rate": 1.5771232876712328e-05, |
|
"loss": 0.6753, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.4315068493150684, |
|
"grad_norm": 32.9188232421875, |
|
"learning_rate": 1.5702739726027397e-05, |
|
"loss": 0.6534, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 1.4383561643835616, |
|
"grad_norm": 32.16576385498047, |
|
"learning_rate": 1.5634246575342463e-05, |
|
"loss": 0.716, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.4452054794520548, |
|
"grad_norm": 19.11595344543457, |
|
"learning_rate": 1.5565753424657533e-05, |
|
"loss": 0.6081, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 1.452054794520548, |
|
"grad_norm": 13.033523559570312, |
|
"learning_rate": 1.5497260273972602e-05, |
|
"loss": 0.7286, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.4589041095890412, |
|
"grad_norm": 22.972614288330078, |
|
"learning_rate": 1.5428767123287672e-05, |
|
"loss": 0.6916, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 1.4657534246575343, |
|
"grad_norm": 14.032968521118164, |
|
"learning_rate": 1.5360273972602738e-05, |
|
"loss": 0.6643, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.4726027397260273, |
|
"grad_norm": 60.66258239746094, |
|
"learning_rate": 1.5291780821917807e-05, |
|
"loss": 0.5659, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 1.4794520547945205, |
|
"grad_norm": 18.83857536315918, |
|
"learning_rate": 1.5223287671232877e-05, |
|
"loss": 0.6599, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.4863013698630136, |
|
"grad_norm": 16.09821128845215, |
|
"learning_rate": 1.5154794520547946e-05, |
|
"loss": 0.6523, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 1.4931506849315068, |
|
"grad_norm": 8.068375587463379, |
|
"learning_rate": 1.5086301369863012e-05, |
|
"loss": 0.6724, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 16.360702514648438, |
|
"learning_rate": 1.5017808219178082e-05, |
|
"loss": 0.6371, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 1.5068493150684932, |
|
"grad_norm": 17.347824096679688, |
|
"learning_rate": 1.4949315068493151e-05, |
|
"loss": 0.6732, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.5136986301369864, |
|
"grad_norm": 45.4647216796875, |
|
"learning_rate": 1.4880821917808219e-05, |
|
"loss": 0.721, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 1.5205479452054793, |
|
"grad_norm": 53.550174713134766, |
|
"learning_rate": 1.4812328767123289e-05, |
|
"loss": 0.6261, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.5273972602739727, |
|
"grad_norm": 27.888072967529297, |
|
"learning_rate": 1.4743835616438356e-05, |
|
"loss": 0.7, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 1.5342465753424657, |
|
"grad_norm": 23.892024993896484, |
|
"learning_rate": 1.4675342465753426e-05, |
|
"loss": 0.6329, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.541095890410959, |
|
"grad_norm": 12.964653968811035, |
|
"learning_rate": 1.4606849315068494e-05, |
|
"loss": 0.6893, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 1.547945205479452, |
|
"grad_norm": 16.30516242980957, |
|
"learning_rate": 1.4538356164383563e-05, |
|
"loss": 0.6674, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.5547945205479452, |
|
"grad_norm": 20.829771041870117, |
|
"learning_rate": 1.4469863013698629e-05, |
|
"loss": 0.7394, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 1.5616438356164384, |
|
"grad_norm": 21.59797477722168, |
|
"learning_rate": 1.4401369863013699e-05, |
|
"loss": 0.7592, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.5684931506849316, |
|
"grad_norm": 14.089178085327148, |
|
"learning_rate": 1.4332876712328766e-05, |
|
"loss": 0.6692, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 1.5753424657534247, |
|
"grad_norm": 9.009936332702637, |
|
"learning_rate": 1.4264383561643836e-05, |
|
"loss": 0.5945, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.5821917808219177, |
|
"grad_norm": 14.968718528747559, |
|
"learning_rate": 1.4195890410958904e-05, |
|
"loss": 0.7055, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 1.589041095890411, |
|
"grad_norm": 8.840102195739746, |
|
"learning_rate": 1.4127397260273973e-05, |
|
"loss": 0.7345, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.595890410958904, |
|
"grad_norm": 17.11764907836914, |
|
"learning_rate": 1.4058904109589041e-05, |
|
"loss": 0.7073, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 1.6027397260273972, |
|
"grad_norm": 11.691437721252441, |
|
"learning_rate": 1.399041095890411e-05, |
|
"loss": 0.6319, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.6095890410958904, |
|
"grad_norm": 12.635778427124023, |
|
"learning_rate": 1.3921917808219178e-05, |
|
"loss": 0.6966, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 1.6164383561643836, |
|
"grad_norm": 8.899778366088867, |
|
"learning_rate": 1.3853424657534248e-05, |
|
"loss": 0.7087, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.6232876712328768, |
|
"grad_norm": 20.235586166381836, |
|
"learning_rate": 1.3784931506849315e-05, |
|
"loss": 0.6193, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 1.6301369863013697, |
|
"grad_norm": 36.906707763671875, |
|
"learning_rate": 1.3716438356164385e-05, |
|
"loss": 0.6291, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.6369863013698631, |
|
"grad_norm": 11.22154712677002, |
|
"learning_rate": 1.3647945205479453e-05, |
|
"loss": 0.7093, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 1.643835616438356, |
|
"grad_norm": 27.797801971435547, |
|
"learning_rate": 1.357945205479452e-05, |
|
"loss": 0.6589, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.643835616438356, |
|
"eval_exact_match": 82.42194891201514, |
|
"eval_f1": 89.45487444483695, |
|
"eval_runtime": 406.0814, |
|
"eval_samples_per_second": 26.029, |
|
"eval_steps_per_second": 1.628, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.6506849315068495, |
|
"grad_norm": 39.78184509277344, |
|
"learning_rate": 1.3510958904109588e-05, |
|
"loss": 0.6151, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 1.6575342465753424, |
|
"grad_norm": 8.190918922424316, |
|
"learning_rate": 1.3442465753424658e-05, |
|
"loss": 0.6776, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.6643835616438356, |
|
"grad_norm": 33.2342529296875, |
|
"learning_rate": 1.3373972602739725e-05, |
|
"loss": 0.6841, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 1.6712328767123288, |
|
"grad_norm": 20.676816940307617, |
|
"learning_rate": 1.3305479452054795e-05, |
|
"loss": 0.6696, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.678082191780822, |
|
"grad_norm": 28.78380584716797, |
|
"learning_rate": 1.3236986301369863e-05, |
|
"loss": 0.6833, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 1.6849315068493151, |
|
"grad_norm": 7.916901588439941, |
|
"learning_rate": 1.3168493150684932e-05, |
|
"loss": 0.6699, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.691780821917808, |
|
"grad_norm": 19.101404190063477, |
|
"learning_rate": 1.31e-05, |
|
"loss": 0.698, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 1.6986301369863015, |
|
"grad_norm": 7.5608978271484375, |
|
"learning_rate": 1.303150684931507e-05, |
|
"loss": 0.6808, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.7054794520547945, |
|
"grad_norm": 24.476348876953125, |
|
"learning_rate": 1.2963013698630137e-05, |
|
"loss": 0.7041, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 1.7123287671232876, |
|
"grad_norm": 22.60247039794922, |
|
"learning_rate": 1.2894520547945207e-05, |
|
"loss": 0.6406, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.7191780821917808, |
|
"grad_norm": 18.08481216430664, |
|
"learning_rate": 1.2826027397260274e-05, |
|
"loss": 0.6683, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 1.726027397260274, |
|
"grad_norm": 254.4781951904297, |
|
"learning_rate": 1.2757534246575342e-05, |
|
"loss": 0.5617, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.7328767123287672, |
|
"grad_norm": 24.120647430419922, |
|
"learning_rate": 1.268904109589041e-05, |
|
"loss": 0.6121, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 1.7397260273972601, |
|
"grad_norm": 25.768285751342773, |
|
"learning_rate": 1.262054794520548e-05, |
|
"loss": 0.5745, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.7465753424657535, |
|
"grad_norm": 13.516427993774414, |
|
"learning_rate": 1.2552054794520547e-05, |
|
"loss": 0.6538, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 1.7534246575342465, |
|
"grad_norm": 26.595272064208984, |
|
"learning_rate": 1.2483561643835617e-05, |
|
"loss": 0.7562, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.7602739726027399, |
|
"grad_norm": 9.390408515930176, |
|
"learning_rate": 1.2415068493150685e-05, |
|
"loss": 0.6179, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 1.7671232876712328, |
|
"grad_norm": 25.727460861206055, |
|
"learning_rate": 1.2346575342465754e-05, |
|
"loss": 0.6364, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.773972602739726, |
|
"grad_norm": 17.040943145751953, |
|
"learning_rate": 1.2278082191780822e-05, |
|
"loss": 0.6312, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 1.7808219178082192, |
|
"grad_norm": 47.64375305175781, |
|
"learning_rate": 1.2209589041095891e-05, |
|
"loss": 0.6916, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.7876712328767124, |
|
"grad_norm": 7.669281005859375, |
|
"learning_rate": 1.2141095890410959e-05, |
|
"loss": 0.6765, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 1.7945205479452055, |
|
"grad_norm": 27.307491302490234, |
|
"learning_rate": 1.2072602739726028e-05, |
|
"loss": 0.6479, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.8013698630136985, |
|
"grad_norm": 34.39345932006836, |
|
"learning_rate": 1.2004109589041096e-05, |
|
"loss": 0.6739, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 1.808219178082192, |
|
"grad_norm": 3.3462016582489014, |
|
"learning_rate": 1.1935616438356166e-05, |
|
"loss": 0.6523, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.8150684931506849, |
|
"grad_norm": 6.420201301574707, |
|
"learning_rate": 1.1867123287671232e-05, |
|
"loss": 0.6458, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 1.821917808219178, |
|
"grad_norm": 60.785194396972656, |
|
"learning_rate": 1.1798630136986301e-05, |
|
"loss": 0.621, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.8287671232876712, |
|
"grad_norm": 25.992483139038086, |
|
"learning_rate": 1.1730136986301369e-05, |
|
"loss": 0.6654, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 1.8356164383561644, |
|
"grad_norm": 26.358213424682617, |
|
"learning_rate": 1.1661643835616439e-05, |
|
"loss": 0.6936, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.8424657534246576, |
|
"grad_norm": 19.42777442932129, |
|
"learning_rate": 1.1593150684931506e-05, |
|
"loss": 0.6549, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.8493150684931505, |
|
"grad_norm": 35.19224548339844, |
|
"learning_rate": 1.1524657534246576e-05, |
|
"loss": 0.6872, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.856164383561644, |
|
"grad_norm": 24.116058349609375, |
|
"learning_rate": 1.1456164383561644e-05, |
|
"loss": 0.6744, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.8630136986301369, |
|
"grad_norm": 29.181964874267578, |
|
"learning_rate": 1.1387671232876713e-05, |
|
"loss": 0.7206, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.8698630136986303, |
|
"grad_norm": 33.76987838745117, |
|
"learning_rate": 1.131917808219178e-05, |
|
"loss": 0.6819, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.8767123287671232, |
|
"grad_norm": 17.98587417602539, |
|
"learning_rate": 1.125068493150685e-05, |
|
"loss": 0.6991, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.8835616438356164, |
|
"grad_norm": 30.777263641357422, |
|
"learning_rate": 1.1182191780821918e-05, |
|
"loss": 0.6692, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.8904109589041096, |
|
"grad_norm": 9.233376502990723, |
|
"learning_rate": 1.1113698630136988e-05, |
|
"loss": 0.6068, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.8972602739726028, |
|
"grad_norm": 6.541473388671875, |
|
"learning_rate": 1.1045205479452055e-05, |
|
"loss": 0.6263, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.904109589041096, |
|
"grad_norm": 10.6819486618042, |
|
"learning_rate": 1.0976712328767123e-05, |
|
"loss": 0.5947, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.910958904109589, |
|
"grad_norm": 11.901646614074707, |
|
"learning_rate": 1.0908219178082191e-05, |
|
"loss": 0.722, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.9178082191780823, |
|
"grad_norm": 11.687748908996582, |
|
"learning_rate": 1.083972602739726e-05, |
|
"loss": 0.6237, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.9178082191780823, |
|
"eval_exact_match": 82.82876064333018, |
|
"eval_f1": 89.81835180261338, |
|
"eval_runtime": 406.8019, |
|
"eval_samples_per_second": 25.983, |
|
"eval_steps_per_second": 1.625, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.9246575342465753, |
|
"grad_norm": 22.835041046142578, |
|
"learning_rate": 1.0771232876712328e-05, |
|
"loss": 0.657, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.9315068493150684, |
|
"grad_norm": 15.053028106689453, |
|
"learning_rate": 1.0702739726027398e-05, |
|
"loss": 0.7027, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.9383561643835616, |
|
"grad_norm": 18.73754119873047, |
|
"learning_rate": 1.0634246575342465e-05, |
|
"loss": 0.6092, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 1.9452054794520548, |
|
"grad_norm": 22.99529266357422, |
|
"learning_rate": 1.0565753424657535e-05, |
|
"loss": 0.6337, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.952054794520548, |
|
"grad_norm": 11.403436660766602, |
|
"learning_rate": 1.0497260273972603e-05, |
|
"loss": 0.7014, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.958904109589041, |
|
"grad_norm": 10.497685432434082, |
|
"learning_rate": 1.0428767123287672e-05, |
|
"loss": 0.624, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.9657534246575343, |
|
"grad_norm": 39.24784851074219, |
|
"learning_rate": 1.036027397260274e-05, |
|
"loss": 0.7035, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 1.9726027397260273, |
|
"grad_norm": 10.58161449432373, |
|
"learning_rate": 1.029178082191781e-05, |
|
"loss": 0.6705, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.9794520547945207, |
|
"grad_norm": 30.54125213623047, |
|
"learning_rate": 1.0223287671232877e-05, |
|
"loss": 0.6178, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 1.9863013698630136, |
|
"grad_norm": 11.545398712158203, |
|
"learning_rate": 1.0154794520547947e-05, |
|
"loss": 0.6929, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.9931506849315068, |
|
"grad_norm": 12.356890678405762, |
|
"learning_rate": 1.0086301369863013e-05, |
|
"loss": 0.6686, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 12.808874130249023, |
|
"learning_rate": 1.0017808219178082e-05, |
|
"loss": 0.709, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 2.006849315068493, |
|
"grad_norm": 20.87654685974121, |
|
"learning_rate": 9.94931506849315e-06, |
|
"loss": 0.3807, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 2.0136986301369864, |
|
"grad_norm": 11.455994606018066, |
|
"learning_rate": 9.88082191780822e-06, |
|
"loss": 0.4346, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 2.0205479452054793, |
|
"grad_norm": 9.450268745422363, |
|
"learning_rate": 9.812328767123287e-06, |
|
"loss": 0.4046, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 2.0273972602739727, |
|
"grad_norm": 11.238616943359375, |
|
"learning_rate": 9.743835616438357e-06, |
|
"loss": 0.4077, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 2.0342465753424657, |
|
"grad_norm": 7.271957874298096, |
|
"learning_rate": 9.675342465753424e-06, |
|
"loss": 0.4039, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 2.041095890410959, |
|
"grad_norm": 13.240756034851074, |
|
"learning_rate": 9.606849315068494e-06, |
|
"loss": 0.4307, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 2.047945205479452, |
|
"grad_norm": 17.84387969970703, |
|
"learning_rate": 9.538356164383562e-06, |
|
"loss": 0.4196, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 2.0547945205479454, |
|
"grad_norm": 15.702322959899902, |
|
"learning_rate": 9.469863013698631e-06, |
|
"loss": 0.39, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.0616438356164384, |
|
"grad_norm": 17.96023178100586, |
|
"learning_rate": 9.401369863013699e-06, |
|
"loss": 0.3965, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 2.0684931506849313, |
|
"grad_norm": 29.622323989868164, |
|
"learning_rate": 9.332876712328768e-06, |
|
"loss": 0.4599, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 2.0753424657534247, |
|
"grad_norm": 10.847167015075684, |
|
"learning_rate": 9.264383561643836e-06, |
|
"loss": 0.3994, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 2.0821917808219177, |
|
"grad_norm": 9.122156143188477, |
|
"learning_rate": 9.195890410958904e-06, |
|
"loss": 0.3829, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 2.089041095890411, |
|
"grad_norm": 61.343101501464844, |
|
"learning_rate": 9.127397260273972e-06, |
|
"loss": 0.4434, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 2.095890410958904, |
|
"grad_norm": 14.082651138305664, |
|
"learning_rate": 9.058904109589041e-06, |
|
"loss": 0.377, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 2.1027397260273974, |
|
"grad_norm": 10.202653884887695, |
|
"learning_rate": 8.990410958904109e-06, |
|
"loss": 0.3685, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 2.1095890410958904, |
|
"grad_norm": 18.169658660888672, |
|
"learning_rate": 8.921917808219179e-06, |
|
"loss": 0.4122, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.1164383561643834, |
|
"grad_norm": 54.33354568481445, |
|
"learning_rate": 8.853424657534246e-06, |
|
"loss": 0.3906, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 2.1232876712328768, |
|
"grad_norm": 6.232911109924316, |
|
"learning_rate": 8.784931506849316e-06, |
|
"loss": 0.3766, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.1301369863013697, |
|
"grad_norm": 40.90781784057617, |
|
"learning_rate": 8.716438356164384e-06, |
|
"loss": 0.4178, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 2.136986301369863, |
|
"grad_norm": 43.94190979003906, |
|
"learning_rate": 8.647945205479453e-06, |
|
"loss": 0.4115, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.143835616438356, |
|
"grad_norm": 14.004490852355957, |
|
"learning_rate": 8.57945205479452e-06, |
|
"loss": 0.4495, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 2.1506849315068495, |
|
"grad_norm": 10.205181121826172, |
|
"learning_rate": 8.51095890410959e-06, |
|
"loss": 0.3702, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 2.1575342465753424, |
|
"grad_norm": 12.333789825439453, |
|
"learning_rate": 8.442465753424658e-06, |
|
"loss": 0.4153, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 2.1643835616438354, |
|
"grad_norm": 10.010555267333984, |
|
"learning_rate": 8.373972602739728e-06, |
|
"loss": 0.3854, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.171232876712329, |
|
"grad_norm": 25.08806037902832, |
|
"learning_rate": 8.305479452054794e-06, |
|
"loss": 0.3935, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 2.1780821917808217, |
|
"grad_norm": 5.474767684936523, |
|
"learning_rate": 8.236986301369863e-06, |
|
"loss": 0.3408, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 2.184931506849315, |
|
"grad_norm": 19.28006362915039, |
|
"learning_rate": 8.168493150684931e-06, |
|
"loss": 0.3059, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 2.191780821917808, |
|
"grad_norm": 1.0139840841293335, |
|
"learning_rate": 8.1e-06, |
|
"loss": 0.4515, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.191780821917808, |
|
"eval_exact_match": 82.21381267738883, |
|
"eval_f1": 89.63473536642901, |
|
"eval_runtime": 406.5079, |
|
"eval_samples_per_second": 26.002, |
|
"eval_steps_per_second": 1.626, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.1986301369863015, |
|
"grad_norm": 26.130165100097656, |
|
"learning_rate": 8.031506849315068e-06, |
|
"loss": 0.3657, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 2.2054794520547945, |
|
"grad_norm": 12.132906913757324, |
|
"learning_rate": 7.963013698630138e-06, |
|
"loss": 0.3715, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 2.212328767123288, |
|
"grad_norm": 19.634357452392578, |
|
"learning_rate": 7.894520547945205e-06, |
|
"loss": 0.3868, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 2.219178082191781, |
|
"grad_norm": 12.86025333404541, |
|
"learning_rate": 7.826027397260275e-06, |
|
"loss": 0.4119, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.2260273972602738, |
|
"grad_norm": 13.171170234680176, |
|
"learning_rate": 7.757534246575343e-06, |
|
"loss": 0.4362, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 2.232876712328767, |
|
"grad_norm": 32.56090545654297, |
|
"learning_rate": 7.689041095890412e-06, |
|
"loss": 0.3995, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 2.23972602739726, |
|
"grad_norm": 31.1318416595459, |
|
"learning_rate": 7.620547945205479e-06, |
|
"loss": 0.4267, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 2.2465753424657535, |
|
"grad_norm": 8.52885913848877, |
|
"learning_rate": 7.5520547945205485e-06, |
|
"loss": 0.4065, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.2534246575342465, |
|
"grad_norm": 5.443692684173584, |
|
"learning_rate": 7.483561643835616e-06, |
|
"loss": 0.4486, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 2.26027397260274, |
|
"grad_norm": 25.305814743041992, |
|
"learning_rate": 7.415068493150685e-06, |
|
"loss": 0.4277, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.267123287671233, |
|
"grad_norm": 32.161224365234375, |
|
"learning_rate": 7.3465753424657536e-06, |
|
"loss": 0.4175, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 2.2739726027397262, |
|
"grad_norm": 12.804214477539062, |
|
"learning_rate": 7.278082191780822e-06, |
|
"loss": 0.3335, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.280821917808219, |
|
"grad_norm": 22.67701530456543, |
|
"learning_rate": 7.20958904109589e-06, |
|
"loss": 0.4323, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 2.287671232876712, |
|
"grad_norm": 31.13144302368164, |
|
"learning_rate": 7.142465753424657e-06, |
|
"loss": 0.4542, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.2945205479452055, |
|
"grad_norm": 20.63867950439453, |
|
"learning_rate": 7.073972602739726e-06, |
|
"loss": 0.4023, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 2.3013698630136985, |
|
"grad_norm": 21.299816131591797, |
|
"learning_rate": 7.005479452054794e-06, |
|
"loss": 0.3852, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.308219178082192, |
|
"grad_norm": 27.046512603759766, |
|
"learning_rate": 6.936986301369863e-06, |
|
"loss": 0.4234, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 2.315068493150685, |
|
"grad_norm": 23.793231964111328, |
|
"learning_rate": 6.8684931506849315e-06, |
|
"loss": 0.4347, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.3219178082191783, |
|
"grad_norm": 19.59113883972168, |
|
"learning_rate": 6.8e-06, |
|
"loss": 0.4505, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 2.328767123287671, |
|
"grad_norm": 14.70582389831543, |
|
"learning_rate": 6.731506849315069e-06, |
|
"loss": 0.36, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.3356164383561646, |
|
"grad_norm": 49.76693344116211, |
|
"learning_rate": 6.6630136986301365e-06, |
|
"loss": 0.3639, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 2.3424657534246576, |
|
"grad_norm": 44.807167053222656, |
|
"learning_rate": 6.594520547945205e-06, |
|
"loss": 0.3981, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.3493150684931505, |
|
"grad_norm": 9.841875076293945, |
|
"learning_rate": 6.526027397260274e-06, |
|
"loss": 0.3733, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 2.356164383561644, |
|
"grad_norm": 50.942108154296875, |
|
"learning_rate": 6.457534246575342e-06, |
|
"loss": 0.4233, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.363013698630137, |
|
"grad_norm": 9.675677299499512, |
|
"learning_rate": 6.389041095890411e-06, |
|
"loss": 0.4035, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 2.3698630136986303, |
|
"grad_norm": 5.541302680969238, |
|
"learning_rate": 6.32054794520548e-06, |
|
"loss": 0.4161, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.3767123287671232, |
|
"grad_norm": 8.581879615783691, |
|
"learning_rate": 6.2520547945205474e-06, |
|
"loss": 0.3986, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 2.383561643835616, |
|
"grad_norm": 9.531363487243652, |
|
"learning_rate": 6.183561643835616e-06, |
|
"loss": 0.3827, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.3904109589041096, |
|
"grad_norm": 7.351466178894043, |
|
"learning_rate": 6.115068493150685e-06, |
|
"loss": 0.3796, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 2.3972602739726026, |
|
"grad_norm": 8.607413291931152, |
|
"learning_rate": 6.046575342465753e-06, |
|
"loss": 0.3983, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.404109589041096, |
|
"grad_norm": 8.33619499206543, |
|
"learning_rate": 5.978082191780822e-06, |
|
"loss": 0.3695, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 2.410958904109589, |
|
"grad_norm": 6.319853782653809, |
|
"learning_rate": 5.9095890410958906e-06, |
|
"loss": 0.3843, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.4178082191780823, |
|
"grad_norm": 19.26327133178711, |
|
"learning_rate": 5.841095890410958e-06, |
|
"loss": 0.4723, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 2.4246575342465753, |
|
"grad_norm": 16.118913650512695, |
|
"learning_rate": 5.772602739726027e-06, |
|
"loss": 0.4518, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.4315068493150687, |
|
"grad_norm": 7.736336708068848, |
|
"learning_rate": 5.704109589041096e-06, |
|
"loss": 0.3713, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 2.4383561643835616, |
|
"grad_norm": 11.381460189819336, |
|
"learning_rate": 5.635616438356164e-06, |
|
"loss": 0.4285, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.4452054794520546, |
|
"grad_norm": 401.49493408203125, |
|
"learning_rate": 5.567123287671233e-06, |
|
"loss": 0.3637, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 2.452054794520548, |
|
"grad_norm": 20.71985626220703, |
|
"learning_rate": 5.4986301369863015e-06, |
|
"loss": 0.4036, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.458904109589041, |
|
"grad_norm": 14.313848495483398, |
|
"learning_rate": 5.43013698630137e-06, |
|
"loss": 0.4131, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 2.4657534246575343, |
|
"grad_norm": 14.69888687133789, |
|
"learning_rate": 5.361643835616438e-06, |
|
"loss": 0.4162, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.4657534246575343, |
|
"eval_exact_match": 82.2705771050142, |
|
"eval_f1": 89.66159681312358, |
|
"eval_runtime": 407.322, |
|
"eval_samples_per_second": 25.95, |
|
"eval_steps_per_second": 1.623, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.4726027397260273, |
|
"grad_norm": 15.124028205871582, |
|
"learning_rate": 5.2931506849315065e-06, |
|
"loss": 0.4069, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 2.4794520547945207, |
|
"grad_norm": 32.08819580078125, |
|
"learning_rate": 5.224657534246575e-06, |
|
"loss": 0.3963, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.4863013698630136, |
|
"grad_norm": 10.072858810424805, |
|
"learning_rate": 5.156164383561644e-06, |
|
"loss": 0.3669, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 2.493150684931507, |
|
"grad_norm": 7.13778018951416, |
|
"learning_rate": 5.087671232876712e-06, |
|
"loss": 0.3498, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 22.02096176147461, |
|
"learning_rate": 5.019178082191781e-06, |
|
"loss": 0.4829, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 2.506849315068493, |
|
"grad_norm": 11.320915222167969, |
|
"learning_rate": 4.950684931506849e-06, |
|
"loss": 0.3595, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.5136986301369864, |
|
"grad_norm": 12.33502197265625, |
|
"learning_rate": 4.8821917808219174e-06, |
|
"loss": 0.3876, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 2.5205479452054793, |
|
"grad_norm": 13.46191692352295, |
|
"learning_rate": 4.813698630136986e-06, |
|
"loss": 0.454, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.5273972602739727, |
|
"grad_norm": 13.49188232421875, |
|
"learning_rate": 4.746575342465753e-06, |
|
"loss": 0.3855, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 2.5342465753424657, |
|
"grad_norm": 17.15059471130371, |
|
"learning_rate": 4.678082191780822e-06, |
|
"loss": 0.366, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.541095890410959, |
|
"grad_norm": 8.814437866210938, |
|
"learning_rate": 4.60958904109589e-06, |
|
"loss": 0.4034, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 2.547945205479452, |
|
"grad_norm": 22.521684646606445, |
|
"learning_rate": 4.541095890410959e-06, |
|
"loss": 0.4877, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.5547945205479454, |
|
"grad_norm": 12.521992683410645, |
|
"learning_rate": 4.4726027397260276e-06, |
|
"loss": 0.39, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 2.5616438356164384, |
|
"grad_norm": 8.952247619628906, |
|
"learning_rate": 4.404109589041095e-06, |
|
"loss": 0.3797, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.5684931506849313, |
|
"grad_norm": 22.96004867553711, |
|
"learning_rate": 4.335616438356164e-06, |
|
"loss": 0.3642, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 2.5753424657534247, |
|
"grad_norm": 15.552525520324707, |
|
"learning_rate": 4.267123287671233e-06, |
|
"loss": 0.3611, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.5821917808219177, |
|
"grad_norm": 6.638945579528809, |
|
"learning_rate": 4.198630136986301e-06, |
|
"loss": 0.3153, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 2.589041095890411, |
|
"grad_norm": 22.522302627563477, |
|
"learning_rate": 4.13013698630137e-06, |
|
"loss": 0.4371, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.595890410958904, |
|
"grad_norm": 9.711662292480469, |
|
"learning_rate": 4.0616438356164385e-06, |
|
"loss": 0.3819, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 2.602739726027397, |
|
"grad_norm": 13.974068641662598, |
|
"learning_rate": 3.993150684931506e-06, |
|
"loss": 0.3498, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.6095890410958904, |
|
"grad_norm": 16.810501098632812, |
|
"learning_rate": 3.924657534246575e-06, |
|
"loss": 0.3499, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 2.616438356164384, |
|
"grad_norm": 12.136087417602539, |
|
"learning_rate": 3.8561643835616435e-06, |
|
"loss": 0.3913, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 2.6232876712328768, |
|
"grad_norm": 20.14099884033203, |
|
"learning_rate": 3.787671232876712e-06, |
|
"loss": 0.3576, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 2.6301369863013697, |
|
"grad_norm": 3.969587802886963, |
|
"learning_rate": 3.719178082191781e-06, |
|
"loss": 0.4321, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.636986301369863, |
|
"grad_norm": 2.799744129180908, |
|
"learning_rate": 3.6506849315068494e-06, |
|
"loss": 0.3585, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 2.643835616438356, |
|
"grad_norm": 48.521202087402344, |
|
"learning_rate": 3.582191780821918e-06, |
|
"loss": 0.406, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 2.6506849315068495, |
|
"grad_norm": 15.20323371887207, |
|
"learning_rate": 3.5136986301369866e-06, |
|
"loss": 0.3498, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 2.6575342465753424, |
|
"grad_norm": 14.380341529846191, |
|
"learning_rate": 3.445205479452055e-06, |
|
"loss": 0.3952, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.6643835616438354, |
|
"grad_norm": 21.444190979003906, |
|
"learning_rate": 3.3767123287671235e-06, |
|
"loss": 0.4067, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 2.671232876712329, |
|
"grad_norm": 12.899758338928223, |
|
"learning_rate": 3.308219178082192e-06, |
|
"loss": 0.4587, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.678082191780822, |
|
"grad_norm": 9.967342376708984, |
|
"learning_rate": 3.2397260273972603e-06, |
|
"loss": 0.3656, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 2.684931506849315, |
|
"grad_norm": 5.67423677444458, |
|
"learning_rate": 3.171232876712329e-06, |
|
"loss": 0.4351, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.691780821917808, |
|
"grad_norm": 14.479686737060547, |
|
"learning_rate": 3.1027397260273976e-06, |
|
"loss": 0.3801, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 2.6986301369863015, |
|
"grad_norm": 17.046621322631836, |
|
"learning_rate": 3.034246575342466e-06, |
|
"loss": 0.4302, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 2.7054794520547945, |
|
"grad_norm": 10.318126678466797, |
|
"learning_rate": 2.9657534246575344e-06, |
|
"loss": 0.3594, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 2.712328767123288, |
|
"grad_norm": 24.85907554626465, |
|
"learning_rate": 2.897260273972603e-06, |
|
"loss": 0.3195, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.719178082191781, |
|
"grad_norm": 16.129796981811523, |
|
"learning_rate": 2.8287671232876716e-06, |
|
"loss": 0.3899, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 2.7260273972602738, |
|
"grad_norm": 13.561001777648926, |
|
"learning_rate": 2.76027397260274e-06, |
|
"loss": 0.3348, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 2.732876712328767, |
|
"grad_norm": 32.10982131958008, |
|
"learning_rate": 2.6917808219178085e-06, |
|
"loss": 0.3648, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 2.73972602739726, |
|
"grad_norm": 49.121280670166016, |
|
"learning_rate": 2.623287671232877e-06, |
|
"loss": 0.4146, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.73972602739726, |
|
"eval_exact_match": 82.77199621570483, |
|
"eval_f1": 89.87472419974556, |
|
"eval_runtime": 407.372, |
|
"eval_samples_per_second": 25.947, |
|
"eval_steps_per_second": 1.623, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.7465753424657535, |
|
"grad_norm": 10.713053703308105, |
|
"learning_rate": 2.5547945205479453e-06, |
|
"loss": 0.3653, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 2.7534246575342465, |
|
"grad_norm": 20.47135353088379, |
|
"learning_rate": 2.486301369863014e-06, |
|
"loss": 0.4231, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.76027397260274, |
|
"grad_norm": 40.13731002807617, |
|
"learning_rate": 2.4178082191780826e-06, |
|
"loss": 0.3993, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 2.767123287671233, |
|
"grad_norm": 12.861891746520996, |
|
"learning_rate": 2.3493150684931508e-06, |
|
"loss": 0.3912, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.7739726027397262, |
|
"grad_norm": 7.921535968780518, |
|
"learning_rate": 2.2808219178082194e-06, |
|
"loss": 0.408, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 2.780821917808219, |
|
"grad_norm": 26.30048179626465, |
|
"learning_rate": 2.2123287671232876e-06, |
|
"loss": 0.4308, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 2.787671232876712, |
|
"grad_norm": 22.600740432739258, |
|
"learning_rate": 2.1438356164383562e-06, |
|
"loss": 0.363, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 2.7945205479452055, |
|
"grad_norm": 10.7540283203125, |
|
"learning_rate": 2.0753424657534244e-06, |
|
"loss": 0.3956, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.8013698630136985, |
|
"grad_norm": 13.891115188598633, |
|
"learning_rate": 2.006849315068493e-06, |
|
"loss": 0.3441, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 2.808219178082192, |
|
"grad_norm": 14.40695571899414, |
|
"learning_rate": 1.9383561643835617e-06, |
|
"loss": 0.3875, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.815068493150685, |
|
"grad_norm": 19.043682098388672, |
|
"learning_rate": 1.8698630136986303e-06, |
|
"loss": 0.4015, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 2.821917808219178, |
|
"grad_norm": 26.111764907836914, |
|
"learning_rate": 1.8013698630136987e-06, |
|
"loss": 0.3877, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.828767123287671, |
|
"grad_norm": 13.53073787689209, |
|
"learning_rate": 1.7328767123287671e-06, |
|
"loss": 0.3292, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 2.8356164383561646, |
|
"grad_norm": 12.833224296569824, |
|
"learning_rate": 1.6643835616438358e-06, |
|
"loss": 0.3202, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 2.8424657534246576, |
|
"grad_norm": 12.937023162841797, |
|
"learning_rate": 1.5958904109589042e-06, |
|
"loss": 0.3709, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 2.8493150684931505, |
|
"grad_norm": 30.91938018798828, |
|
"learning_rate": 1.5273972602739726e-06, |
|
"loss": 0.4258, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.856164383561644, |
|
"grad_norm": 50.359283447265625, |
|
"learning_rate": 1.4589041095890412e-06, |
|
"loss": 0.3572, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 2.863013698630137, |
|
"grad_norm": 7.755626201629639, |
|
"learning_rate": 1.3904109589041096e-06, |
|
"loss": 0.3985, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 2.8698630136986303, |
|
"grad_norm": 33.82756805419922, |
|
"learning_rate": 1.3219178082191783e-06, |
|
"loss": 0.3494, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 2.8767123287671232, |
|
"grad_norm": 44.668338775634766, |
|
"learning_rate": 1.2534246575342467e-06, |
|
"loss": 0.4596, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.883561643835616, |
|
"grad_norm": 24.665861129760742, |
|
"learning_rate": 1.184931506849315e-06, |
|
"loss": 0.3069, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 2.8904109589041096, |
|
"grad_norm": 13.378949165344238, |
|
"learning_rate": 1.1164383561643837e-06, |
|
"loss": 0.3873, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 2.897260273972603, |
|
"grad_norm": 8.084388732910156, |
|
"learning_rate": 1.0479452054794521e-06, |
|
"loss": 0.3911, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 2.904109589041096, |
|
"grad_norm": 8.717424392700195, |
|
"learning_rate": 9.794520547945205e-07, |
|
"loss": 0.4187, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.910958904109589, |
|
"grad_norm": 10.450674057006836, |
|
"learning_rate": 9.123287671232876e-07, |
|
"loss": 0.3498, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 2.9178082191780823, |
|
"grad_norm": 8.336039543151855, |
|
"learning_rate": 8.438356164383562e-07, |
|
"loss": 0.3815, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 2.9246575342465753, |
|
"grad_norm": 25.949018478393555, |
|
"learning_rate": 7.753424657534247e-07, |
|
"loss": 0.3061, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 2.9315068493150687, |
|
"grad_norm": 3.685792922973633, |
|
"learning_rate": 7.068493150684931e-07, |
|
"loss": 0.4067, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.9383561643835616, |
|
"grad_norm": 20.176740646362305, |
|
"learning_rate": 6.383561643835616e-07, |
|
"loss": 0.4145, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 2.9452054794520546, |
|
"grad_norm": 6.9156270027160645, |
|
"learning_rate": 5.698630136986301e-07, |
|
"loss": 0.3644, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.952054794520548, |
|
"grad_norm": 16.190839767456055, |
|
"learning_rate": 5.013698630136987e-07, |
|
"loss": 0.3783, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 2.958904109589041, |
|
"grad_norm": 14.747089385986328, |
|
"learning_rate": 4.3287671232876714e-07, |
|
"loss": 0.4057, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.9657534246575343, |
|
"grad_norm": 28.770313262939453, |
|
"learning_rate": 3.643835616438356e-07, |
|
"loss": 0.3261, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 2.9726027397260273, |
|
"grad_norm": 6.209177017211914, |
|
"learning_rate": 2.958904109589041e-07, |
|
"loss": 0.4036, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 2.9794520547945207, |
|
"grad_norm": 28.698171615600586, |
|
"learning_rate": 2.273972602739726e-07, |
|
"loss": 0.3678, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 2.9863013698630136, |
|
"grad_norm": 7.778885364532471, |
|
"learning_rate": 1.589041095890411e-07, |
|
"loss": 0.3814, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.993150684931507, |
|
"grad_norm": 28.04308319091797, |
|
"learning_rate": 9.04109589041096e-08, |
|
"loss": 0.3538, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.1987411975860596, |
|
"learning_rate": 2.1917808219178083e-08, |
|
"loss": 0.3481, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 21900, |
|
"total_flos": 2.006738209660207e+18, |
|
"train_loss": 0.7586759792397556, |
|
"train_runtime": 82472.0176, |
|
"train_samples_per_second": 3.186, |
|
"train_steps_per_second": 0.266 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 21900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.006738209660207e+18, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|