Commit
·
ef4a3ca
1
Parent(s):
751f3ef
Training in progress, step 500
Browse files- all_results.json +9 -9
- post-training eval_results.json +5 -5
- prediction_output.jsonl +1 -1
- pytorch_model.bin +1 -1
- train_results.json +4 -4
- trainer_state.json +186 -186
- training_args.bin +1 -1
all_results.json
CHANGED
@@ -1,14 +1,14 @@
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"eval_samples": 98,
|
4 |
-
"test_f1": 0.
|
5 |
-
"test_loss": 1.
|
6 |
-
"test_runtime": 3.
|
7 |
-
"test_samples_per_second": 29.
|
8 |
-
"test_steps_per_second": 29.
|
9 |
-
"train_loss": 0.
|
10 |
-
"train_runtime":
|
11 |
"train_samples": 702,
|
12 |
-
"train_samples_per_second": 4.
|
13 |
-
"train_steps_per_second": 4.
|
14 |
}
|
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
"eval_samples": 98,
|
4 |
+
"test_f1": 0.5515045914952008,
|
5 |
+
"test_loss": 1.7193970680236816,
|
6 |
+
"test_runtime": 3.325,
|
7 |
+
"test_samples_per_second": 29.474,
|
8 |
+
"test_steps_per_second": 29.474,
|
9 |
+
"train_loss": 0.6706694952103487,
|
10 |
+
"train_runtime": 824.1732,
|
11 |
"train_samples": 702,
|
12 |
+
"train_samples_per_second": 4.259,
|
13 |
+
"train_steps_per_second": 4.259
|
14 |
}
|
post-training eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"eval_samples": 98,
|
3 |
-
"test_f1": 0.
|
4 |
-
"test_loss": 1.
|
5 |
-
"test_runtime": 3.
|
6 |
-
"test_samples_per_second": 29.
|
7 |
-
"test_steps_per_second": 29.
|
8 |
}
|
|
|
1 |
{
|
2 |
"eval_samples": 98,
|
3 |
+
"test_f1": 0.5515045914952008,
|
4 |
+
"test_loss": 1.7193970680236816,
|
5 |
+
"test_runtime": 3.325,
|
6 |
+
"test_samples_per_second": 29.474,
|
7 |
+
"test_steps_per_second": 29.474
|
8 |
}
|
prediction_output.jsonl
CHANGED
@@ -1 +1 @@
|
|
1 |
-
[{"pred": 10.053011894226074, "label": 0.0}, {"pred": 1.2265956401824951, "label": 5.0}, {"pred": -0.5370747447013855, "label": 2.0}, {"pred": 0.1752518266439438, "label": 4.0}, {"pred": -2.031033754348755, "label": 2.0}, {"pred": -3.3496036529541016, "label": 8.0}, {"pred": -1.9880871772766113, "label": 8.0}, {"pred": -3.086259126663208, "label": 2.0}, {"pred": -1.1470110416412354, "label": 8.0}, {"pred": 1.037685513496399, "label": 2.0}, {"pred": -1.913681149482727, "label": 2.0}, {"pred": 0.26967066526412964, "label": 2.0}, {"pred": 5.334761142730713, "label": 5.0}, {"pred": -1.662973165512085, "label": 5.0}, {"pred": 2.8667726516723633, "label": 5.0}, {"pred": -0.8025294542312622, "label": 5.0}, {"pred": -1.7527247667312622, "label": 5.0}, {"pred": -3.357849359512329, "label": 6.0}, {"pred": 1.181986927986145, "label": 6.0}, {"pred": 1.7410268783569336, "label": 5.0}, {"pred": 7.933586597442627, "label": 8.0}, {"pred": 3.3055169582366943, "label": 8.0}, {"pred": -2.8542656898498535, "label": 2.0}, {"pred": -1.19741690158844, "label": 5.0}, {"pred": -4.84285306930542, "label": 5.0}, {"pred": -2.683229923248291, "label": 5.0}, {"pred": -2.0367188453674316, "label": 2.0}, {"pred": -0.3946422338485718, "label": 2.0}, {"pred": 1.2412188053131104, "label": 2.0}, {"pred": 8.913151741027832, "label": 3.0}, {"pred": 2.1114447116851807, "label": 3.0}, {"pred": -0.3422248363494873, "label": 5.0}, {"pred": -1.5180871486663818, "label": 0.0}, {"pred": -3.858212471008301, "label": 2.0}, {"pred": -3.058220624923706, "label": 3.0}, {"pred": -2.8904712200164795, "label": 3.0}, {"pred": -3.0549261569976807, "label": 3.0}, {"pred": -0.9003552794456482, "label": 7.0}, {"pred": 9.505762100219727, "label": 8.0}, {"pred": -0.5478419065475464, "label": 7.0}, {"pred": -0.37932345271110535, "label": 7.0}, {"pred": -0.5652313828468323, "label": 2.0}, {"pred": -3.1210832595825195, "label": 2.0}, {"pred": -2.2201759815216064, "label": 3.0}, {"pred": -0.22374841570854187, "label": 8.0}, {"pred": -0.4147443473339081, "label": 0.0}, {"pred": -2.522362232208252, "label": 2.0}, {"pred": -0.3626328706741333, "label": 2.0}, {"pred": -0.5300700664520264, "label": 2.0}, {"pred": -0.03919024020433426, "label": 2.0}, {"pred": -0.5216307044029236, "label": 2.0}, {"pred": -2.4257967472076416, "label": 2.0}, {"pred": -0.43984726071357727, "label": 0.0}, {"pred": 7.890976428985596, "label": 7.0}, {"pred": -0.8893976211547852, "label": 7.0}, {"pred": -2.06447434425354, "label": 7.0}, {"pred": -1.5249279737472534, "label": 7.0}, {"pred": -0.5461889505386353, "label": 8.0}, {"pred": -0.3898739814758301, "label": 8.0}, {"pred": -0.2674681544303894, "label": 8.0}, {"pred": -1.5843223333358765, "label": 3.0}, {"pred": -0.9582051038742065, "label": 6.0}, {"pred": 10.02863597869873, "label": 0.0}, {"pred": 2.1390559673309326, "label": 2.0}, {"pred": 0.5683819651603699, "label": 3.0}, {"pred": 7.809494495391846, "label": 7.0}, {"pred": 4.672691345214844, "label": 0.0}, {"pred": -3.1617870330810547, "label": 5.0}, {"pred": -2.968081474304199, "label": 2.0}, {"pred": -4.236698627471924, "label": 2.0}, {"pred": -2.237872362136841, "label": 7.0}, {"pred": -2.4456675052642822, "label": 2.0}, {"pred": 0.0029354728758335114, "label": 2.0}, {"pred": -1.3975402116775513, "label": 2.0}, {"pred": -0.45201340317726135, "label": 8.0}, {"pred": -0.5411576628684998, "label": 8.0}, {"pred": -1.1021785736083984, "label": 3.0}, {"pred": -0.03798329457640648, "label": 0.0}, {"pred": -2.4011411666870117, "label": 0.0}, {"pred": -1.8443877696990967, "label": 2.0}, {"pred": 9.534435272216797, "label": 2.0}, {"pred": -2.261758804321289, "label": 2.0}, {"pred": -0.29918554425239563, "label": 8.0}, {"pred": 9.580008506774902, "label": 6.0}, {"pred": -1.6922998428344727, "label": 2.0}, {"pred": -0.11596175283193588, "label": 2.0}, {"pred": -1.1728023290634155, "label": 8.0}, {"pred": -2.624927282333374, "label": 0.0}, {"pred": -2.2074270248413086, "label": 2.0}, {"pred": 0.1966685950756073, "label": 3.0}, {"pred": -2.8291454315185547, "label": 3.0}, {"pred": -1.2406705617904663, "label": 2.0}, {"pred": 7.857271671295166, "label": 6.0}, {"pred": 3.400002956390381, "label": 0.0}, {"pred": -1.4394145011901855, "label": 0.0}, {"pred": 1.8180431127548218, "label": 5.0}, {"pred": -2.2545340061187744, "label": 5.0}, {"pred": -2.8155367374420166, "label": 5.0}, {"pred": -2.5808486938476562, "label": 5.0}, {"pred": 0.629129946231842, "label": 5.0}, {"pred": -0.1851099729537964, "label": 2.0}, {"pred": 0.6262603998184204, "label": 2.0}, {"pred": 0.04001408815383911, "label": 2.0}, {"pred": -2.094447612762451, "label": 5.0}, {"pred": 1.2242151498794556, "label": 4.0}, {"pred": 6.133328914642334, "label": 8.0}, {"pred": -3.5184497833251953, "label": 4.0}, {"pred": -2.7859067916870117, "label": 4.0}, {"pred": -2.7572460174560547, "label": 4.0}, {"pred": -2.47908616065979, "label": 8.0}, {"pred": -0.5159931182861328, "label": 2.0}, {"pred": 0.07365216314792633, "label": 3.0}, {"pred": 4.490240573883057, "label": 2.0}, {"pred": 6.107156276702881, "label": 5.0}, {"pred": -2.4094760417938232, "label": 5.0}, {"pred": -2.298379898071289, "label": 5.0}, {"pred": -3.0487470626831055, "label": 0.0}, {"pred": -1.3762894868850708, "label": 6.0}, {"pred": -2.8625693321228027, "label": 5.0}, {"pred": -1.4980090856552124, "label": 5.0}, {"pred": -0.9184617400169373, "label": 5.0}, {"pred": -0.7868920564651489, "label": 2.0}, {"pred": 4.807824611663818, "label": 2.0}, {"pred": 4.986772060394287, "label": 2.0}, {"pred": -2.0342037677764893, "label": 2.0}, {"pred": -0.8398276567459106, "label": 2.0}, {"pred": -1.2112452983856201, "label": 2.0}, {"pred": -2.7561631202697754, "label": 5.0}, {"pred": -2.6678245067596436, "label": 5.0}, {"pred": -0.8507716059684753, "label": 6.0}, {"pred": 1.9779833555221558, "label": 6.0}, {"pred": 7.1783857345581055, "label": 0.0}, {"pred": 2.3628337383270264, "label": 2.0}, {"pred": -1.8503243923187256, "label": 2.0}, {"pred": -3.973742961883545, "label": 6.0}, {"pred": -0.961530327796936, "label": 2.0}, {"pred": -2.3103420734405518, "label": 5.0}, {"pred": 0.1533740609884262, "label": 5.0}, {"pred": 0.945532500743866, "label": 6.0}, {"pred": -1.9985706806182861, "label": 2.0}, {"pred": 2.2117724418640137, "label": 5.0}, {"pred": 6.319922924041748, "label": 2.0}, {"pred": -2.8318819999694824, "label": 8.0}, {"pred": -3.428406238555908, "label": 2.0}, {"pred": -2.51615047454834, "label": 2.0}, {"pred": -2.9052319526672363, "label": 5.0}, {"pred": 0.7762099504470825, "label": 8.0}, {"pred": 0.3957656919956207, "label": 5.0}, {"pred": -1.3153777122497559, "label": 5.0}, {"pred": 4.028310298919678, "label": 5.0}, {"pred": 5.811086177825928, "label": 5.0}, {"pred": -2.535435914993286, "label": 5.0}, {"pred": -3.497481107711792, "label": 6.0}, {"pred": 0.04644595459103584, "label": 8.0}, {"pred": -0.3115100562572479, "label": 6.0}, {"pred": 0.016584614291787148, "label": 7.0}, {"pred": -0.7723070979118347, "label": 6.0}, {"pred": -1.5444141626358032, "label": 8.0}, {"pred": -1.141904354095459, "label": 0.0}, {"pred": 8.480439186096191, "label": 2.0}, {"pred": -2.3408584594726562, "label": 6.0}, {"pred": -1.7245216369628906, "label": 5.0}, {"pred": 1.5875722169876099, "label": 5.0}, {"pred": -0.9754578471183777, "label": 2.0}, {"pred": -2.217231035232544, "label": 2.0}, {"pred": -0.6310365796089172, "label": 8.0}, {"pred": -0.9391398429870605, "label": 8.0}, {"pred": -1.012092113494873, "label": 0.0}, {"pred": 8.798238754272461, "label": 2.0}, {"pred": -3.1681442260742188, "label": 0.0}, {"pred": -0.884898841381073, "label": 7.0}, {"pred": 1.2964022159576416, "label": 2.0}, {"pred": -2.4020252227783203, "label": 2.0}, {"pred": -3.692897319793701, "label": 8.0}, {"pred": 0.48183152079582214, "label": 6.0}, {"pred": -0.42652153968811035, "label": 6.0}, {"pred": 1.3932526111602783, "label": 5.0}, {"pred": 7.640524864196777, "label": 5.0}, {"pred": -3.08962082862854, "label": 5.0}, {"pred": -1.635117530822754, "label": 5.0}, {"pred": -0.565514326095581, "label": 5.0}, {"pred": -2.547136068344116, "label": 5.0}, {"pred": -0.6833630204200745, "label": 5.0}, {"pred": -0.7127103209495544, "label": 6.0}, {"pred": -0.14554797112941742, "label": 8.0}, {"pred": -0.12966954708099365, "label": 5.0}, {"pred": -2.2960150241851807, "label": 5.0}, {"pred": -0.6724411249160767, "label": 3.0}, {"pred": 8.128778457641602, "label": 3.0}, {"pred": -1.071781873703003, "label": 6.0}, {"pred": -1.9194085597991943, "label": 2.0}, {"pred": -1.54249906539917, "label": 3.0}, {"pred": -0.6316580176353455, "label": 2.0}, {"pred": -0.36814042925834656, "label": 2.0}, {"pred": -0.3363884389400482, "label": 2.0}, {"pred": -1.6689341068267822, "label": 0.0}, {"pred": -1.132188320159912, "label": 2.0}, {"pred": 10.208016395568848, "label": 5.0}, {"pred": -0.5401054620742798, "label": 5.0}, {"pred": 0.09691311419010162, "label": 5.0}, {"pred": 8.53925609588623, "label": 2.0}, {"pred": 4.391894340515137, "label": 6.0}, {"pred": -3.2388341426849365, "label": 3.0}, {"pred": -2.586022138595581, "label": 3.0}, {"pred": -1.3518240451812744, "label": 6.0}, {"pred": -2.103663444519043, "label": 3.0}, {"pred": -1.933193564414978, "label": 2.0}, {"pred": 1.1616668701171875, "label": 8.0}, {"pred": -1.3652011156082153, "label": 8.0}, {"pred": -4.48310661315918, "label": 0.0}, {"pred": -0.6386572122573853, "label": 1.0}, {"pred": 0.311795175075531, "label": 2.0}, {"pred": 2.243385076522827, "label": 3.0}, {"pred": 7.526029109954834, "label": 6.0}, {"pred": -3.0103445053100586, "label": 8.0}, {"pred": -1.4424251317977905, "label": 7.0}, {"pred": -2.952651262283325, "label": 0.0}, {"pred": -0.9500249028205872, "label": 8.0}, {"pred": 1.3159178495407104, "label": 2.0}, {"pred": 1.183246374130249, "label": 2.0}, {"pred": -2.141357183456421, "label": 5.0}, {"pred": 7.404512405395508, "label": 5.0}, {"pred": 1.0727956295013428, "label": 7.0}, {"pred": -3.514127492904663, "label": 2.0}, {"pred": -3.1212284564971924, "label": 2.0}, {"pred": 0.5527191758155823, "label": 8.0}, {"pred": -0.5971601605415344, "label": 5.0}, {"pred": 5.6161651611328125, "label": 5.0}, {"pred": 4.333760738372803, "label": 2.0}, {"pred": -3.9194581508636475, "label": 6.0}, {"pred": 2.415543556213379, "label": 6.0}, {"pred": -3.0811643600463867, "label": 2.0}, {"pred": -3.876741647720337, "label": 5.0}, {"pred": -1.4490783214569092, "label": 2.0}, {"pred": -2.84633207321167, "label": 5.0}, {"pred": -0.467058002948761, "label": 5.0}, {"pred": 9.251620292663574, "label": 8.0}, {"pred": -1.4969630241394043, "label": 5.0}, {"pred": -0.1453137993812561, "label": 2.0}, {"pred": -0.724045991897583, "label": 6.0}, {"pred": -2.866516351699829, "label": 6.0}, {"pred": -2.0785751342773438, "label": 5.0}, {"pred": 0.3289041221141815, "label": 6.0}, {"pred": -1.6521097421646118, "label": 2.0}, {"pred": -0.6320829391479492, "label": 2.0}, {"pred": 7.514566421508789, "label": 5.0}, {"pred": 4.6928019523620605, "label": 5.0}, {"pred": -3.0128238201141357, "label": 0.0}, {"pred": -1.0718092918395996, "label": 5.0}, {"pred": -3.47845721244812, "label": 2.0}, {"pred": 0.35053759813308716, "label": 2.0}, {"pred": -0.9066526293754578, "label": 2.0}, {"pred": -4.176146984100342, "label": 5.0}, {"pred": -2.598372220993042, "label": 2.0}, {"pred": 7.388974666595459, "label": 6.0}, {"pred": -0.4163927137851715, "label": 6.0}, {"pred": -1.299724817276001, "label": 6.0}, {"pred": 4.256181716918945, "label": 6.0}, {"pred": -0.24266250431537628, "label": 5.0}, {"pred": -2.206273317337036, "label": 5.0}, {"pred": -1.403360366821289, "label": 5.0}, {"pred": -2.5257246494293213, "label": 5.0}, {"pred": -2.7049272060394287, "label": 5.0}, {"pred": 4.336051940917969, "label": 2.0}, {"pred": 6.037196636199951, "label": 5.0}, {"pred": -3.0442888736724854, "label": 8.0}, {"pred": 1.1787348985671997, "label": 8.0}, {"pred": -0.4310672879219055, "label": 0.0}, {"pred": -0.17733784019947052, "label": 3.0}, {"pred": -1.1147769689559937, "label": 2.0}, {"pred": 0.8757383823394775, "label": 2.0}, {"pred": -1.8259029388427734, "label": 2.0}, {"pred": 0.1079450473189354, "label": 8.0}, {"pred": -1.332146406173706, "label": 8.0}, {"pred": -1.5691189765930176, "label": 0.0}, {"pred": 8.07284164428711, "label": 2.0}, {"pred": 1.6381406784057617, "label": 0.0}, {"pred": -2.8433923721313477, "label": 7.0}, {"pred": -3.629115581512451, "label": 0.0}, {"pred": -4.5651140213012695, "label": 0.0}, {"pred": -1.93071448802948, "label": 0.0}, {"pred": 1.3184444904327393, "label": 8.0}, {"pred": 1.7017006874084473, "label": 8.0}, {"pred": -1.1318089962005615, "label": 0.0}, {"pred": 6.206396579742432, "label": 0.0}, {"pred": 2.876361131668091, "label": 2.0}, {"pred": -1.8594180345535278, "label": 2.0}, {"pred": -2.608898878097534, "label": 0.0}]
|
|
|
1 |
+
[{"pred": 10.290809631347656, "label": 0.0}, {"pred": 0.9703102111816406, "label": 5.0}, {"pred": -1.5968129634857178, "label": 2.0}, {"pred": 0.7235668897628784, "label": 4.0}, {"pred": -3.0565438270568848, "label": 2.0}, {"pred": -2.3618180751800537, "label": 8.0}, {"pred": -1.6507198810577393, "label": 8.0}, {"pred": -2.724348783493042, "label": 2.0}, {"pred": -0.7913642525672913, "label": 8.0}, {"pred": -0.1787530481815338, "label": 2.0}, {"pred": -3.9603071212768555, "label": 2.0}, {"pred": 0.03501487895846367, "label": 2.0}, {"pred": 6.6571478843688965, "label": 5.0}, {"pred": -2.7573740482330322, "label": 5.0}, {"pred": 2.7549917697906494, "label": 5.0}, {"pred": -0.9166736602783203, "label": 5.0}, {"pred": 1.1418240070343018, "label": 5.0}, {"pred": -1.7485498189926147, "label": 6.0}, {"pred": 1.0153459310531616, "label": 6.0}, {"pred": 2.065824508666992, "label": 5.0}, {"pred": 7.917908191680908, "label": 8.0}, {"pred": 2.5800442695617676, "label": 8.0}, {"pred": -3.0683484077453613, "label": 2.0}, {"pred": 0.3347143530845642, "label": 5.0}, {"pred": -2.8323209285736084, "label": 5.0}, {"pred": -2.9949421882629395, "label": 5.0}, {"pred": -3.9032864570617676, "label": 2.0}, {"pred": -0.9693252444267273, "label": 2.0}, {"pred": 0.9653197526931763, "label": 2.0}, {"pred": 8.178954124450684, "label": 3.0}, {"pred": 3.3416378498077393, "label": 3.0}, {"pred": -1.3156547546386719, "label": 5.0}, {"pred": -1.044263243675232, "label": 0.0}, {"pred": -3.586060047149658, "label": 2.0}, {"pred": -1.6682804822921753, "label": 3.0}, {"pred": -3.6979148387908936, "label": 3.0}, {"pred": -3.4306604862213135, "label": 3.0}, {"pred": -0.3065505027770996, "label": 7.0}, {"pred": 10.175311088562012, "label": 8.0}, {"pred": -2.8946011066436768, "label": 7.0}, {"pred": -0.09801101684570312, "label": 7.0}, {"pred": 0.9370686411857605, "label": 2.0}, {"pred": -1.5044194459915161, "label": 2.0}, {"pred": -1.840678334236145, "label": 3.0}, {"pred": -0.3210545480251312, "label": 8.0}, {"pred": -1.5668078660964966, "label": 0.0}, {"pred": -1.9491629600524902, "label": 2.0}, {"pred": -0.9471103549003601, "label": 2.0}, {"pred": -1.2616881132125854, "label": 2.0}, {"pred": -1.0521950721740723, "label": 2.0}, {"pred": -0.00420457124710083, "label": 2.0}, {"pred": -1.8757306337356567, "label": 2.0}, {"pred": -1.201255440711975, "label": 0.0}, {"pred": 7.710162162780762, "label": 7.0}, {"pred": -2.4759974479675293, "label": 7.0}, {"pred": -2.5288870334625244, "label": 7.0}, {"pred": -0.9985426664352417, "label": 7.0}, {"pred": -1.529648780822754, "label": 8.0}, {"pred": -1.32073974609375, "label": 8.0}, {"pred": -0.8256033062934875, "label": 8.0}, {"pred": -1.5793683528900146, "label": 3.0}, {"pred": -1.1773693561553955, "label": 6.0}, {"pred": 9.405431747436523, "label": 0.0}, {"pred": 3.3469057083129883, "label": 2.0}, {"pred": 1.315601110458374, "label": 3.0}, {"pred": 6.369998931884766, "label": 7.0}, {"pred": 5.687730312347412, "label": 0.0}, {"pred": -3.7174370288848877, "label": 5.0}, {"pred": -3.179921865463257, "label": 2.0}, {"pred": -3.663841724395752, "label": 2.0}, {"pred": -1.97105872631073, "label": 7.0}, {"pred": -3.0646395683288574, "label": 2.0}, {"pred": -1.2003767490386963, "label": 2.0}, {"pred": -1.9717252254486084, "label": 2.0}, {"pred": -0.3366681635379791, "label": 8.0}, {"pred": -1.3070366382598877, "label": 8.0}, {"pred": -0.8485084176063538, "label": 3.0}, {"pred": -0.20988516509532928, "label": 0.0}, {"pred": -2.292656660079956, "label": 0.0}, {"pred": -2.090421438217163, "label": 2.0}, {"pred": 9.354241371154785, "label": 2.0}, {"pred": -3.073307514190674, "label": 2.0}, {"pred": 0.21364711225032806, "label": 8.0}, {"pred": 9.965922355651855, "label": 6.0}, {"pred": -3.118712902069092, "label": 2.0}, {"pred": -0.03438292443752289, "label": 2.0}, {"pred": 0.2002769261598587, "label": 8.0}, {"pred": -1.063583254814148, "label": 0.0}, {"pred": -1.177560567855835, "label": 2.0}, {"pred": 0.24500791728496552, "label": 3.0}, {"pred": -2.689775228500366, "label": 3.0}, {"pred": -1.0503861904144287, "label": 2.0}, {"pred": 7.594336032867432, "label": 6.0}, {"pred": 2.1890816688537598, "label": 0.0}, {"pred": -1.7860941886901855, "label": 0.0}, {"pred": 5.3757171630859375, "label": 5.0}, {"pred": -1.2799867391586304, "label": 5.0}, {"pred": -2.6802003383636475, "label": 5.0}, {"pred": -3.188868761062622, "label": 5.0}, {"pred": 1.7214399576187134, "label": 5.0}, {"pred": -1.2691649198532104, "label": 2.0}, {"pred": 0.995307207107544, "label": 2.0}, {"pred": 5.972494125366211, "label": 2.0}, {"pred": -3.2425472736358643, "label": 5.0}, {"pred": 0.8252993226051331, "label": 4.0}, {"pred": 0.8438105583190918, "label": 8.0}, {"pred": -1.5899057388305664, "label": 4.0}, {"pred": -3.049546241760254, "label": 4.0}, {"pred": -1.266801357269287, "label": 4.0}, {"pred": -1.5122487545013428, "label": 8.0}, {"pred": -1.030104160308838, "label": 2.0}, {"pred": -0.673987627029419, "label": 3.0}, {"pred": 5.558537483215332, "label": 2.0}, {"pred": 6.735321998596191, "label": 5.0}, {"pred": -0.7411388158798218, "label": 5.0}, {"pred": -1.8602837324142456, "label": 5.0}, {"pred": -2.5063188076019287, "label": 0.0}, {"pred": -0.9122320413589478, "label": 6.0}, {"pred": -2.421311616897583, "label": 5.0}, {"pred": -1.025620937347412, "label": 5.0}, {"pred": -1.5433242321014404, "label": 5.0}, {"pred": -0.10171565413475037, "label": 2.0}, {"pred": 2.286135673522949, "label": 2.0}, {"pred": 7.0758748054504395, "label": 2.0}, {"pred": -2.814152240753174, "label": 2.0}, {"pred": 0.2042846530675888, "label": 2.0}, {"pred": -0.3653508126735687, "label": 2.0}, {"pred": -1.995278000831604, "label": 5.0}, {"pred": -1.831222653388977, "label": 5.0}, {"pred": -1.064313292503357, "label": 6.0}, {"pred": 2.56392765045166, "label": 6.0}, {"pred": 8.376298904418945, "label": 0.0}, {"pred": 1.3188626766204834, "label": 2.0}, {"pred": -2.23551607131958, "label": 2.0}, {"pred": -2.767118215560913, "label": 6.0}, {"pred": -0.0966845378279686, "label": 2.0}, {"pred": -1.313618540763855, "label": 5.0}, {"pred": -0.4058559834957123, "label": 5.0}, {"pred": 1.0895838737487793, "label": 6.0}, {"pred": -2.1371374130249023, "label": 2.0}, {"pred": 1.4900816679000854, "label": 5.0}, {"pred": 6.579888343811035, "label": 2.0}, {"pred": -3.2009799480438232, "label": 8.0}, {"pred": -2.073420524597168, "label": 2.0}, {"pred": -1.242249608039856, "label": 2.0}, {"pred": -1.9888185262680054, "label": 5.0}, {"pred": 0.48061397671699524, "label": 8.0}, {"pred": 0.4645574390888214, "label": 5.0}, {"pred": -1.0836477279663086, "label": 5.0}, {"pred": 2.5422253608703613, "label": 5.0}, {"pred": 6.876040458679199, "label": 5.0}, {"pred": -3.140732765197754, "label": 5.0}, {"pred": -2.5778374671936035, "label": 6.0}, {"pred": 0.025611255317926407, "label": 8.0}, {"pred": 1.1743779182434082, "label": 6.0}, {"pred": 0.28550976514816284, "label": 7.0}, {"pred": -0.5769954323768616, "label": 6.0}, {"pred": -1.9950687885284424, "label": 8.0}, {"pred": -0.8799560070037842, "label": 0.0}, {"pred": 7.703958511352539, "label": 2.0}, {"pred": -1.9189386367797852, "label": 6.0}, {"pred": -3.1383209228515625, "label": 5.0}, {"pred": 2.151003837585449, "label": 5.0}, {"pred": -0.1051066666841507, "label": 2.0}, {"pred": -2.95643949508667, "label": 2.0}, {"pred": -0.4773956835269928, "label": 8.0}, {"pred": -1.499645471572876, "label": 8.0}, {"pred": -0.9763684868812561, "label": 0.0}, {"pred": 8.102483749389648, "label": 2.0}, {"pred": -2.9032540321350098, "label": 0.0}, {"pred": -1.7431988716125488, "label": 7.0}, {"pred": 3.5589823722839355, "label": 2.0}, {"pred": -1.2039445638656616, "label": 2.0}, {"pred": -4.388154029846191, "label": 8.0}, {"pred": 0.5220628380775452, "label": 6.0}, {"pred": -0.9276381134986877, "label": 6.0}, {"pred": 0.7515405416488647, "label": 5.0}, {"pred": 6.313348770141602, "label": 5.0}, {"pred": -3.2387163639068604, "label": 5.0}, {"pred": -1.8151580095291138, "label": 5.0}, {"pred": -1.4219468832015991, "label": 5.0}, {"pred": -1.951231598854065, "label": 5.0}, {"pred": -1.1042416095733643, "label": 5.0}, {"pred": -1.4297571182250977, "label": 6.0}, {"pred": -1.0193731784820557, "label": 8.0}, {"pred": -0.10052667558193207, "label": 5.0}, {"pred": -1.7839338779449463, "label": 5.0}, {"pred": -1.3848637342453003, "label": 3.0}, {"pred": 7.9457926750183105, "label": 3.0}, {"pred": -2.433159828186035, "label": 6.0}, {"pred": -2.399423122406006, "label": 2.0}, {"pred": -0.9965806007385254, "label": 3.0}, {"pred": -1.350462794303894, "label": 2.0}, {"pred": -1.4989750385284424, "label": 2.0}, {"pred": -1.0299936532974243, "label": 2.0}, {"pred": -1.6915379762649536, "label": 0.0}, {"pred": -1.2833725214004517, "label": 2.0}, {"pred": 9.242444038391113, "label": 5.0}, {"pred": -1.860719919204712, "label": 5.0}, {"pred": 0.09586504101753235, "label": 5.0}, {"pred": 8.429362297058105, "label": 2.0}, {"pred": 4.24113655090332, "label": 6.0}, {"pred": -3.6316137313842773, "label": 3.0}, {"pred": -1.099712610244751, "label": 3.0}, {"pred": -0.07706806808710098, "label": 6.0}, {"pred": -2.187944173812866, "label": 3.0}, {"pred": -3.425537586212158, "label": 2.0}, {"pred": 1.153124451637268, "label": 8.0}, {"pred": -1.4549442529678345, "label": 8.0}, {"pred": -2.9547150135040283, "label": 0.0}, {"pred": 0.5087819695472717, "label": 1.0}, {"pred": -0.8468652367591858, "label": 2.0}, {"pred": 4.418088912963867, "label": 3.0}, {"pred": 6.098045349121094, "label": 6.0}, {"pred": -3.040365219116211, "label": 8.0}, {"pred": -2.9563238620758057, "label": 7.0}, {"pred": -1.0640069246292114, "label": 0.0}, {"pred": 0.4458455443382263, "label": 8.0}, {"pred": 2.9810967445373535, "label": 2.0}, {"pred": 1.6232993602752686, "label": 2.0}, {"pred": -2.709993600845337, "label": 5.0}, {"pred": 6.805906772613525, "label": 5.0}, {"pred": 2.1797492504119873, "label": 7.0}, {"pred": -4.349334239959717, "label": 2.0}, {"pred": -4.006707191467285, "label": 2.0}, {"pred": -2.190566301345825, "label": 8.0}, {"pred": -0.9495126008987427, "label": 5.0}, {"pred": 7.999160289764404, "label": 5.0}, {"pred": 3.7388970851898193, "label": 2.0}, {"pred": -2.9454569816589355, "label": 6.0}, {"pred": 2.977922201156616, "label": 6.0}, {"pred": -1.7080835103988647, "label": 2.0}, {"pred": -2.4555013179779053, "label": 5.0}, {"pred": -2.324666738510132, "label": 2.0}, {"pred": -3.444615364074707, "label": 5.0}, {"pred": 0.11816424131393433, "label": 5.0}, {"pred": 9.905989646911621, "label": 8.0}, {"pred": -2.962965488433838, "label": 5.0}, {"pred": 0.030992530286312103, "label": 2.0}, {"pred": 0.42090052366256714, "label": 6.0}, {"pred": -1.4415723085403442, "label": 6.0}, {"pred": -1.0313920974731445, "label": 5.0}, {"pred": 0.4347422420978546, "label": 6.0}, {"pred": -2.240575075149536, "label": 2.0}, {"pred": -0.5948125123977661, "label": 2.0}, {"pred": 6.614602565765381, "label": 5.0}, {"pred": 4.726541042327881, "label": 5.0}, {"pred": -3.396735191345215, "label": 0.0}, {"pred": -0.8261128664016724, "label": 5.0}, {"pred": -2.3094027042388916, "label": 2.0}, {"pred": 0.37960976362228394, "label": 2.0}, {"pred": -3.0307390689849854, "label": 2.0}, {"pred": -3.9579150676727295, "label": 5.0}, {"pred": -2.420196294784546, "label": 2.0}, {"pred": 6.9699907302856445, "label": 6.0}, {"pred": -1.044237494468689, "label": 6.0}, {"pred": -1.1202991008758545, "label": 6.0}, {"pred": 6.221286773681641, "label": 6.0}, {"pred": 1.2837556600570679, "label": 5.0}, {"pred": -1.6445438861846924, "label": 5.0}, {"pred": -2.026249647140503, "label": 5.0}, {"pred": -3.088744640350342, "label": 5.0}, {"pred": -3.18585205078125, "label": 5.0}, {"pred": 3.9137914180755615, "label": 2.0}, {"pred": 5.3143630027771, "label": 5.0}, {"pred": -3.272541046142578, "label": 8.0}, {"pred": 1.741031289100647, "label": 8.0}, {"pred": 1.3089599609375, "label": 0.0}, {"pred": -1.2750519514083862, "label": 3.0}, {"pred": -0.41170594096183777, "label": 2.0}, {"pred": -2.5046517848968506, "label": 2.0}, {"pred": -0.3387974500656128, "label": 2.0}, {"pred": 5.276491641998291, "label": 8.0}, {"pred": -1.4969731569290161, "label": 8.0}, {"pred": -1.0964272022247314, "label": 0.0}, {"pred": 5.757904529571533, "label": 2.0}, {"pred": 0.23799118399620056, "label": 0.0}, {"pred": -3.1069884300231934, "label": 7.0}, {"pred": -1.6246393918991089, "label": 0.0}, {"pred": -3.437112331390381, "label": 0.0}, {"pred": -0.5458642244338989, "label": 0.0}, {"pred": 3.5296177864074707, "label": 8.0}, {"pred": 0.6833158135414124, "label": 8.0}, {"pred": -1.6968615055084229, "label": 0.0}, {"pred": 6.245951175689697, "label": 0.0}, {"pred": 2.876694440841675, "label": 2.0}, {"pred": -3.0809290409088135, "label": 2.0}, {"pred": -3.641925096511841, "label": 0.0}]
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 714922721
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:028c21ee2bcbc4765d7f542294c6a65021701767f2244e0a78e657a7356d484a
|
3 |
size 714922721
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
-
"train_loss": 0.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 702,
|
6 |
-
"train_samples_per_second": 4.
|
7 |
-
"train_steps_per_second": 4.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 5.0,
|
3 |
+
"train_loss": 0.6706694952103487,
|
4 |
+
"train_runtime": 824.1732,
|
5 |
"train_samples": 702,
|
6 |
+
"train_samples_per_second": 4.259,
|
7 |
+
"train_steps_per_second": 4.259
|
8 |
}
|
trainer_state.json
CHANGED
@@ -9,369 +9,369 @@
|
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 0.14,
|
12 |
-
"eval_f1": 0.
|
13 |
-
"eval_loss": 1.
|
14 |
-
"eval_runtime": 3.
|
15 |
-
"eval_samples_per_second": 29.
|
16 |
-
"eval_steps_per_second": 29.
|
17 |
"step": 100
|
18 |
},
|
19 |
{
|
20 |
"epoch": 0.28,
|
21 |
-
"eval_f1": 0.
|
22 |
-
"eval_loss": 1.
|
23 |
-
"eval_runtime": 3.
|
24 |
-
"eval_samples_per_second": 29.
|
25 |
-
"eval_steps_per_second": 29.
|
26 |
"step": 200
|
27 |
},
|
28 |
{
|
29 |
"epoch": 0.43,
|
30 |
-
"eval_f1": 0.
|
31 |
-
"eval_loss": 1.
|
32 |
-
"eval_runtime": 3.
|
33 |
-
"eval_samples_per_second": 29.
|
34 |
-
"eval_steps_per_second": 29.
|
35 |
"step": 300
|
36 |
},
|
37 |
{
|
38 |
"epoch": 0.57,
|
39 |
-
"eval_f1": 0.
|
40 |
-
"eval_loss": 1.
|
41 |
-
"eval_runtime": 3.
|
42 |
-
"eval_samples_per_second": 29.
|
43 |
-
"eval_steps_per_second": 29.
|
44 |
"step": 400
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.71,
|
48 |
"learning_rate": 2.572649572649573e-05,
|
49 |
-
"loss": 1.
|
50 |
"step": 500
|
51 |
},
|
52 |
{
|
53 |
"epoch": 0.71,
|
54 |
-
"eval_f1": 0.
|
55 |
-
"eval_loss": 1.
|
56 |
-
"eval_runtime": 3.
|
57 |
-
"eval_samples_per_second": 29.
|
58 |
-
"eval_steps_per_second": 29.
|
59 |
"step": 500
|
60 |
},
|
61 |
{
|
62 |
"epoch": 0.85,
|
63 |
-
"eval_f1": 0.
|
64 |
-
"eval_loss": 1.
|
65 |
-
"eval_runtime": 3.
|
66 |
-
"eval_samples_per_second": 29.
|
67 |
-
"eval_steps_per_second": 29.
|
68 |
"step": 600
|
69 |
},
|
70 |
{
|
71 |
"epoch": 1.0,
|
72 |
-
"eval_f1": 0.
|
73 |
-
"eval_loss": 1.
|
74 |
-
"eval_runtime": 3.
|
75 |
-
"eval_samples_per_second": 29.
|
76 |
-
"eval_steps_per_second": 29.
|
77 |
"step": 700
|
78 |
},
|
79 |
{
|
80 |
"epoch": 1.14,
|
81 |
-
"eval_f1": 0.
|
82 |
-
"eval_loss": 1.
|
83 |
-
"eval_runtime": 3.
|
84 |
-
"eval_samples_per_second":
|
85 |
-
"eval_steps_per_second":
|
86 |
"step": 800
|
87 |
},
|
88 |
{
|
89 |
"epoch": 1.28,
|
90 |
-
"eval_f1": 0.
|
91 |
-
"eval_loss": 1.
|
92 |
-
"eval_runtime": 3.
|
93 |
-
"eval_samples_per_second": 29.
|
94 |
-
"eval_steps_per_second": 29.
|
95 |
"step": 900
|
96 |
},
|
97 |
{
|
98 |
"epoch": 1.42,
|
99 |
"learning_rate": 2.1452991452991456e-05,
|
100 |
-
"loss": 0.
|
101 |
"step": 1000
|
102 |
},
|
103 |
{
|
104 |
"epoch": 1.42,
|
105 |
-
"eval_f1": 0.
|
106 |
-
"eval_loss": 1.
|
107 |
-
"eval_runtime": 3.
|
108 |
-
"eval_samples_per_second": 29.
|
109 |
-
"eval_steps_per_second": 29.
|
110 |
"step": 1000
|
111 |
},
|
112 |
{
|
113 |
"epoch": 1.57,
|
114 |
-
"eval_f1": 0.
|
115 |
-
"eval_loss": 1.
|
116 |
-
"eval_runtime": 3.
|
117 |
-
"eval_samples_per_second": 29.
|
118 |
-
"eval_steps_per_second": 29.
|
119 |
"step": 1100
|
120 |
},
|
121 |
{
|
122 |
"epoch": 1.71,
|
123 |
-
"eval_f1": 0.
|
124 |
-
"eval_loss": 1.
|
125 |
-
"eval_runtime": 3.
|
126 |
-
"eval_samples_per_second": 29.
|
127 |
-
"eval_steps_per_second": 29.
|
128 |
"step": 1200
|
129 |
},
|
130 |
{
|
131 |
"epoch": 1.85,
|
132 |
-
"eval_f1": 0.
|
133 |
-
"eval_loss": 1.
|
134 |
-
"eval_runtime": 3.
|
135 |
-
"eval_samples_per_second":
|
136 |
-
"eval_steps_per_second":
|
137 |
"step": 1300
|
138 |
},
|
139 |
{
|
140 |
"epoch": 1.99,
|
141 |
-
"eval_f1": 0.
|
142 |
-
"eval_loss": 1.
|
143 |
-
"eval_runtime": 3.
|
144 |
-
"eval_samples_per_second": 29.
|
145 |
-
"eval_steps_per_second": 29.
|
146 |
"step": 1400
|
147 |
},
|
148 |
{
|
149 |
"epoch": 2.14,
|
150 |
"learning_rate": 1.7179487179487178e-05,
|
151 |
-
"loss": 0.
|
152 |
"step": 1500
|
153 |
},
|
154 |
{
|
155 |
"epoch": 2.14,
|
156 |
-
"eval_f1": 0.
|
157 |
-
"eval_loss": 1.
|
158 |
-
"eval_runtime": 3.
|
159 |
-
"eval_samples_per_second": 29.
|
160 |
-
"eval_steps_per_second": 29.
|
161 |
"step": 1500
|
162 |
},
|
163 |
{
|
164 |
"epoch": 2.28,
|
165 |
-
"eval_f1": 0.
|
166 |
-
"eval_loss": 1.
|
167 |
-
"eval_runtime": 3.
|
168 |
-
"eval_samples_per_second":
|
169 |
-
"eval_steps_per_second":
|
170 |
"step": 1600
|
171 |
},
|
172 |
{
|
173 |
"epoch": 2.42,
|
174 |
-
"eval_f1": 0.
|
175 |
-
"eval_loss": 1.
|
176 |
-
"eval_runtime": 3.
|
177 |
-
"eval_samples_per_second":
|
178 |
-
"eval_steps_per_second":
|
179 |
"step": 1700
|
180 |
},
|
181 |
{
|
182 |
"epoch": 2.56,
|
183 |
-
"eval_f1": 0.
|
184 |
-
"eval_loss": 1.
|
185 |
-
"eval_runtime": 3.
|
186 |
-
"eval_samples_per_second":
|
187 |
-
"eval_steps_per_second":
|
188 |
"step": 1800
|
189 |
},
|
190 |
{
|
191 |
"epoch": 2.71,
|
192 |
-
"eval_f1": 0.
|
193 |
-
"eval_loss": 1.
|
194 |
-
"eval_runtime": 3.
|
195 |
-
"eval_samples_per_second": 29.
|
196 |
-
"eval_steps_per_second": 29.
|
197 |
"step": 1900
|
198 |
},
|
199 |
{
|
200 |
"epoch": 2.85,
|
201 |
"learning_rate": 1.2905982905982905e-05,
|
202 |
-
"loss": 0.
|
203 |
"step": 2000
|
204 |
},
|
205 |
{
|
206 |
"epoch": 2.85,
|
207 |
-
"eval_f1": 0.
|
208 |
-
"eval_loss": 1.
|
209 |
-
"eval_runtime": 3.
|
210 |
-
"eval_samples_per_second": 29.
|
211 |
-
"eval_steps_per_second": 29.
|
212 |
"step": 2000
|
213 |
},
|
214 |
{
|
215 |
"epoch": 2.99,
|
216 |
-
"eval_f1": 0.
|
217 |
-
"eval_loss": 1.
|
218 |
-
"eval_runtime": 3.
|
219 |
-
"eval_samples_per_second": 29.
|
220 |
-
"eval_steps_per_second": 29.
|
221 |
"step": 2100
|
222 |
},
|
223 |
{
|
224 |
"epoch": 3.13,
|
225 |
-
"eval_f1": 0.
|
226 |
-
"eval_loss": 1.
|
227 |
-
"eval_runtime": 3.
|
228 |
-
"eval_samples_per_second": 29.
|
229 |
-
"eval_steps_per_second": 29.
|
230 |
"step": 2200
|
231 |
},
|
232 |
{
|
233 |
"epoch": 3.28,
|
234 |
-
"eval_f1": 0.
|
235 |
-
"eval_loss": 1.
|
236 |
-
"eval_runtime": 3.
|
237 |
-
"eval_samples_per_second": 29.
|
238 |
-
"eval_steps_per_second": 29.
|
239 |
"step": 2300
|
240 |
},
|
241 |
{
|
242 |
"epoch": 3.42,
|
243 |
-
"eval_f1": 0.
|
244 |
-
"eval_loss": 1.
|
245 |
-
"eval_runtime": 3.
|
246 |
-
"eval_samples_per_second": 29.
|
247 |
-
"eval_steps_per_second": 29.
|
248 |
"step": 2400
|
249 |
},
|
250 |
{
|
251 |
"epoch": 3.56,
|
252 |
"learning_rate": 8.632478632478633e-06,
|
253 |
-
"loss": 0.
|
254 |
"step": 2500
|
255 |
},
|
256 |
{
|
257 |
"epoch": 3.56,
|
258 |
-
"eval_f1": 0.
|
259 |
-
"eval_loss": 1.
|
260 |
-
"eval_runtime": 3.
|
261 |
-
"eval_samples_per_second": 29.
|
262 |
-
"eval_steps_per_second": 29.
|
263 |
"step": 2500
|
264 |
},
|
265 |
{
|
266 |
"epoch": 3.7,
|
267 |
-
"eval_f1": 0.
|
268 |
-
"eval_loss": 1.
|
269 |
-
"eval_runtime": 3.
|
270 |
-
"eval_samples_per_second": 29.
|
271 |
-
"eval_steps_per_second": 29.
|
272 |
"step": 2600
|
273 |
},
|
274 |
{
|
275 |
"epoch": 3.85,
|
276 |
-
"eval_f1": 0.
|
277 |
-
"eval_loss": 1.
|
278 |
-
"eval_runtime": 3.
|
279 |
-
"eval_samples_per_second": 29.
|
280 |
-
"eval_steps_per_second": 29.
|
281 |
"step": 2700
|
282 |
},
|
283 |
{
|
284 |
"epoch": 3.99,
|
285 |
-
"eval_f1": 0.
|
286 |
-
"eval_loss": 1.
|
287 |
-
"eval_runtime": 3.
|
288 |
-
"eval_samples_per_second": 29.
|
289 |
-
"eval_steps_per_second": 29.
|
290 |
"step": 2800
|
291 |
},
|
292 |
{
|
293 |
"epoch": 4.13,
|
294 |
-
"eval_f1": 0.
|
295 |
-
"eval_loss": 1.
|
296 |
-
"eval_runtime": 3.
|
297 |
-
"eval_samples_per_second": 29.
|
298 |
-
"eval_steps_per_second": 29.
|
299 |
"step": 2900
|
300 |
},
|
301 |
{
|
302 |
"epoch": 4.27,
|
303 |
"learning_rate": 4.358974358974359e-06,
|
304 |
-
"loss": 0.
|
305 |
"step": 3000
|
306 |
},
|
307 |
{
|
308 |
"epoch": 4.27,
|
309 |
-
"eval_f1": 0.
|
310 |
-
"eval_loss": 1.
|
311 |
-
"eval_runtime": 3.
|
312 |
-
"eval_samples_per_second": 29.
|
313 |
-
"eval_steps_per_second": 29.
|
314 |
"step": 3000
|
315 |
},
|
316 |
{
|
317 |
"epoch": 4.42,
|
318 |
-
"eval_f1": 0.
|
319 |
-
"eval_loss": 1.
|
320 |
-
"eval_runtime": 3.
|
321 |
-
"eval_samples_per_second": 29.
|
322 |
-
"eval_steps_per_second": 29.
|
323 |
"step": 3100
|
324 |
},
|
325 |
{
|
326 |
"epoch": 4.56,
|
327 |
-
"eval_f1": 0.
|
328 |
-
"eval_loss": 1.
|
329 |
-
"eval_runtime": 3.
|
330 |
-
"eval_samples_per_second":
|
331 |
-
"eval_steps_per_second":
|
332 |
"step": 3200
|
333 |
},
|
334 |
{
|
335 |
"epoch": 4.7,
|
336 |
-
"eval_f1": 0.
|
337 |
-
"eval_loss": 1.
|
338 |
-
"eval_runtime": 3.
|
339 |
-
"eval_samples_per_second": 29.
|
340 |
-
"eval_steps_per_second": 29.
|
341 |
"step": 3300
|
342 |
},
|
343 |
{
|
344 |
"epoch": 4.84,
|
345 |
-
"eval_f1": 0.
|
346 |
-
"eval_loss": 1.
|
347 |
-
"eval_runtime": 3.
|
348 |
-
"eval_samples_per_second": 29.
|
349 |
-
"eval_steps_per_second": 29.
|
350 |
"step": 3400
|
351 |
},
|
352 |
{
|
353 |
"epoch": 4.99,
|
354 |
"learning_rate": 8.547008547008547e-08,
|
355 |
-
"loss": 0.
|
356 |
"step": 3500
|
357 |
},
|
358 |
{
|
359 |
"epoch": 4.99,
|
360 |
-
"eval_f1": 0.
|
361 |
-
"eval_loss": 1.
|
362 |
-
"eval_runtime": 3.
|
363 |
-
"eval_samples_per_second": 29.
|
364 |
-
"eval_steps_per_second": 29.
|
365 |
"step": 3500
|
366 |
},
|
367 |
{
|
368 |
"epoch": 5.0,
|
369 |
"step": 3510,
|
370 |
"total_flos": 2890172619430200.0,
|
371 |
-
"train_loss": 0.
|
372 |
-
"train_runtime":
|
373 |
-
"train_samples_per_second": 4.
|
374 |
-
"train_steps_per_second": 4.
|
375 |
}
|
376 |
],
|
377 |
"max_steps": 3510,
|
|
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 0.14,
|
12 |
+
"eval_f1": 0.37622621795062866,
|
13 |
+
"eval_loss": 1.36316978931427,
|
14 |
+
"eval_runtime": 3.2856,
|
15 |
+
"eval_samples_per_second": 29.827,
|
16 |
+
"eval_steps_per_second": 29.827,
|
17 |
"step": 100
|
18 |
},
|
19 |
{
|
20 |
"epoch": 0.28,
|
21 |
+
"eval_f1": 0.41617398891170265,
|
22 |
+
"eval_loss": 1.227824330329895,
|
23 |
+
"eval_runtime": 3.2714,
|
24 |
+
"eval_samples_per_second": 29.957,
|
25 |
+
"eval_steps_per_second": 29.957,
|
26 |
"step": 200
|
27 |
},
|
28 |
{
|
29 |
"epoch": 0.43,
|
30 |
+
"eval_f1": 0.41594055599199414,
|
31 |
+
"eval_loss": 1.1802130937576294,
|
32 |
+
"eval_runtime": 3.2851,
|
33 |
+
"eval_samples_per_second": 29.832,
|
34 |
+
"eval_steps_per_second": 29.832,
|
35 |
"step": 300
|
36 |
},
|
37 |
{
|
38 |
"epoch": 0.57,
|
39 |
+
"eval_f1": 0.4879226887989845,
|
40 |
+
"eval_loss": 1.3237018585205078,
|
41 |
+
"eval_runtime": 3.2714,
|
42 |
+
"eval_samples_per_second": 29.957,
|
43 |
+
"eval_steps_per_second": 29.957,
|
44 |
"step": 400
|
45 |
},
|
46 |
{
|
47 |
"epoch": 0.71,
|
48 |
"learning_rate": 2.572649572649573e-05,
|
49 |
+
"loss": 1.2,
|
50 |
"step": 500
|
51 |
},
|
52 |
{
|
53 |
"epoch": 0.71,
|
54 |
+
"eval_f1": 0.46450380175056494,
|
55 |
+
"eval_loss": 1.2971174716949463,
|
56 |
+
"eval_runtime": 3.2843,
|
57 |
+
"eval_samples_per_second": 29.839,
|
58 |
+
"eval_steps_per_second": 29.839,
|
59 |
"step": 500
|
60 |
},
|
61 |
{
|
62 |
"epoch": 0.85,
|
63 |
+
"eval_f1": 0.5019868520647613,
|
64 |
+
"eval_loss": 1.2549620866775513,
|
65 |
+
"eval_runtime": 3.267,
|
66 |
+
"eval_samples_per_second": 29.997,
|
67 |
+
"eval_steps_per_second": 29.997,
|
68 |
"step": 600
|
69 |
},
|
70 |
{
|
71 |
"epoch": 1.0,
|
72 |
+
"eval_f1": 0.48057967334012397,
|
73 |
+
"eval_loss": 1.1853649616241455,
|
74 |
+
"eval_runtime": 3.277,
|
75 |
+
"eval_samples_per_second": 29.905,
|
76 |
+
"eval_steps_per_second": 29.905,
|
77 |
"step": 700
|
78 |
},
|
79 |
{
|
80 |
"epoch": 1.14,
|
81 |
+
"eval_f1": 0.5011814210846155,
|
82 |
+
"eval_loss": 1.1788480281829834,
|
83 |
+
"eval_runtime": 3.2639,
|
84 |
+
"eval_samples_per_second": 30.025,
|
85 |
+
"eval_steps_per_second": 30.025,
|
86 |
"step": 800
|
87 |
},
|
88 |
{
|
89 |
"epoch": 1.28,
|
90 |
+
"eval_f1": 0.4964300899620197,
|
91 |
+
"eval_loss": 1.093542218208313,
|
92 |
+
"eval_runtime": 3.2724,
|
93 |
+
"eval_samples_per_second": 29.947,
|
94 |
+
"eval_steps_per_second": 29.947,
|
95 |
"step": 900
|
96 |
},
|
97 |
{
|
98 |
"epoch": 1.42,
|
99 |
"learning_rate": 2.1452991452991456e-05,
|
100 |
+
"loss": 0.9189,
|
101 |
"step": 1000
|
102 |
},
|
103 |
{
|
104 |
"epoch": 1.42,
|
105 |
+
"eval_f1": 0.4986272191320895,
|
106 |
+
"eval_loss": 1.2862237691879272,
|
107 |
+
"eval_runtime": 3.302,
|
108 |
+
"eval_samples_per_second": 29.679,
|
109 |
+
"eval_steps_per_second": 29.679,
|
110 |
"step": 1000
|
111 |
},
|
112 |
{
|
113 |
"epoch": 1.57,
|
114 |
+
"eval_f1": 0.49297809308258944,
|
115 |
+
"eval_loss": 1.2222929000854492,
|
116 |
+
"eval_runtime": 3.3171,
|
117 |
+
"eval_samples_per_second": 29.544,
|
118 |
+
"eval_steps_per_second": 29.544,
|
119 |
"step": 1100
|
120 |
},
|
121 |
{
|
122 |
"epoch": 1.71,
|
123 |
+
"eval_f1": 0.4953797333525823,
|
124 |
+
"eval_loss": 1.1196690797805786,
|
125 |
+
"eval_runtime": 3.2943,
|
126 |
+
"eval_samples_per_second": 29.749,
|
127 |
+
"eval_steps_per_second": 29.749,
|
128 |
"step": 1200
|
129 |
},
|
130 |
{
|
131 |
"epoch": 1.85,
|
132 |
+
"eval_f1": 0.5153008157478032,
|
133 |
+
"eval_loss": 1.1256704330444336,
|
134 |
+
"eval_runtime": 3.2631,
|
135 |
+
"eval_samples_per_second": 30.033,
|
136 |
+
"eval_steps_per_second": 30.033,
|
137 |
"step": 1300
|
138 |
},
|
139 |
{
|
140 |
"epoch": 1.99,
|
141 |
+
"eval_f1": 0.5263780363862973,
|
142 |
+
"eval_loss": 1.1729286909103394,
|
143 |
+
"eval_runtime": 3.2904,
|
144 |
+
"eval_samples_per_second": 29.783,
|
145 |
+
"eval_steps_per_second": 29.783,
|
146 |
"step": 1400
|
147 |
},
|
148 |
{
|
149 |
"epoch": 2.14,
|
150 |
"learning_rate": 1.7179487179487178e-05,
|
151 |
+
"loss": 0.8143,
|
152 |
"step": 1500
|
153 |
},
|
154 |
{
|
155 |
"epoch": 2.14,
|
156 |
+
"eval_f1": 0.5165321012151871,
|
157 |
+
"eval_loss": 1.272233486175537,
|
158 |
+
"eval_runtime": 3.3087,
|
159 |
+
"eval_samples_per_second": 29.619,
|
160 |
+
"eval_steps_per_second": 29.619,
|
161 |
"step": 1500
|
162 |
},
|
163 |
{
|
164 |
"epoch": 2.28,
|
165 |
+
"eval_f1": 0.539472065505205,
|
166 |
+
"eval_loss": 1.3217926025390625,
|
167 |
+
"eval_runtime": 3.2634,
|
168 |
+
"eval_samples_per_second": 30.03,
|
169 |
+
"eval_steps_per_second": 30.03,
|
170 |
"step": 1600
|
171 |
},
|
172 |
{
|
173 |
"epoch": 2.42,
|
174 |
+
"eval_f1": 0.5170136038987323,
|
175 |
+
"eval_loss": 1.338261604309082,
|
176 |
+
"eval_runtime": 3.2635,
|
177 |
+
"eval_samples_per_second": 30.029,
|
178 |
+
"eval_steps_per_second": 30.029,
|
179 |
"step": 1700
|
180 |
},
|
181 |
{
|
182 |
"epoch": 2.56,
|
183 |
+
"eval_f1": 0.5138801729725696,
|
184 |
+
"eval_loss": 1.250339388847351,
|
185 |
+
"eval_runtime": 3.2656,
|
186 |
+
"eval_samples_per_second": 30.009,
|
187 |
+
"eval_steps_per_second": 30.009,
|
188 |
"step": 1800
|
189 |
},
|
190 |
{
|
191 |
"epoch": 2.71,
|
192 |
+
"eval_f1": 0.523963853035474,
|
193 |
+
"eval_loss": 1.362999439239502,
|
194 |
+
"eval_runtime": 3.3211,
|
195 |
+
"eval_samples_per_second": 29.508,
|
196 |
+
"eval_steps_per_second": 29.508,
|
197 |
"step": 1900
|
198 |
},
|
199 |
{
|
200 |
"epoch": 2.85,
|
201 |
"learning_rate": 1.2905982905982905e-05,
|
202 |
+
"loss": 0.6175,
|
203 |
"step": 2000
|
204 |
},
|
205 |
{
|
206 |
"epoch": 2.85,
|
207 |
+
"eval_f1": 0.5305458058252502,
|
208 |
+
"eval_loss": 1.402750015258789,
|
209 |
+
"eval_runtime": 3.2768,
|
210 |
+
"eval_samples_per_second": 29.907,
|
211 |
+
"eval_steps_per_second": 29.907,
|
212 |
"step": 2000
|
213 |
},
|
214 |
{
|
215 |
"epoch": 2.99,
|
216 |
+
"eval_f1": 0.5408209021870833,
|
217 |
+
"eval_loss": 1.4016790390014648,
|
218 |
+
"eval_runtime": 3.3122,
|
219 |
+
"eval_samples_per_second": 29.588,
|
220 |
+
"eval_steps_per_second": 29.588,
|
221 |
"step": 2100
|
222 |
},
|
223 |
{
|
224 |
"epoch": 3.13,
|
225 |
+
"eval_f1": 0.541281162975512,
|
226 |
+
"eval_loss": 1.5929616689682007,
|
227 |
+
"eval_runtime": 3.294,
|
228 |
+
"eval_samples_per_second": 29.751,
|
229 |
+
"eval_steps_per_second": 29.751,
|
230 |
"step": 2200
|
231 |
},
|
232 |
{
|
233 |
"epoch": 3.28,
|
234 |
+
"eval_f1": 0.5564758214624422,
|
235 |
+
"eval_loss": 1.5372625589370728,
|
236 |
+
"eval_runtime": 3.2882,
|
237 |
+
"eval_samples_per_second": 29.803,
|
238 |
+
"eval_steps_per_second": 29.803,
|
239 |
"step": 2300
|
240 |
},
|
241 |
{
|
242 |
"epoch": 3.42,
|
243 |
+
"eval_f1": 0.5722151004353093,
|
244 |
+
"eval_loss": 1.5012538433074951,
|
245 |
+
"eval_runtime": 3.3067,
|
246 |
+
"eval_samples_per_second": 29.637,
|
247 |
+
"eval_steps_per_second": 29.637,
|
248 |
"step": 2400
|
249 |
},
|
250 |
{
|
251 |
"epoch": 3.56,
|
252 |
"learning_rate": 8.632478632478633e-06,
|
253 |
+
"loss": 0.4726,
|
254 |
"step": 2500
|
255 |
},
|
256 |
{
|
257 |
"epoch": 3.56,
|
258 |
+
"eval_f1": 0.5226487560978434,
|
259 |
+
"eval_loss": 1.570418119430542,
|
260 |
+
"eval_runtime": 3.3114,
|
261 |
+
"eval_samples_per_second": 29.595,
|
262 |
+
"eval_steps_per_second": 29.595,
|
263 |
"step": 2500
|
264 |
},
|
265 |
{
|
266 |
"epoch": 3.7,
|
267 |
+
"eval_f1": 0.5483719296880323,
|
268 |
+
"eval_loss": 1.5890936851501465,
|
269 |
+
"eval_runtime": 3.2745,
|
270 |
+
"eval_samples_per_second": 29.928,
|
271 |
+
"eval_steps_per_second": 29.928,
|
272 |
"step": 2600
|
273 |
},
|
274 |
{
|
275 |
"epoch": 3.85,
|
276 |
+
"eval_f1": 0.5630120856995185,
|
277 |
+
"eval_loss": 1.5236029624938965,
|
278 |
+
"eval_runtime": 3.2951,
|
279 |
+
"eval_samples_per_second": 29.741,
|
280 |
+
"eval_steps_per_second": 29.741,
|
281 |
"step": 2700
|
282 |
},
|
283 |
{
|
284 |
"epoch": 3.99,
|
285 |
+
"eval_f1": 0.5422100713682105,
|
286 |
+
"eval_loss": 1.52333664894104,
|
287 |
+
"eval_runtime": 3.3261,
|
288 |
+
"eval_samples_per_second": 29.464,
|
289 |
+
"eval_steps_per_second": 29.464,
|
290 |
"step": 2800
|
291 |
},
|
292 |
{
|
293 |
"epoch": 4.13,
|
294 |
+
"eval_f1": 0.5469719933620487,
|
295 |
+
"eval_loss": 1.6104604005813599,
|
296 |
+
"eval_runtime": 3.2888,
|
297 |
+
"eval_samples_per_second": 29.798,
|
298 |
+
"eval_steps_per_second": 29.798,
|
299 |
"step": 2900
|
300 |
},
|
301 |
{
|
302 |
"epoch": 4.27,
|
303 |
"learning_rate": 4.358974358974359e-06,
|
304 |
+
"loss": 0.3745,
|
305 |
"step": 3000
|
306 |
},
|
307 |
{
|
308 |
"epoch": 4.27,
|
309 |
+
"eval_f1": 0.5525357490677262,
|
310 |
+
"eval_loss": 1.7136110067367554,
|
311 |
+
"eval_runtime": 3.3248,
|
312 |
+
"eval_samples_per_second": 29.476,
|
313 |
+
"eval_steps_per_second": 29.476,
|
314 |
"step": 3000
|
315 |
},
|
316 |
{
|
317 |
"epoch": 4.42,
|
318 |
+
"eval_f1": 0.5539436259955471,
|
319 |
+
"eval_loss": 1.6561492681503296,
|
320 |
+
"eval_runtime": 3.2857,
|
321 |
+
"eval_samples_per_second": 29.826,
|
322 |
+
"eval_steps_per_second": 29.826,
|
323 |
"step": 3100
|
324 |
},
|
325 |
{
|
326 |
"epoch": 4.56,
|
327 |
+
"eval_f1": 0.5504413375623162,
|
328 |
+
"eval_loss": 1.7664132118225098,
|
329 |
+
"eval_runtime": 3.2517,
|
330 |
+
"eval_samples_per_second": 30.138,
|
331 |
+
"eval_steps_per_second": 30.138,
|
332 |
"step": 3200
|
333 |
},
|
334 |
{
|
335 |
"epoch": 4.7,
|
336 |
+
"eval_f1": 0.5494419672200014,
|
337 |
+
"eval_loss": 1.750455379486084,
|
338 |
+
"eval_runtime": 3.27,
|
339 |
+
"eval_samples_per_second": 29.969,
|
340 |
+
"eval_steps_per_second": 29.969,
|
341 |
"step": 3300
|
342 |
},
|
343 |
{
|
344 |
"epoch": 4.84,
|
345 |
+
"eval_f1": 0.5516497223039627,
|
346 |
+
"eval_loss": 1.7312653064727783,
|
347 |
+
"eval_runtime": 3.3127,
|
348 |
+
"eval_samples_per_second": 29.583,
|
349 |
+
"eval_steps_per_second": 29.583,
|
350 |
"step": 3400
|
351 |
},
|
352 |
{
|
353 |
"epoch": 4.99,
|
354 |
"learning_rate": 8.547008547008547e-08,
|
355 |
+
"loss": 0.307,
|
356 |
"step": 3500
|
357 |
},
|
358 |
{
|
359 |
"epoch": 4.99,
|
360 |
+
"eval_f1": 0.5515045914952008,
|
361 |
+
"eval_loss": 1.7193822860717773,
|
362 |
+
"eval_runtime": 3.2769,
|
363 |
+
"eval_samples_per_second": 29.907,
|
364 |
+
"eval_steps_per_second": 29.907,
|
365 |
"step": 3500
|
366 |
},
|
367 |
{
|
368 |
"epoch": 5.0,
|
369 |
"step": 3510,
|
370 |
"total_flos": 2890172619430200.0,
|
371 |
+
"train_loss": 0.6706694952103487,
|
372 |
+
"train_runtime": 824.1732,
|
373 |
+
"train_samples_per_second": 4.259,
|
374 |
+
"train_steps_per_second": 4.259
|
375 |
}
|
376 |
],
|
377 |
"max_steps": 3510,
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3899
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5347b81bd66fbac180a70e3615c9c445e5992c7677db1c7c6314dc0b49027803
|
3 |
size 3899
|