alex2awesome commited on
Commit
ef4a3ca
·
1 Parent(s): 751f3ef

Training in progress, step 500

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 5.0,
3
  "eval_samples": 98,
4
- "test_f1": 0.5580443282688324,
5
- "test_loss": 1.7251031398773193,
6
- "test_runtime": 3.2842,
7
- "test_samples_per_second": 29.84,
8
- "test_steps_per_second": 29.84,
9
- "train_loss": 0.669303188772283,
10
- "train_runtime": 808.2162,
11
  "train_samples": 702,
12
- "train_samples_per_second": 4.343,
13
- "train_steps_per_second": 4.343
14
  }
 
1
  {
2
  "epoch": 5.0,
3
  "eval_samples": 98,
4
+ "test_f1": 0.5515045914952008,
5
+ "test_loss": 1.7193970680236816,
6
+ "test_runtime": 3.325,
7
+ "test_samples_per_second": 29.474,
8
+ "test_steps_per_second": 29.474,
9
+ "train_loss": 0.6706694952103487,
10
+ "train_runtime": 824.1732,
11
  "train_samples": 702,
12
+ "train_samples_per_second": 4.259,
13
+ "train_steps_per_second": 4.259
14
  }
post-training eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "eval_samples": 98,
3
- "test_f1": 0.5580443282688324,
4
- "test_loss": 1.7251031398773193,
5
- "test_runtime": 3.2842,
6
- "test_samples_per_second": 29.84,
7
- "test_steps_per_second": 29.84
8
  }
 
1
  {
2
  "eval_samples": 98,
3
+ "test_f1": 0.5515045914952008,
4
+ "test_loss": 1.7193970680236816,
5
+ "test_runtime": 3.325,
6
+ "test_samples_per_second": 29.474,
7
+ "test_steps_per_second": 29.474
8
  }
prediction_output.jsonl CHANGED
@@ -1 +1 @@
1
- [{"pred": 10.053011894226074, "label": 0.0}, {"pred": 1.2265956401824951, "label": 5.0}, {"pred": -0.5370747447013855, "label": 2.0}, {"pred": 0.1752518266439438, "label": 4.0}, {"pred": -2.031033754348755, "label": 2.0}, {"pred": -3.3496036529541016, "label": 8.0}, {"pred": -1.9880871772766113, "label": 8.0}, {"pred": -3.086259126663208, "label": 2.0}, {"pred": -1.1470110416412354, "label": 8.0}, {"pred": 1.037685513496399, "label": 2.0}, {"pred": -1.913681149482727, "label": 2.0}, {"pred": 0.26967066526412964, "label": 2.0}, {"pred": 5.334761142730713, "label": 5.0}, {"pred": -1.662973165512085, "label": 5.0}, {"pred": 2.8667726516723633, "label": 5.0}, {"pred": -0.8025294542312622, "label": 5.0}, {"pred": -1.7527247667312622, "label": 5.0}, {"pred": -3.357849359512329, "label": 6.0}, {"pred": 1.181986927986145, "label": 6.0}, {"pred": 1.7410268783569336, "label": 5.0}, {"pred": 7.933586597442627, "label": 8.0}, {"pred": 3.3055169582366943, "label": 8.0}, {"pred": -2.8542656898498535, "label": 2.0}, {"pred": -1.19741690158844, "label": 5.0}, {"pred": -4.84285306930542, "label": 5.0}, {"pred": -2.683229923248291, "label": 5.0}, {"pred": -2.0367188453674316, "label": 2.0}, {"pred": -0.3946422338485718, "label": 2.0}, {"pred": 1.2412188053131104, "label": 2.0}, {"pred": 8.913151741027832, "label": 3.0}, {"pred": 2.1114447116851807, "label": 3.0}, {"pred": -0.3422248363494873, "label": 5.0}, {"pred": -1.5180871486663818, "label": 0.0}, {"pred": -3.858212471008301, "label": 2.0}, {"pred": -3.058220624923706, "label": 3.0}, {"pred": -2.8904712200164795, "label": 3.0}, {"pred": -3.0549261569976807, "label": 3.0}, {"pred": -0.9003552794456482, "label": 7.0}, {"pred": 9.505762100219727, "label": 8.0}, {"pred": -0.5478419065475464, "label": 7.0}, {"pred": -0.37932345271110535, "label": 7.0}, {"pred": -0.5652313828468323, "label": 2.0}, {"pred": -3.1210832595825195, "label": 2.0}, {"pred": -2.2201759815216064, "label": 3.0}, {"pred": -0.22374841570854187, "label": 8.0}, {"pred": -0.4147443473339081, "label": 0.0}, {"pred": -2.522362232208252, "label": 2.0}, {"pred": -0.3626328706741333, "label": 2.0}, {"pred": -0.5300700664520264, "label": 2.0}, {"pred": -0.03919024020433426, "label": 2.0}, {"pred": -0.5216307044029236, "label": 2.0}, {"pred": -2.4257967472076416, "label": 2.0}, {"pred": -0.43984726071357727, "label": 0.0}, {"pred": 7.890976428985596, "label": 7.0}, {"pred": -0.8893976211547852, "label": 7.0}, {"pred": -2.06447434425354, "label": 7.0}, {"pred": -1.5249279737472534, "label": 7.0}, {"pred": -0.5461889505386353, "label": 8.0}, {"pred": -0.3898739814758301, "label": 8.0}, {"pred": -0.2674681544303894, "label": 8.0}, {"pred": -1.5843223333358765, "label": 3.0}, {"pred": -0.9582051038742065, "label": 6.0}, {"pred": 10.02863597869873, "label": 0.0}, {"pred": 2.1390559673309326, "label": 2.0}, {"pred": 0.5683819651603699, "label": 3.0}, {"pred": 7.809494495391846, "label": 7.0}, {"pred": 4.672691345214844, "label": 0.0}, {"pred": -3.1617870330810547, "label": 5.0}, {"pred": -2.968081474304199, "label": 2.0}, {"pred": -4.236698627471924, "label": 2.0}, {"pred": -2.237872362136841, "label": 7.0}, {"pred": -2.4456675052642822, "label": 2.0}, {"pred": 0.0029354728758335114, "label": 2.0}, {"pred": -1.3975402116775513, "label": 2.0}, {"pred": -0.45201340317726135, "label": 8.0}, {"pred": -0.5411576628684998, "label": 8.0}, {"pred": -1.1021785736083984, "label": 3.0}, {"pred": -0.03798329457640648, "label": 0.0}, {"pred": -2.4011411666870117, "label": 0.0}, {"pred": -1.8443877696990967, "label": 2.0}, {"pred": 9.534435272216797, "label": 2.0}, {"pred": -2.261758804321289, "label": 2.0}, {"pred": -0.29918554425239563, "label": 8.0}, {"pred": 9.580008506774902, "label": 6.0}, {"pred": -1.6922998428344727, "label": 2.0}, {"pred": -0.11596175283193588, "label": 2.0}, {"pred": -1.1728023290634155, "label": 8.0}, {"pred": -2.624927282333374, "label": 0.0}, {"pred": -2.2074270248413086, "label": 2.0}, {"pred": 0.1966685950756073, "label": 3.0}, {"pred": -2.8291454315185547, "label": 3.0}, {"pred": -1.2406705617904663, "label": 2.0}, {"pred": 7.857271671295166, "label": 6.0}, {"pred": 3.400002956390381, "label": 0.0}, {"pred": -1.4394145011901855, "label": 0.0}, {"pred": 1.8180431127548218, "label": 5.0}, {"pred": -2.2545340061187744, "label": 5.0}, {"pred": -2.8155367374420166, "label": 5.0}, {"pred": -2.5808486938476562, "label": 5.0}, {"pred": 0.629129946231842, "label": 5.0}, {"pred": -0.1851099729537964, "label": 2.0}, {"pred": 0.6262603998184204, "label": 2.0}, {"pred": 0.04001408815383911, "label": 2.0}, {"pred": -2.094447612762451, "label": 5.0}, {"pred": 1.2242151498794556, "label": 4.0}, {"pred": 6.133328914642334, "label": 8.0}, {"pred": -3.5184497833251953, "label": 4.0}, {"pred": -2.7859067916870117, "label": 4.0}, {"pred": -2.7572460174560547, "label": 4.0}, {"pred": -2.47908616065979, "label": 8.0}, {"pred": -0.5159931182861328, "label": 2.0}, {"pred": 0.07365216314792633, "label": 3.0}, {"pred": 4.490240573883057, "label": 2.0}, {"pred": 6.107156276702881, "label": 5.0}, {"pred": -2.4094760417938232, "label": 5.0}, {"pred": -2.298379898071289, "label": 5.0}, {"pred": -3.0487470626831055, "label": 0.0}, {"pred": -1.3762894868850708, "label": 6.0}, {"pred": -2.8625693321228027, "label": 5.0}, {"pred": -1.4980090856552124, "label": 5.0}, {"pred": -0.9184617400169373, "label": 5.0}, {"pred": -0.7868920564651489, "label": 2.0}, {"pred": 4.807824611663818, "label": 2.0}, {"pred": 4.986772060394287, "label": 2.0}, {"pred": -2.0342037677764893, "label": 2.0}, {"pred": -0.8398276567459106, "label": 2.0}, {"pred": -1.2112452983856201, "label": 2.0}, {"pred": -2.7561631202697754, "label": 5.0}, {"pred": -2.6678245067596436, "label": 5.0}, {"pred": -0.8507716059684753, "label": 6.0}, {"pred": 1.9779833555221558, "label": 6.0}, {"pred": 7.1783857345581055, "label": 0.0}, {"pred": 2.3628337383270264, "label": 2.0}, {"pred": -1.8503243923187256, "label": 2.0}, {"pred": -3.973742961883545, "label": 6.0}, {"pred": -0.961530327796936, "label": 2.0}, {"pred": -2.3103420734405518, "label": 5.0}, {"pred": 0.1533740609884262, "label": 5.0}, {"pred": 0.945532500743866, "label": 6.0}, {"pred": -1.9985706806182861, "label": 2.0}, {"pred": 2.2117724418640137, "label": 5.0}, {"pred": 6.319922924041748, "label": 2.0}, {"pred": -2.8318819999694824, "label": 8.0}, {"pred": -3.428406238555908, "label": 2.0}, {"pred": -2.51615047454834, "label": 2.0}, {"pred": -2.9052319526672363, "label": 5.0}, {"pred": 0.7762099504470825, "label": 8.0}, {"pred": 0.3957656919956207, "label": 5.0}, {"pred": -1.3153777122497559, "label": 5.0}, {"pred": 4.028310298919678, "label": 5.0}, {"pred": 5.811086177825928, "label": 5.0}, {"pred": -2.535435914993286, "label": 5.0}, {"pred": -3.497481107711792, "label": 6.0}, {"pred": 0.04644595459103584, "label": 8.0}, {"pred": -0.3115100562572479, "label": 6.0}, {"pred": 0.016584614291787148, "label": 7.0}, {"pred": -0.7723070979118347, "label": 6.0}, {"pred": -1.5444141626358032, "label": 8.0}, {"pred": -1.141904354095459, "label": 0.0}, {"pred": 8.480439186096191, "label": 2.0}, {"pred": -2.3408584594726562, "label": 6.0}, {"pred": -1.7245216369628906, "label": 5.0}, {"pred": 1.5875722169876099, "label": 5.0}, {"pred": -0.9754578471183777, "label": 2.0}, {"pred": -2.217231035232544, "label": 2.0}, {"pred": -0.6310365796089172, "label": 8.0}, {"pred": -0.9391398429870605, "label": 8.0}, {"pred": -1.012092113494873, "label": 0.0}, {"pred": 8.798238754272461, "label": 2.0}, {"pred": -3.1681442260742188, "label": 0.0}, {"pred": -0.884898841381073, "label": 7.0}, {"pred": 1.2964022159576416, "label": 2.0}, {"pred": -2.4020252227783203, "label": 2.0}, {"pred": -3.692897319793701, "label": 8.0}, {"pred": 0.48183152079582214, "label": 6.0}, {"pred": -0.42652153968811035, "label": 6.0}, {"pred": 1.3932526111602783, "label": 5.0}, {"pred": 7.640524864196777, "label": 5.0}, {"pred": -3.08962082862854, "label": 5.0}, {"pred": -1.635117530822754, "label": 5.0}, {"pred": -0.565514326095581, "label": 5.0}, {"pred": -2.547136068344116, "label": 5.0}, {"pred": -0.6833630204200745, "label": 5.0}, {"pred": -0.7127103209495544, "label": 6.0}, {"pred": -0.14554797112941742, "label": 8.0}, {"pred": -0.12966954708099365, "label": 5.0}, {"pred": -2.2960150241851807, "label": 5.0}, {"pred": -0.6724411249160767, "label": 3.0}, {"pred": 8.128778457641602, "label": 3.0}, {"pred": -1.071781873703003, "label": 6.0}, {"pred": -1.9194085597991943, "label": 2.0}, {"pred": -1.54249906539917, "label": 3.0}, {"pred": -0.6316580176353455, "label": 2.0}, {"pred": -0.36814042925834656, "label": 2.0}, {"pred": -0.3363884389400482, "label": 2.0}, {"pred": -1.6689341068267822, "label": 0.0}, {"pred": -1.132188320159912, "label": 2.0}, {"pred": 10.208016395568848, "label": 5.0}, {"pred": -0.5401054620742798, "label": 5.0}, {"pred": 0.09691311419010162, "label": 5.0}, {"pred": 8.53925609588623, "label": 2.0}, {"pred": 4.391894340515137, "label": 6.0}, {"pred": -3.2388341426849365, "label": 3.0}, {"pred": -2.586022138595581, "label": 3.0}, {"pred": -1.3518240451812744, "label": 6.0}, {"pred": -2.103663444519043, "label": 3.0}, {"pred": -1.933193564414978, "label": 2.0}, {"pred": 1.1616668701171875, "label": 8.0}, {"pred": -1.3652011156082153, "label": 8.0}, {"pred": -4.48310661315918, "label": 0.0}, {"pred": -0.6386572122573853, "label": 1.0}, {"pred": 0.311795175075531, "label": 2.0}, {"pred": 2.243385076522827, "label": 3.0}, {"pred": 7.526029109954834, "label": 6.0}, {"pred": -3.0103445053100586, "label": 8.0}, {"pred": -1.4424251317977905, "label": 7.0}, {"pred": -2.952651262283325, "label": 0.0}, {"pred": -0.9500249028205872, "label": 8.0}, {"pred": 1.3159178495407104, "label": 2.0}, {"pred": 1.183246374130249, "label": 2.0}, {"pred": -2.141357183456421, "label": 5.0}, {"pred": 7.404512405395508, "label": 5.0}, {"pred": 1.0727956295013428, "label": 7.0}, {"pred": -3.514127492904663, "label": 2.0}, {"pred": -3.1212284564971924, "label": 2.0}, {"pred": 0.5527191758155823, "label": 8.0}, {"pred": -0.5971601605415344, "label": 5.0}, {"pred": 5.6161651611328125, "label": 5.0}, {"pred": 4.333760738372803, "label": 2.0}, {"pred": -3.9194581508636475, "label": 6.0}, {"pred": 2.415543556213379, "label": 6.0}, {"pred": -3.0811643600463867, "label": 2.0}, {"pred": -3.876741647720337, "label": 5.0}, {"pred": -1.4490783214569092, "label": 2.0}, {"pred": -2.84633207321167, "label": 5.0}, {"pred": -0.467058002948761, "label": 5.0}, {"pred": 9.251620292663574, "label": 8.0}, {"pred": -1.4969630241394043, "label": 5.0}, {"pred": -0.1453137993812561, "label": 2.0}, {"pred": -0.724045991897583, "label": 6.0}, {"pred": -2.866516351699829, "label": 6.0}, {"pred": -2.0785751342773438, "label": 5.0}, {"pred": 0.3289041221141815, "label": 6.0}, {"pred": -1.6521097421646118, "label": 2.0}, {"pred": -0.6320829391479492, "label": 2.0}, {"pred": 7.514566421508789, "label": 5.0}, {"pred": 4.6928019523620605, "label": 5.0}, {"pred": -3.0128238201141357, "label": 0.0}, {"pred": -1.0718092918395996, "label": 5.0}, {"pred": -3.47845721244812, "label": 2.0}, {"pred": 0.35053759813308716, "label": 2.0}, {"pred": -0.9066526293754578, "label": 2.0}, {"pred": -4.176146984100342, "label": 5.0}, {"pred": -2.598372220993042, "label": 2.0}, {"pred": 7.388974666595459, "label": 6.0}, {"pred": -0.4163927137851715, "label": 6.0}, {"pred": -1.299724817276001, "label": 6.0}, {"pred": 4.256181716918945, "label": 6.0}, {"pred": -0.24266250431537628, "label": 5.0}, {"pred": -2.206273317337036, "label": 5.0}, {"pred": -1.403360366821289, "label": 5.0}, {"pred": -2.5257246494293213, "label": 5.0}, {"pred": -2.7049272060394287, "label": 5.0}, {"pred": 4.336051940917969, "label": 2.0}, {"pred": 6.037196636199951, "label": 5.0}, {"pred": -3.0442888736724854, "label": 8.0}, {"pred": 1.1787348985671997, "label": 8.0}, {"pred": -0.4310672879219055, "label": 0.0}, {"pred": -0.17733784019947052, "label": 3.0}, {"pred": -1.1147769689559937, "label": 2.0}, {"pred": 0.8757383823394775, "label": 2.0}, {"pred": -1.8259029388427734, "label": 2.0}, {"pred": 0.1079450473189354, "label": 8.0}, {"pred": -1.332146406173706, "label": 8.0}, {"pred": -1.5691189765930176, "label": 0.0}, {"pred": 8.07284164428711, "label": 2.0}, {"pred": 1.6381406784057617, "label": 0.0}, {"pred": -2.8433923721313477, "label": 7.0}, {"pred": -3.629115581512451, "label": 0.0}, {"pred": -4.5651140213012695, "label": 0.0}, {"pred": -1.93071448802948, "label": 0.0}, {"pred": 1.3184444904327393, "label": 8.0}, {"pred": 1.7017006874084473, "label": 8.0}, {"pred": -1.1318089962005615, "label": 0.0}, {"pred": 6.206396579742432, "label": 0.0}, {"pred": 2.876361131668091, "label": 2.0}, {"pred": -1.8594180345535278, "label": 2.0}, {"pred": -2.608898878097534, "label": 0.0}]
 
1
+ [{"pred": 10.290809631347656, "label": 0.0}, {"pred": 0.9703102111816406, "label": 5.0}, {"pred": -1.5968129634857178, "label": 2.0}, {"pred": 0.7235668897628784, "label": 4.0}, {"pred": -3.0565438270568848, "label": 2.0}, {"pred": -2.3618180751800537, "label": 8.0}, {"pred": -1.6507198810577393, "label": 8.0}, {"pred": -2.724348783493042, "label": 2.0}, {"pred": -0.7913642525672913, "label": 8.0}, {"pred": -0.1787530481815338, "label": 2.0}, {"pred": -3.9603071212768555, "label": 2.0}, {"pred": 0.03501487895846367, "label": 2.0}, {"pred": 6.6571478843688965, "label": 5.0}, {"pred": -2.7573740482330322, "label": 5.0}, {"pred": 2.7549917697906494, "label": 5.0}, {"pred": -0.9166736602783203, "label": 5.0}, {"pred": 1.1418240070343018, "label": 5.0}, {"pred": -1.7485498189926147, "label": 6.0}, {"pred": 1.0153459310531616, "label": 6.0}, {"pred": 2.065824508666992, "label": 5.0}, {"pred": 7.917908191680908, "label": 8.0}, {"pred": 2.5800442695617676, "label": 8.0}, {"pred": -3.0683484077453613, "label": 2.0}, {"pred": 0.3347143530845642, "label": 5.0}, {"pred": -2.8323209285736084, "label": 5.0}, {"pred": -2.9949421882629395, "label": 5.0}, {"pred": -3.9032864570617676, "label": 2.0}, {"pred": -0.9693252444267273, "label": 2.0}, {"pred": 0.9653197526931763, "label": 2.0}, {"pred": 8.178954124450684, "label": 3.0}, {"pred": 3.3416378498077393, "label": 3.0}, {"pred": -1.3156547546386719, "label": 5.0}, {"pred": -1.044263243675232, "label": 0.0}, {"pred": -3.586060047149658, "label": 2.0}, {"pred": -1.6682804822921753, "label": 3.0}, {"pred": -3.6979148387908936, "label": 3.0}, {"pred": -3.4306604862213135, "label": 3.0}, {"pred": -0.3065505027770996, "label": 7.0}, {"pred": 10.175311088562012, "label": 8.0}, {"pred": -2.8946011066436768, "label": 7.0}, {"pred": -0.09801101684570312, "label": 7.0}, {"pred": 0.9370686411857605, "label": 2.0}, {"pred": -1.5044194459915161, "label": 2.0}, {"pred": -1.840678334236145, "label": 3.0}, {"pred": -0.3210545480251312, "label": 8.0}, {"pred": -1.5668078660964966, "label": 0.0}, {"pred": -1.9491629600524902, "label": 2.0}, {"pred": -0.9471103549003601, "label": 2.0}, {"pred": -1.2616881132125854, "label": 2.0}, {"pred": -1.0521950721740723, "label": 2.0}, {"pred": -0.00420457124710083, "label": 2.0}, {"pred": -1.8757306337356567, "label": 2.0}, {"pred": -1.201255440711975, "label": 0.0}, {"pred": 7.710162162780762, "label": 7.0}, {"pred": -2.4759974479675293, "label": 7.0}, {"pred": -2.5288870334625244, "label": 7.0}, {"pred": -0.9985426664352417, "label": 7.0}, {"pred": -1.529648780822754, "label": 8.0}, {"pred": -1.32073974609375, "label": 8.0}, {"pred": -0.8256033062934875, "label": 8.0}, {"pred": -1.5793683528900146, "label": 3.0}, {"pred": -1.1773693561553955, "label": 6.0}, {"pred": 9.405431747436523, "label": 0.0}, {"pred": 3.3469057083129883, "label": 2.0}, {"pred": 1.315601110458374, "label": 3.0}, {"pred": 6.369998931884766, "label": 7.0}, {"pred": 5.687730312347412, "label": 0.0}, {"pred": -3.7174370288848877, "label": 5.0}, {"pred": -3.179921865463257, "label": 2.0}, {"pred": -3.663841724395752, "label": 2.0}, {"pred": -1.97105872631073, "label": 7.0}, {"pred": -3.0646395683288574, "label": 2.0}, {"pred": -1.2003767490386963, "label": 2.0}, {"pred": -1.9717252254486084, "label": 2.0}, {"pred": -0.3366681635379791, "label": 8.0}, {"pred": -1.3070366382598877, "label": 8.0}, {"pred": -0.8485084176063538, "label": 3.0}, {"pred": -0.20988516509532928, "label": 0.0}, {"pred": -2.292656660079956, "label": 0.0}, {"pred": -2.090421438217163, "label": 2.0}, {"pred": 9.354241371154785, "label": 2.0}, {"pred": -3.073307514190674, "label": 2.0}, {"pred": 0.21364711225032806, "label": 8.0}, {"pred": 9.965922355651855, "label": 6.0}, {"pred": -3.118712902069092, "label": 2.0}, {"pred": -0.03438292443752289, "label": 2.0}, {"pred": 0.2002769261598587, "label": 8.0}, {"pred": -1.063583254814148, "label": 0.0}, {"pred": -1.177560567855835, "label": 2.0}, {"pred": 0.24500791728496552, "label": 3.0}, {"pred": -2.689775228500366, "label": 3.0}, {"pred": -1.0503861904144287, "label": 2.0}, {"pred": 7.594336032867432, "label": 6.0}, {"pred": 2.1890816688537598, "label": 0.0}, {"pred": -1.7860941886901855, "label": 0.0}, {"pred": 5.3757171630859375, "label": 5.0}, {"pred": -1.2799867391586304, "label": 5.0}, {"pred": -2.6802003383636475, "label": 5.0}, {"pred": -3.188868761062622, "label": 5.0}, {"pred": 1.7214399576187134, "label": 5.0}, {"pred": -1.2691649198532104, "label": 2.0}, {"pred": 0.995307207107544, "label": 2.0}, {"pred": 5.972494125366211, "label": 2.0}, {"pred": -3.2425472736358643, "label": 5.0}, {"pred": 0.8252993226051331, "label": 4.0}, {"pred": 0.8438105583190918, "label": 8.0}, {"pred": -1.5899057388305664, "label": 4.0}, {"pred": -3.049546241760254, "label": 4.0}, {"pred": -1.266801357269287, "label": 4.0}, {"pred": -1.5122487545013428, "label": 8.0}, {"pred": -1.030104160308838, "label": 2.0}, {"pred": -0.673987627029419, "label": 3.0}, {"pred": 5.558537483215332, "label": 2.0}, {"pred": 6.735321998596191, "label": 5.0}, {"pred": -0.7411388158798218, "label": 5.0}, {"pred": -1.8602837324142456, "label": 5.0}, {"pred": -2.5063188076019287, "label": 0.0}, {"pred": -0.9122320413589478, "label": 6.0}, {"pred": -2.421311616897583, "label": 5.0}, {"pred": -1.025620937347412, "label": 5.0}, {"pred": -1.5433242321014404, "label": 5.0}, {"pred": -0.10171565413475037, "label": 2.0}, {"pred": 2.286135673522949, "label": 2.0}, {"pred": 7.0758748054504395, "label": 2.0}, {"pred": -2.814152240753174, "label": 2.0}, {"pred": 0.2042846530675888, "label": 2.0}, {"pred": -0.3653508126735687, "label": 2.0}, {"pred": -1.995278000831604, "label": 5.0}, {"pred": -1.831222653388977, "label": 5.0}, {"pred": -1.064313292503357, "label": 6.0}, {"pred": 2.56392765045166, "label": 6.0}, {"pred": 8.376298904418945, "label": 0.0}, {"pred": 1.3188626766204834, "label": 2.0}, {"pred": -2.23551607131958, "label": 2.0}, {"pred": -2.767118215560913, "label": 6.0}, {"pred": -0.0966845378279686, "label": 2.0}, {"pred": -1.313618540763855, "label": 5.0}, {"pred": -0.4058559834957123, "label": 5.0}, {"pred": 1.0895838737487793, "label": 6.0}, {"pred": -2.1371374130249023, "label": 2.0}, {"pred": 1.4900816679000854, "label": 5.0}, {"pred": 6.579888343811035, "label": 2.0}, {"pred": -3.2009799480438232, "label": 8.0}, {"pred": -2.073420524597168, "label": 2.0}, {"pred": -1.242249608039856, "label": 2.0}, {"pred": -1.9888185262680054, "label": 5.0}, {"pred": 0.48061397671699524, "label": 8.0}, {"pred": 0.4645574390888214, "label": 5.0}, {"pred": -1.0836477279663086, "label": 5.0}, {"pred": 2.5422253608703613, "label": 5.0}, {"pred": 6.876040458679199, "label": 5.0}, {"pred": -3.140732765197754, "label": 5.0}, {"pred": -2.5778374671936035, "label": 6.0}, {"pred": 0.025611255317926407, "label": 8.0}, {"pred": 1.1743779182434082, "label": 6.0}, {"pred": 0.28550976514816284, "label": 7.0}, {"pred": -0.5769954323768616, "label": 6.0}, {"pred": -1.9950687885284424, "label": 8.0}, {"pred": -0.8799560070037842, "label": 0.0}, {"pred": 7.703958511352539, "label": 2.0}, {"pred": -1.9189386367797852, "label": 6.0}, {"pred": -3.1383209228515625, "label": 5.0}, {"pred": 2.151003837585449, "label": 5.0}, {"pred": -0.1051066666841507, "label": 2.0}, {"pred": -2.95643949508667, "label": 2.0}, {"pred": -0.4773956835269928, "label": 8.0}, {"pred": -1.499645471572876, "label": 8.0}, {"pred": -0.9763684868812561, "label": 0.0}, {"pred": 8.102483749389648, "label": 2.0}, {"pred": -2.9032540321350098, "label": 0.0}, {"pred": -1.7431988716125488, "label": 7.0}, {"pred": 3.5589823722839355, "label": 2.0}, {"pred": -1.2039445638656616, "label": 2.0}, {"pred": -4.388154029846191, "label": 8.0}, {"pred": 0.5220628380775452, "label": 6.0}, {"pred": -0.9276381134986877, "label": 6.0}, {"pred": 0.7515405416488647, "label": 5.0}, {"pred": 6.313348770141602, "label": 5.0}, {"pred": -3.2387163639068604, "label": 5.0}, {"pred": -1.8151580095291138, "label": 5.0}, {"pred": -1.4219468832015991, "label": 5.0}, {"pred": -1.951231598854065, "label": 5.0}, {"pred": -1.1042416095733643, "label": 5.0}, {"pred": -1.4297571182250977, "label": 6.0}, {"pred": -1.0193731784820557, "label": 8.0}, {"pred": -0.10052667558193207, "label": 5.0}, {"pred": -1.7839338779449463, "label": 5.0}, {"pred": -1.3848637342453003, "label": 3.0}, {"pred": 7.9457926750183105, "label": 3.0}, {"pred": -2.433159828186035, "label": 6.0}, {"pred": -2.399423122406006, "label": 2.0}, {"pred": -0.9965806007385254, "label": 3.0}, {"pred": -1.350462794303894, "label": 2.0}, {"pred": -1.4989750385284424, "label": 2.0}, {"pred": -1.0299936532974243, "label": 2.0}, {"pred": -1.6915379762649536, "label": 0.0}, {"pred": -1.2833725214004517, "label": 2.0}, {"pred": 9.242444038391113, "label": 5.0}, {"pred": -1.860719919204712, "label": 5.0}, {"pred": 0.09586504101753235, "label": 5.0}, {"pred": 8.429362297058105, "label": 2.0}, {"pred": 4.24113655090332, "label": 6.0}, {"pred": -3.6316137313842773, "label": 3.0}, {"pred": -1.099712610244751, "label": 3.0}, {"pred": -0.07706806808710098, "label": 6.0}, {"pred": -2.187944173812866, "label": 3.0}, {"pred": -3.425537586212158, "label": 2.0}, {"pred": 1.153124451637268, "label": 8.0}, {"pred": -1.4549442529678345, "label": 8.0}, {"pred": -2.9547150135040283, "label": 0.0}, {"pred": 0.5087819695472717, "label": 1.0}, {"pred": -0.8468652367591858, "label": 2.0}, {"pred": 4.418088912963867, "label": 3.0}, {"pred": 6.098045349121094, "label": 6.0}, {"pred": -3.040365219116211, "label": 8.0}, {"pred": -2.9563238620758057, "label": 7.0}, {"pred": -1.0640069246292114, "label": 0.0}, {"pred": 0.4458455443382263, "label": 8.0}, {"pred": 2.9810967445373535, "label": 2.0}, {"pred": 1.6232993602752686, "label": 2.0}, {"pred": -2.709993600845337, "label": 5.0}, {"pred": 6.805906772613525, "label": 5.0}, {"pred": 2.1797492504119873, "label": 7.0}, {"pred": -4.349334239959717, "label": 2.0}, {"pred": -4.006707191467285, "label": 2.0}, {"pred": -2.190566301345825, "label": 8.0}, {"pred": -0.9495126008987427, "label": 5.0}, {"pred": 7.999160289764404, "label": 5.0}, {"pred": 3.7388970851898193, "label": 2.0}, {"pred": -2.9454569816589355, "label": 6.0}, {"pred": 2.977922201156616, "label": 6.0}, {"pred": -1.7080835103988647, "label": 2.0}, {"pred": -2.4555013179779053, "label": 5.0}, {"pred": -2.324666738510132, "label": 2.0}, {"pred": -3.444615364074707, "label": 5.0}, {"pred": 0.11816424131393433, "label": 5.0}, {"pred": 9.905989646911621, "label": 8.0}, {"pred": -2.962965488433838, "label": 5.0}, {"pred": 0.030992530286312103, "label": 2.0}, {"pred": 0.42090052366256714, "label": 6.0}, {"pred": -1.4415723085403442, "label": 6.0}, {"pred": -1.0313920974731445, "label": 5.0}, {"pred": 0.4347422420978546, "label": 6.0}, {"pred": -2.240575075149536, "label": 2.0}, {"pred": -0.5948125123977661, "label": 2.0}, {"pred": 6.614602565765381, "label": 5.0}, {"pred": 4.726541042327881, "label": 5.0}, {"pred": -3.396735191345215, "label": 0.0}, {"pred": -0.8261128664016724, "label": 5.0}, {"pred": -2.3094027042388916, "label": 2.0}, {"pred": 0.37960976362228394, "label": 2.0}, {"pred": -3.0307390689849854, "label": 2.0}, {"pred": -3.9579150676727295, "label": 5.0}, {"pred": -2.420196294784546, "label": 2.0}, {"pred": 6.9699907302856445, "label": 6.0}, {"pred": -1.044237494468689, "label": 6.0}, {"pred": -1.1202991008758545, "label": 6.0}, {"pred": 6.221286773681641, "label": 6.0}, {"pred": 1.2837556600570679, "label": 5.0}, {"pred": -1.6445438861846924, "label": 5.0}, {"pred": -2.026249647140503, "label": 5.0}, {"pred": -3.088744640350342, "label": 5.0}, {"pred": -3.18585205078125, "label": 5.0}, {"pred": 3.9137914180755615, "label": 2.0}, {"pred": 5.3143630027771, "label": 5.0}, {"pred": -3.272541046142578, "label": 8.0}, {"pred": 1.741031289100647, "label": 8.0}, {"pred": 1.3089599609375, "label": 0.0}, {"pred": -1.2750519514083862, "label": 3.0}, {"pred": -0.41170594096183777, "label": 2.0}, {"pred": -2.5046517848968506, "label": 2.0}, {"pred": -0.3387974500656128, "label": 2.0}, {"pred": 5.276491641998291, "label": 8.0}, {"pred": -1.4969731569290161, "label": 8.0}, {"pred": -1.0964272022247314, "label": 0.0}, {"pred": 5.757904529571533, "label": 2.0}, {"pred": 0.23799118399620056, "label": 0.0}, {"pred": -3.1069884300231934, "label": 7.0}, {"pred": -1.6246393918991089, "label": 0.0}, {"pred": -3.437112331390381, "label": 0.0}, {"pred": -0.5458642244338989, "label": 0.0}, {"pred": 3.5296177864074707, "label": 8.0}, {"pred": 0.6833158135414124, "label": 8.0}, {"pred": -1.6968615055084229, "label": 0.0}, {"pred": 6.245951175689697, "label": 0.0}, {"pred": 2.876694440841675, "label": 2.0}, {"pred": -3.0809290409088135, "label": 2.0}, {"pred": -3.641925096511841, "label": 0.0}]
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7688e0ffe403ad65a54de1859cab3fd6d5a6c82e3c89f4772c35ed20f3d46e0d
3
  size 714922721
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:028c21ee2bcbc4765d7f542294c6a65021701767f2244e0a78e657a7356d484a
3
  size 714922721
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.669303188772283,
4
- "train_runtime": 808.2162,
5
  "train_samples": 702,
6
- "train_samples_per_second": 4.343,
7
- "train_steps_per_second": 4.343
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.6706694952103487,
4
+ "train_runtime": 824.1732,
5
  "train_samples": 702,
6
+ "train_samples_per_second": 4.259,
7
+ "train_steps_per_second": 4.259
8
  }
trainer_state.json CHANGED
@@ -9,369 +9,369 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
- "eval_f1": 0.35918314852583866,
13
- "eval_loss": 1.3049309253692627,
14
- "eval_runtime": 3.3245,
15
- "eval_samples_per_second": 29.478,
16
- "eval_steps_per_second": 29.478,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.28,
21
- "eval_f1": 0.40515736823730264,
22
- "eval_loss": 1.2728245258331299,
23
- "eval_runtime": 3.3159,
24
- "eval_samples_per_second": 29.554,
25
- "eval_steps_per_second": 29.554,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 0.43,
30
- "eval_f1": 0.4151566527966167,
31
- "eval_loss": 1.1451201438903809,
32
- "eval_runtime": 3.3072,
33
- "eval_samples_per_second": 29.632,
34
- "eval_steps_per_second": 29.632,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 0.57,
39
- "eval_f1": 0.5018702268874201,
40
- "eval_loss": 1.3512643575668335,
41
- "eval_runtime": 3.3001,
42
- "eval_samples_per_second": 29.696,
43
- "eval_steps_per_second": 29.696,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 0.71,
48
  "learning_rate": 2.572649572649573e-05,
49
- "loss": 1.2057,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 0.71,
54
- "eval_f1": 0.4742112536219468,
55
- "eval_loss": 1.289705514907837,
56
- "eval_runtime": 3.3385,
57
- "eval_samples_per_second": 29.355,
58
- "eval_steps_per_second": 29.355,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 0.85,
63
- "eval_f1": 0.4943761677098492,
64
- "eval_loss": 1.2339965105056763,
65
- "eval_runtime": 3.2935,
66
- "eval_samples_per_second": 29.755,
67
- "eval_steps_per_second": 29.755,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 1.0,
72
- "eval_f1": 0.4782653119856877,
73
- "eval_loss": 1.2076354026794434,
74
- "eval_runtime": 3.3195,
75
- "eval_samples_per_second": 29.523,
76
- "eval_steps_per_second": 29.523,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 1.14,
81
- "eval_f1": 0.495340891238337,
82
- "eval_loss": 1.2073545455932617,
83
- "eval_runtime": 3.3852,
84
- "eval_samples_per_second": 28.949,
85
- "eval_steps_per_second": 28.949,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 1.28,
90
- "eval_f1": 0.4908699435172264,
91
- "eval_loss": 1.1214157342910767,
92
- "eval_runtime": 3.2813,
93
- "eval_samples_per_second": 29.866,
94
- "eval_steps_per_second": 29.866,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 1.42,
99
  "learning_rate": 2.1452991452991456e-05,
100
- "loss": 0.9162,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 1.42,
105
- "eval_f1": 0.5207421095147065,
106
- "eval_loss": 1.2604155540466309,
107
- "eval_runtime": 3.3289,
108
- "eval_samples_per_second": 29.439,
109
- "eval_steps_per_second": 29.439,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 1.57,
114
- "eval_f1": 0.4892920687768911,
115
- "eval_loss": 1.245543122291565,
116
- "eval_runtime": 3.336,
117
- "eval_samples_per_second": 29.377,
118
- "eval_steps_per_second": 29.377,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 1.71,
123
- "eval_f1": 0.49936199134839465,
124
- "eval_loss": 1.0983333587646484,
125
- "eval_runtime": 3.3536,
126
- "eval_samples_per_second": 29.222,
127
- "eval_steps_per_second": 29.222,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 1.85,
132
- "eval_f1": 0.5027275057458506,
133
- "eval_loss": 1.1237385272979736,
134
- "eval_runtime": 3.3034,
135
- "eval_samples_per_second": 29.666,
136
- "eval_steps_per_second": 29.666,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 1.99,
141
- "eval_f1": 0.5252558671348281,
142
- "eval_loss": 1.1780937910079956,
143
- "eval_runtime": 3.3275,
144
- "eval_samples_per_second": 29.452,
145
- "eval_steps_per_second": 29.452,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 2.14,
150
  "learning_rate": 1.7179487179487178e-05,
151
- "loss": 0.8166,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 2.14,
156
- "eval_f1": 0.5182894847113101,
157
- "eval_loss": 1.2812834978103638,
158
- "eval_runtime": 3.3372,
159
- "eval_samples_per_second": 29.366,
160
- "eval_steps_per_second": 29.366,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 2.28,
165
- "eval_f1": 0.5397772175884552,
166
- "eval_loss": 1.3798938989639282,
167
- "eval_runtime": 3.3205,
168
- "eval_samples_per_second": 29.514,
169
- "eval_steps_per_second": 29.514,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 2.42,
174
- "eval_f1": 0.5227683307908794,
175
- "eval_loss": 1.3370585441589355,
176
- "eval_runtime": 3.2816,
177
- "eval_samples_per_second": 29.863,
178
- "eval_steps_per_second": 29.863,
179
  "step": 1700
180
  },
181
  {
182
  "epoch": 2.56,
183
- "eval_f1": 0.522702095595864,
184
- "eval_loss": 1.2437885999679565,
185
- "eval_runtime": 3.3168,
186
- "eval_samples_per_second": 29.547,
187
- "eval_steps_per_second": 29.547,
188
  "step": 1800
189
  },
190
  {
191
  "epoch": 2.71,
192
- "eval_f1": 0.5313986549568716,
193
- "eval_loss": 1.3399726152420044,
194
- "eval_runtime": 3.3047,
195
- "eval_samples_per_second": 29.655,
196
- "eval_steps_per_second": 29.655,
197
  "step": 1900
198
  },
199
  {
200
  "epoch": 2.85,
201
  "learning_rate": 1.2905982905982905e-05,
202
- "loss": 0.6229,
203
  "step": 2000
204
  },
205
  {
206
  "epoch": 2.85,
207
- "eval_f1": 0.5415328573829171,
208
- "eval_loss": 1.3776614665985107,
209
- "eval_runtime": 3.3318,
210
- "eval_samples_per_second": 29.414,
211
- "eval_steps_per_second": 29.414,
212
  "step": 2000
213
  },
214
  {
215
  "epoch": 2.99,
216
- "eval_f1": 0.5525985145128183,
217
- "eval_loss": 1.3482710123062134,
218
- "eval_runtime": 3.2918,
219
- "eval_samples_per_second": 29.771,
220
- "eval_steps_per_second": 29.771,
221
  "step": 2100
222
  },
223
  {
224
  "epoch": 3.13,
225
- "eval_f1": 0.5231664937259481,
226
- "eval_loss": 1.6263172626495361,
227
- "eval_runtime": 3.328,
228
- "eval_samples_per_second": 29.447,
229
- "eval_steps_per_second": 29.447,
230
  "step": 2200
231
  },
232
  {
233
  "epoch": 3.28,
234
- "eval_f1": 0.5557207675216345,
235
- "eval_loss": 1.5367680788040161,
236
- "eval_runtime": 3.3115,
237
- "eval_samples_per_second": 29.593,
238
- "eval_steps_per_second": 29.593,
239
  "step": 2300
240
  },
241
  {
242
  "epoch": 3.42,
243
- "eval_f1": 0.5658303098289025,
244
- "eval_loss": 1.5507080554962158,
245
- "eval_runtime": 3.3402,
246
- "eval_samples_per_second": 29.34,
247
- "eval_steps_per_second": 29.34,
248
  "step": 2400
249
  },
250
  {
251
  "epoch": 3.56,
252
  "learning_rate": 8.632478632478633e-06,
253
- "loss": 0.4661,
254
  "step": 2500
255
  },
256
  {
257
  "epoch": 3.56,
258
- "eval_f1": 0.5247068982894753,
259
- "eval_loss": 1.5510303974151611,
260
- "eval_runtime": 3.3088,
261
- "eval_samples_per_second": 29.618,
262
- "eval_steps_per_second": 29.618,
263
  "step": 2500
264
  },
265
  {
266
  "epoch": 3.7,
267
- "eval_f1": 0.5355312302855276,
268
- "eval_loss": 1.630504846572876,
269
- "eval_runtime": 3.311,
270
- "eval_samples_per_second": 29.598,
271
- "eval_steps_per_second": 29.598,
272
  "step": 2600
273
  },
274
  {
275
  "epoch": 3.85,
276
- "eval_f1": 0.54267936347935,
277
- "eval_loss": 1.5573792457580566,
278
- "eval_runtime": 3.3253,
279
- "eval_samples_per_second": 29.471,
280
- "eval_steps_per_second": 29.471,
281
  "step": 2700
282
  },
283
  {
284
  "epoch": 3.99,
285
- "eval_f1": 0.5414450954861769,
286
- "eval_loss": 1.4870713949203491,
287
- "eval_runtime": 3.3034,
288
- "eval_samples_per_second": 29.667,
289
- "eval_steps_per_second": 29.667,
290
  "step": 2800
291
  },
292
  {
293
  "epoch": 4.13,
294
- "eval_f1": 0.554346591923044,
295
- "eval_loss": 1.6329436302185059,
296
- "eval_runtime": 3.2973,
297
- "eval_samples_per_second": 29.722,
298
- "eval_steps_per_second": 29.722,
299
  "step": 2900
300
  },
301
  {
302
  "epoch": 4.27,
303
  "learning_rate": 4.358974358974359e-06,
304
- "loss": 0.3667,
305
  "step": 3000
306
  },
307
  {
308
  "epoch": 4.27,
309
- "eval_f1": 0.5501771511544494,
310
- "eval_loss": 1.6794120073318481,
311
- "eval_runtime": 3.3148,
312
- "eval_samples_per_second": 29.565,
313
- "eval_steps_per_second": 29.565,
314
  "step": 3000
315
  },
316
  {
317
  "epoch": 4.42,
318
- "eval_f1": 0.5417922487645527,
319
- "eval_loss": 1.6819510459899902,
320
- "eval_runtime": 3.3029,
321
- "eval_samples_per_second": 29.671,
322
- "eval_steps_per_second": 29.671,
323
  "step": 3100
324
  },
325
  {
326
  "epoch": 4.56,
327
- "eval_f1": 0.5528849807780305,
328
- "eval_loss": 1.7637581825256348,
329
- "eval_runtime": 3.3118,
330
- "eval_samples_per_second": 29.591,
331
- "eval_steps_per_second": 29.591,
332
  "step": 3200
333
  },
334
  {
335
  "epoch": 4.7,
336
- "eval_f1": 0.5512848750593062,
337
- "eval_loss": 1.7320890426635742,
338
- "eval_runtime": 3.2827,
339
- "eval_samples_per_second": 29.854,
340
- "eval_steps_per_second": 29.854,
341
  "step": 3300
342
  },
343
  {
344
  "epoch": 4.84,
345
- "eval_f1": 0.5548159887338896,
346
- "eval_loss": 1.74428391456604,
347
- "eval_runtime": 3.3518,
348
- "eval_samples_per_second": 29.238,
349
- "eval_steps_per_second": 29.238,
350
  "step": 3400
351
  },
352
  {
353
  "epoch": 4.99,
354
  "learning_rate": 8.547008547008547e-08,
355
- "loss": 0.2999,
356
  "step": 3500
357
  },
358
  {
359
  "epoch": 4.99,
360
- "eval_f1": 0.5582660030249127,
361
- "eval_loss": 1.7251840829849243,
362
- "eval_runtime": 3.3293,
363
- "eval_samples_per_second": 29.435,
364
- "eval_steps_per_second": 29.435,
365
  "step": 3500
366
  },
367
  {
368
  "epoch": 5.0,
369
  "step": 3510,
370
  "total_flos": 2890172619430200.0,
371
- "train_loss": 0.669303188772283,
372
- "train_runtime": 808.2162,
373
- "train_samples_per_second": 4.343,
374
- "train_steps_per_second": 4.343
375
  }
376
  ],
377
  "max_steps": 3510,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.14,
12
+ "eval_f1": 0.37622621795062866,
13
+ "eval_loss": 1.36316978931427,
14
+ "eval_runtime": 3.2856,
15
+ "eval_samples_per_second": 29.827,
16
+ "eval_steps_per_second": 29.827,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 0.28,
21
+ "eval_f1": 0.41617398891170265,
22
+ "eval_loss": 1.227824330329895,
23
+ "eval_runtime": 3.2714,
24
+ "eval_samples_per_second": 29.957,
25
+ "eval_steps_per_second": 29.957,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 0.43,
30
+ "eval_f1": 0.41594055599199414,
31
+ "eval_loss": 1.1802130937576294,
32
+ "eval_runtime": 3.2851,
33
+ "eval_samples_per_second": 29.832,
34
+ "eval_steps_per_second": 29.832,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 0.57,
39
+ "eval_f1": 0.4879226887989845,
40
+ "eval_loss": 1.3237018585205078,
41
+ "eval_runtime": 3.2714,
42
+ "eval_samples_per_second": 29.957,
43
+ "eval_steps_per_second": 29.957,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 0.71,
48
  "learning_rate": 2.572649572649573e-05,
49
+ "loss": 1.2,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 0.71,
54
+ "eval_f1": 0.46450380175056494,
55
+ "eval_loss": 1.2971174716949463,
56
+ "eval_runtime": 3.2843,
57
+ "eval_samples_per_second": 29.839,
58
+ "eval_steps_per_second": 29.839,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 0.85,
63
+ "eval_f1": 0.5019868520647613,
64
+ "eval_loss": 1.2549620866775513,
65
+ "eval_runtime": 3.267,
66
+ "eval_samples_per_second": 29.997,
67
+ "eval_steps_per_second": 29.997,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 1.0,
72
+ "eval_f1": 0.48057967334012397,
73
+ "eval_loss": 1.1853649616241455,
74
+ "eval_runtime": 3.277,
75
+ "eval_samples_per_second": 29.905,
76
+ "eval_steps_per_second": 29.905,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 1.14,
81
+ "eval_f1": 0.5011814210846155,
82
+ "eval_loss": 1.1788480281829834,
83
+ "eval_runtime": 3.2639,
84
+ "eval_samples_per_second": 30.025,
85
+ "eval_steps_per_second": 30.025,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 1.28,
90
+ "eval_f1": 0.4964300899620197,
91
+ "eval_loss": 1.093542218208313,
92
+ "eval_runtime": 3.2724,
93
+ "eval_samples_per_second": 29.947,
94
+ "eval_steps_per_second": 29.947,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 1.42,
99
  "learning_rate": 2.1452991452991456e-05,
100
+ "loss": 0.9189,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 1.42,
105
+ "eval_f1": 0.4986272191320895,
106
+ "eval_loss": 1.2862237691879272,
107
+ "eval_runtime": 3.302,
108
+ "eval_samples_per_second": 29.679,
109
+ "eval_steps_per_second": 29.679,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 1.57,
114
+ "eval_f1": 0.49297809308258944,
115
+ "eval_loss": 1.2222929000854492,
116
+ "eval_runtime": 3.3171,
117
+ "eval_samples_per_second": 29.544,
118
+ "eval_steps_per_second": 29.544,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 1.71,
123
+ "eval_f1": 0.4953797333525823,
124
+ "eval_loss": 1.1196690797805786,
125
+ "eval_runtime": 3.2943,
126
+ "eval_samples_per_second": 29.749,
127
+ "eval_steps_per_second": 29.749,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 1.85,
132
+ "eval_f1": 0.5153008157478032,
133
+ "eval_loss": 1.1256704330444336,
134
+ "eval_runtime": 3.2631,
135
+ "eval_samples_per_second": 30.033,
136
+ "eval_steps_per_second": 30.033,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 1.99,
141
+ "eval_f1": 0.5263780363862973,
142
+ "eval_loss": 1.1729286909103394,
143
+ "eval_runtime": 3.2904,
144
+ "eval_samples_per_second": 29.783,
145
+ "eval_steps_per_second": 29.783,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 2.14,
150
  "learning_rate": 1.7179487179487178e-05,
151
+ "loss": 0.8143,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 2.14,
156
+ "eval_f1": 0.5165321012151871,
157
+ "eval_loss": 1.272233486175537,
158
+ "eval_runtime": 3.3087,
159
+ "eval_samples_per_second": 29.619,
160
+ "eval_steps_per_second": 29.619,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 2.28,
165
+ "eval_f1": 0.539472065505205,
166
+ "eval_loss": 1.3217926025390625,
167
+ "eval_runtime": 3.2634,
168
+ "eval_samples_per_second": 30.03,
169
+ "eval_steps_per_second": 30.03,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 2.42,
174
+ "eval_f1": 0.5170136038987323,
175
+ "eval_loss": 1.338261604309082,
176
+ "eval_runtime": 3.2635,
177
+ "eval_samples_per_second": 30.029,
178
+ "eval_steps_per_second": 30.029,
179
  "step": 1700
180
  },
181
  {
182
  "epoch": 2.56,
183
+ "eval_f1": 0.5138801729725696,
184
+ "eval_loss": 1.250339388847351,
185
+ "eval_runtime": 3.2656,
186
+ "eval_samples_per_second": 30.009,
187
+ "eval_steps_per_second": 30.009,
188
  "step": 1800
189
  },
190
  {
191
  "epoch": 2.71,
192
+ "eval_f1": 0.523963853035474,
193
+ "eval_loss": 1.362999439239502,
194
+ "eval_runtime": 3.3211,
195
+ "eval_samples_per_second": 29.508,
196
+ "eval_steps_per_second": 29.508,
197
  "step": 1900
198
  },
199
  {
200
  "epoch": 2.85,
201
  "learning_rate": 1.2905982905982905e-05,
202
+ "loss": 0.6175,
203
  "step": 2000
204
  },
205
  {
206
  "epoch": 2.85,
207
+ "eval_f1": 0.5305458058252502,
208
+ "eval_loss": 1.402750015258789,
209
+ "eval_runtime": 3.2768,
210
+ "eval_samples_per_second": 29.907,
211
+ "eval_steps_per_second": 29.907,
212
  "step": 2000
213
  },
214
  {
215
  "epoch": 2.99,
216
+ "eval_f1": 0.5408209021870833,
217
+ "eval_loss": 1.4016790390014648,
218
+ "eval_runtime": 3.3122,
219
+ "eval_samples_per_second": 29.588,
220
+ "eval_steps_per_second": 29.588,
221
  "step": 2100
222
  },
223
  {
224
  "epoch": 3.13,
225
+ "eval_f1": 0.541281162975512,
226
+ "eval_loss": 1.5929616689682007,
227
+ "eval_runtime": 3.294,
228
+ "eval_samples_per_second": 29.751,
229
+ "eval_steps_per_second": 29.751,
230
  "step": 2200
231
  },
232
  {
233
  "epoch": 3.28,
234
+ "eval_f1": 0.5564758214624422,
235
+ "eval_loss": 1.5372625589370728,
236
+ "eval_runtime": 3.2882,
237
+ "eval_samples_per_second": 29.803,
238
+ "eval_steps_per_second": 29.803,
239
  "step": 2300
240
  },
241
  {
242
  "epoch": 3.42,
243
+ "eval_f1": 0.5722151004353093,
244
+ "eval_loss": 1.5012538433074951,
245
+ "eval_runtime": 3.3067,
246
+ "eval_samples_per_second": 29.637,
247
+ "eval_steps_per_second": 29.637,
248
  "step": 2400
249
  },
250
  {
251
  "epoch": 3.56,
252
  "learning_rate": 8.632478632478633e-06,
253
+ "loss": 0.4726,
254
  "step": 2500
255
  },
256
  {
257
  "epoch": 3.56,
258
+ "eval_f1": 0.5226487560978434,
259
+ "eval_loss": 1.570418119430542,
260
+ "eval_runtime": 3.3114,
261
+ "eval_samples_per_second": 29.595,
262
+ "eval_steps_per_second": 29.595,
263
  "step": 2500
264
  },
265
  {
266
  "epoch": 3.7,
267
+ "eval_f1": 0.5483719296880323,
268
+ "eval_loss": 1.5890936851501465,
269
+ "eval_runtime": 3.2745,
270
+ "eval_samples_per_second": 29.928,
271
+ "eval_steps_per_second": 29.928,
272
  "step": 2600
273
  },
274
  {
275
  "epoch": 3.85,
276
+ "eval_f1": 0.5630120856995185,
277
+ "eval_loss": 1.5236029624938965,
278
+ "eval_runtime": 3.2951,
279
+ "eval_samples_per_second": 29.741,
280
+ "eval_steps_per_second": 29.741,
281
  "step": 2700
282
  },
283
  {
284
  "epoch": 3.99,
285
+ "eval_f1": 0.5422100713682105,
286
+ "eval_loss": 1.52333664894104,
287
+ "eval_runtime": 3.3261,
288
+ "eval_samples_per_second": 29.464,
289
+ "eval_steps_per_second": 29.464,
290
  "step": 2800
291
  },
292
  {
293
  "epoch": 4.13,
294
+ "eval_f1": 0.5469719933620487,
295
+ "eval_loss": 1.6104604005813599,
296
+ "eval_runtime": 3.2888,
297
+ "eval_samples_per_second": 29.798,
298
+ "eval_steps_per_second": 29.798,
299
  "step": 2900
300
  },
301
  {
302
  "epoch": 4.27,
303
  "learning_rate": 4.358974358974359e-06,
304
+ "loss": 0.3745,
305
  "step": 3000
306
  },
307
  {
308
  "epoch": 4.27,
309
+ "eval_f1": 0.5525357490677262,
310
+ "eval_loss": 1.7136110067367554,
311
+ "eval_runtime": 3.3248,
312
+ "eval_samples_per_second": 29.476,
313
+ "eval_steps_per_second": 29.476,
314
  "step": 3000
315
  },
316
  {
317
  "epoch": 4.42,
318
+ "eval_f1": 0.5539436259955471,
319
+ "eval_loss": 1.6561492681503296,
320
+ "eval_runtime": 3.2857,
321
+ "eval_samples_per_second": 29.826,
322
+ "eval_steps_per_second": 29.826,
323
  "step": 3100
324
  },
325
  {
326
  "epoch": 4.56,
327
+ "eval_f1": 0.5504413375623162,
328
+ "eval_loss": 1.7664132118225098,
329
+ "eval_runtime": 3.2517,
330
+ "eval_samples_per_second": 30.138,
331
+ "eval_steps_per_second": 30.138,
332
  "step": 3200
333
  },
334
  {
335
  "epoch": 4.7,
336
+ "eval_f1": 0.5494419672200014,
337
+ "eval_loss": 1.750455379486084,
338
+ "eval_runtime": 3.27,
339
+ "eval_samples_per_second": 29.969,
340
+ "eval_steps_per_second": 29.969,
341
  "step": 3300
342
  },
343
  {
344
  "epoch": 4.84,
345
+ "eval_f1": 0.5516497223039627,
346
+ "eval_loss": 1.7312653064727783,
347
+ "eval_runtime": 3.3127,
348
+ "eval_samples_per_second": 29.583,
349
+ "eval_steps_per_second": 29.583,
350
  "step": 3400
351
  },
352
  {
353
  "epoch": 4.99,
354
  "learning_rate": 8.547008547008547e-08,
355
+ "loss": 0.307,
356
  "step": 3500
357
  },
358
  {
359
  "epoch": 4.99,
360
+ "eval_f1": 0.5515045914952008,
361
+ "eval_loss": 1.7193822860717773,
362
+ "eval_runtime": 3.2769,
363
+ "eval_samples_per_second": 29.907,
364
+ "eval_steps_per_second": 29.907,
365
  "step": 3500
366
  },
367
  {
368
  "epoch": 5.0,
369
  "step": 3510,
370
  "total_flos": 2890172619430200.0,
371
+ "train_loss": 0.6706694952103487,
372
+ "train_runtime": 824.1732,
373
+ "train_samples_per_second": 4.259,
374
+ "train_steps_per_second": 4.259
375
  }
376
  ],
377
  "max_steps": 3510,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cfda5e98c81808339d7d2f4ebe1a4d8758850fa025f005c7994010b43c85468
3
  size 3899
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5347b81bd66fbac180a70e3615c9c445e5992c7677db1c7c6314dc0b49027803
3
  size 3899