|
[ |
|
{ |
|
"loss": 0.7081, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.02, |
|
"step": 1 |
|
}, |
|
{ |
|
"loss": 0.6929, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.04, |
|
"step": 2 |
|
}, |
|
{ |
|
"loss": 0.6787, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.05, |
|
"step": 3 |
|
}, |
|
{ |
|
"loss": 0.6709, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.07, |
|
"step": 4 |
|
}, |
|
{ |
|
"loss": 0.6813, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.09, |
|
"step": 5 |
|
}, |
|
{ |
|
"loss": 0.6943, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.11, |
|
"step": 6 |
|
}, |
|
{ |
|
"loss": 0.6723, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.12, |
|
"step": 7 |
|
}, |
|
{ |
|
"loss": 0.6957, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.14, |
|
"step": 8 |
|
}, |
|
{ |
|
"loss": 0.6761, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.16, |
|
"step": 9 |
|
}, |
|
{ |
|
"loss": 0.6849, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.18, |
|
"step": 10 |
|
}, |
|
{ |
|
"loss": 0.6845, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.19, |
|
"step": 11 |
|
}, |
|
{ |
|
"loss": 0.6669, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.21, |
|
"step": 12 |
|
}, |
|
{ |
|
"loss": 0.6898, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.23, |
|
"step": 13 |
|
}, |
|
{ |
|
"loss": 0.679, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.25, |
|
"step": 14 |
|
}, |
|
{ |
|
"loss": 0.6709, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.26, |
|
"step": 15 |
|
}, |
|
{ |
|
"loss": 0.6656, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.28, |
|
"step": 16 |
|
}, |
|
{ |
|
"loss": 0.6515, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.3, |
|
"step": 17 |
|
}, |
|
{ |
|
"loss": 0.6924, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.32, |
|
"step": 18 |
|
}, |
|
{ |
|
"loss": 0.6658, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.33, |
|
"step": 19 |
|
}, |
|
{ |
|
"loss": 0.6431, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.35, |
|
"step": 20 |
|
}, |
|
{ |
|
"loss": 0.654, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.37, |
|
"step": 21 |
|
}, |
|
{ |
|
"loss": 0.6413, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.39, |
|
"step": 22 |
|
}, |
|
{ |
|
"loss": 0.6324, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.4, |
|
"step": 23 |
|
}, |
|
{ |
|
"loss": 0.6477, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.42, |
|
"step": 24 |
|
}, |
|
{ |
|
"loss": 0.5986, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.44, |
|
"step": 25 |
|
}, |
|
{ |
|
"eval_alpaca_easy_loss": 0.6038309931755066, |
|
"eval_alpaca_easy_score": -0.2037501335144043, |
|
"eval_alpaca_easy_brier_score": 0.2037501335144043, |
|
"eval_alpaca_easy_average_probability": 0.5523186326026917, |
|
"eval_alpaca_easy_accuracy": 0.82, |
|
"eval_alpaca_easy_probabilities": [ |
|
0.5295308232307434, |
|
0.5057793259620667, |
|
0.5614108443260193, |
|
0.5546582937240601, |
|
0.5417689681053162, |
|
0.5463963150978088, |
|
0.5312756299972534, |
|
0.5220988392829895, |
|
0.5744419693946838, |
|
0.49535176157951355, |
|
0.5163486003875732, |
|
0.5045437216758728, |
|
0.506483256816864, |
|
0.4947761595249176, |
|
0.5362703800201416, |
|
0.5523822903633118, |
|
0.5626899600028992, |
|
0.5259707570075989, |
|
0.47350722551345825, |
|
0.5299355387687683, |
|
0.529019296169281, |
|
0.4980572760105133, |
|
0.5992984175682068, |
|
0.5658584237098694, |
|
0.5407801866531372, |
|
0.5228542685508728, |
|
0.555320680141449, |
|
0.5154590010643005, |
|
0.5021631121635437, |
|
0.4988820254802704, |
|
0.4870697855949402, |
|
0.4954841434955597, |
|
0.4929210841655731, |
|
0.7058207988739014, |
|
0.6420620083808899, |
|
0.598629355430603, |
|
0.6156444549560547, |
|
0.7285832762718201, |
|
0.6223328709602356, |
|
0.5062462687492371, |
|
0.49911031126976013, |
|
0.5151018500328064, |
|
0.5159682035446167, |
|
0.5467914938926697, |
|
0.543242871761322, |
|
0.5003177523612976, |
|
0.5372806787490845, |
|
0.4985983967781067, |
|
0.5429829955101013, |
|
0.5698220729827881, |
|
0.5233312249183655, |
|
0.5959987044334412, |
|
0.49883463978767395, |
|
0.506894052028656, |
|
0.6351782083511353, |
|
0.5343899726867676, |
|
0.5538343787193298, |
|
0.5531216263771057, |
|
0.5477787256240845, |
|
0.5444912910461426, |
|
0.5307953357696533, |
|
0.4955398440361023, |
|
0.4888904094696045, |
|
0.5758087635040283, |
|
0.5426626205444336, |
|
0.5585545301437378, |
|
0.5415331125259399, |
|
0.5330247282981873, |
|
0.4646024703979492, |
|
0.571418046951294, |
|
0.5183749794960022, |
|
0.56840580701828, |
|
0.5402299165725708, |
|
0.5138457417488098, |
|
0.5349569320678711, |
|
0.4811114966869354, |
|
0.4874401092529297, |
|
0.4939919114112854, |
|
0.7561653256416321, |
|
0.7366093993186951, |
|
0.5557411909103394, |
|
0.6353941559791565, |
|
0.6225665211677551, |
|
0.5292084217071533, |
|
0.6310750246047974, |
|
0.6973796486854553, |
|
0.5622038245201111, |
|
0.4889812171459198, |
|
0.6262393593788147, |
|
0.5937395691871643, |
|
0.5847038626670837, |
|
0.6228916645050049, |
|
0.5857441425323486, |
|
0.5964227318763733, |
|
0.6308814287185669, |
|
0.6183395385742188, |
|
0.5489331483840942, |
|
0.5096936225891113, |
|
0.564061164855957, |
|
0.5425236225128174 |
|
], |
|
"eval_alpaca_easy_runtime": 3.7799, |
|
"eval_alpaca_easy_samples_per_second": 26.455, |
|
"eval_alpaca_easy_steps_per_second": 0.529, |
|
"epoch": 0.44, |
|
"step": 25 |
|
}, |
|
{ |
|
"eval_alpaca_hard_loss": 0.6865063309669495, |
|
"eval_alpaca_hard_score": -0.24245089292526245, |
|
"eval_alpaca_hard_brier_score": 0.24245089292526245, |
|
"eval_alpaca_hard_average_probability": 0.5107858777046204, |
|
"eval_alpaca_hard_accuracy": 0.58, |
|
"eval_alpaca_hard_probabilities": [ |
|
0.5364819169044495, |
|
0.544447660446167, |
|
0.4942440688610077, |
|
0.4309611916542053, |
|
0.4760230779647827, |
|
0.4633902609348297, |
|
0.5857327580451965, |
|
0.47645318508148193, |
|
0.5042262673377991, |
|
0.5458271503448486, |
|
0.502610445022583, |
|
0.46624234318733215, |
|
0.5814525485038757, |
|
0.566200852394104, |
|
0.5369539856910706, |
|
0.5992622971534729, |
|
0.5005086064338684, |
|
0.5276132225990295, |
|
0.6101058125495911, |
|
0.5822996497154236, |
|
0.6960972547531128, |
|
0.469584196805954, |
|
0.4924374222755432, |
|
0.43986785411834717, |
|
0.5514828562736511, |
|
0.5575672388076782, |
|
0.5200520157814026, |
|
0.47670218348503113, |
|
0.4617862105369568, |
|
0.44412386417388916, |
|
0.4381404221057892, |
|
0.43889090418815613, |
|
0.4304584264755249, |
|
0.5349152088165283, |
|
0.5112286806106567, |
|
0.514824628829956, |
|
0.5156821608543396, |
|
0.513275682926178, |
|
0.48444420099258423, |
|
0.4030201733112335, |
|
0.5189833641052246, |
|
0.5497680902481079, |
|
0.413649320602417, |
|
0.4391551613807678, |
|
0.4390448033809662, |
|
0.5646383166313171, |
|
0.5546966195106506, |
|
0.561900794506073, |
|
0.5682961344718933, |
|
0.5135508179664612, |
|
0.5880781412124634, |
|
0.5017771124839783, |
|
0.48068204522132874, |
|
0.48269954323768616, |
|
0.45279571413993835, |
|
0.4608636200428009, |
|
0.4884440302848816, |
|
0.4911855161190033, |
|
0.49670666456222534, |
|
0.5000248551368713, |
|
0.4351290166378021, |
|
0.48081785440444946, |
|
0.46093469858169556, |
|
0.5141025185585022, |
|
0.5184120535850525, |
|
0.5283690094947815, |
|
0.6107799410820007, |
|
0.5938130617141724, |
|
0.5590876936912537, |
|
0.49047666788101196, |
|
0.4948485493659973, |
|
0.5061522722244263, |
|
0.5803380012512207, |
|
0.5489677786827087, |
|
0.5542504787445068, |
|
0.4934714436531067, |
|
0.4892035126686096, |
|
0.4983595311641693, |
|
0.4986925423145294, |
|
0.5141690969467163, |
|
0.5002953410148621, |
|
0.3621358275413513, |
|
0.4719257950782776, |
|
0.3652734160423279, |
|
0.5343693494796753, |
|
0.5327456593513489, |
|
0.543885350227356, |
|
0.5434888601303101, |
|
0.5200163722038269, |
|
0.5112729072570801, |
|
0.5116232633590698, |
|
0.46298518776893616, |
|
0.47948920726776123, |
|
0.5102067589759827, |
|
0.5256311297416687, |
|
0.5086446404457092, |
|
0.6728835701942444, |
|
0.6101486682891846, |
|
0.499792218208313, |
|
0.5488373041152954 |
|
], |
|
"eval_alpaca_hard_runtime": 13.2843, |
|
"eval_alpaca_hard_samples_per_second": 7.528, |
|
"eval_alpaca_hard_steps_per_second": 0.151, |
|
"epoch": 0.44, |
|
"step": 25 |
|
}, |
|
{ |
|
"loss": 0.6053, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.46, |
|
"step": 26 |
|
}, |
|
{ |
|
"loss": 0.6074, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.47, |
|
"step": 27 |
|
}, |
|
{ |
|
"loss": 0.6186, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.49, |
|
"step": 28 |
|
}, |
|
{ |
|
"loss": 0.55, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.51, |
|
"step": 29 |
|
}, |
|
{ |
|
"loss": 0.5719, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.53, |
|
"step": 30 |
|
}, |
|
{ |
|
"loss": 0.592, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.54, |
|
"step": 31 |
|
}, |
|
{ |
|
"loss": 0.5286, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.56, |
|
"step": 32 |
|
}, |
|
{ |
|
"loss": 0.4958, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.58, |
|
"step": 33 |
|
}, |
|
{ |
|
"loss": 0.4903, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.6, |
|
"step": 34 |
|
}, |
|
{ |
|
"loss": 0.5294, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.61, |
|
"step": 35 |
|
}, |
|
{ |
|
"loss": 0.5333, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.63, |
|
"step": 36 |
|
}, |
|
{ |
|
"loss": 0.439, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.65, |
|
"step": 37 |
|
}, |
|
{ |
|
"loss": 0.4078, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.67, |
|
"step": 38 |
|
}, |
|
{ |
|
"loss": 0.3939, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.68, |
|
"step": 39 |
|
}, |
|
{ |
|
"loss": 0.3791, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.7, |
|
"step": 40 |
|
}, |
|
{ |
|
"loss": 0.3351, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.72, |
|
"step": 41 |
|
}, |
|
{ |
|
"loss": 0.3012, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.74, |
|
"step": 42 |
|
}, |
|
{ |
|
"loss": 0.3834, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.75, |
|
"step": 43 |
|
}, |
|
{ |
|
"loss": 0.2498, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.77, |
|
"step": 44 |
|
}, |
|
{ |
|
"loss": 0.3005, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.79, |
|
"step": 45 |
|
}, |
|
{ |
|
"loss": 0.3361, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.81, |
|
"step": 46 |
|
}, |
|
{ |
|
"loss": 0.2982, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.82, |
|
"step": 47 |
|
}, |
|
{ |
|
"loss": 0.2297, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.84, |
|
"step": 48 |
|
}, |
|
{ |
|
"loss": 0.1461, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.86, |
|
"step": 49 |
|
}, |
|
{ |
|
"loss": 0.2514, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.88, |
|
"step": 50 |
|
}, |
|
{ |
|
"eval_alpaca_easy_loss": 0.21854297816753387, |
|
"eval_alpaca_easy_score": -0.06911445409059525, |
|
"eval_alpaca_easy_brier_score": 0.06911445409059525, |
|
"eval_alpaca_easy_average_probability": 0.8375207781791687, |
|
"eval_alpaca_easy_accuracy": 0.89, |
|
"eval_alpaca_easy_probabilities": [ |
|
0.9335387945175171, |
|
0.7575387358665466, |
|
0.8241543173789978, |
|
0.9561293125152588, |
|
0.8131343722343445, |
|
0.9223845601081848, |
|
0.8792718648910522, |
|
0.7876971960067749, |
|
0.8319079875946045, |
|
0.5416871905326843, |
|
0.46801650524139404, |
|
0.6152990460395813, |
|
0.9775791764259338, |
|
0.8192522525787354, |
|
0.9923909306526184, |
|
0.933570384979248, |
|
0.8240628838539124, |
|
0.9486610293388367, |
|
0.5026794672012329, |
|
0.8657979369163513, |
|
0.9835583567619324, |
|
0.9997386336326599, |
|
0.9999597072601318, |
|
0.999686598777771, |
|
0.8829324841499329, |
|
0.9419685006141663, |
|
0.9505814909934998, |
|
0.8838004469871521, |
|
0.9533843994140625, |
|
0.9830142259597778, |
|
0.41525161266326904, |
|
0.37579113245010376, |
|
0.43717271089553833, |
|
0.9997331500053406, |
|
0.9996693134307861, |
|
0.9993166923522949, |
|
0.999929666519165, |
|
0.9999723434448242, |
|
0.9998679161071777, |
|
0.9194768667221069, |
|
0.9749594926834106, |
|
0.953002393245697, |
|
0.5442489981651306, |
|
0.8037471771240234, |
|
0.9515517354011536, |
|
0.38740476965904236, |
|
0.598035454750061, |
|
0.3506077527999878, |
|
0.8902438879013062, |
|
0.9017742276191711, |
|
0.7621347904205322, |
|
0.9978957176208496, |
|
0.9675268530845642, |
|
0.362326443195343, |
|
0.9315894246101379, |
|
0.9950016140937805, |
|
0.7550820112228394, |
|
0.5803986191749573, |
|
0.6384031176567078, |
|
0.9678927063941956, |
|
0.4579581320285797, |
|
0.5416884422302246, |
|
0.2313394397497177, |
|
0.9961456060409546, |
|
0.9986750483512878, |
|
0.9978812336921692, |
|
0.7217641472816467, |
|
0.918764054775238, |
|
0.5912315249443054, |
|
0.9710973501205444, |
|
0.9987249970436096, |
|
0.9980605244636536, |
|
0.9760357141494751, |
|
0.9090193510055542, |
|
0.7866935133934021, |
|
0.5642421245574951, |
|
0.4463464617729187, |
|
0.5121012330055237, |
|
0.9997532963752747, |
|
0.9999643564224243, |
|
0.997353196144104, |
|
0.9998928308486938, |
|
0.9993001222610474, |
|
0.8527346253395081, |
|
0.9968653321266174, |
|
0.9998250603675842, |
|
0.9157262444496155, |
|
0.9992050528526306, |
|
0.9995445609092712, |
|
0.9996125102043152, |
|
0.9402360320091248, |
|
0.9978588223457336, |
|
0.9311167001724243, |
|
0.9998679161071777, |
|
0.9999362230300903, |
|
0.9991693496704102, |
|
0.9884861707687378, |
|
0.892048180103302, |
|
0.9969700574874878, |
|
0.3954496383666992 |
|
], |
|
"eval_alpaca_easy_runtime": 3.7822, |
|
"eval_alpaca_easy_samples_per_second": 26.439, |
|
"eval_alpaca_easy_steps_per_second": 0.529, |
|
"epoch": 0.88, |
|
"step": 50 |
|
}, |
|
{ |
|
"eval_alpaca_hard_loss": 0.7232790589332581, |
|
"eval_alpaca_hard_score": -0.24684154987335205, |
|
"eval_alpaca_hard_brier_score": 0.24684154987335205, |
|
"eval_alpaca_hard_average_probability": 0.5583958029747009, |
|
"eval_alpaca_hard_accuracy": 0.56, |
|
"eval_alpaca_hard_probabilities": [ |
|
0.5091797709465027, |
|
0.45687368512153625, |
|
0.2296309471130371, |
|
0.09132048487663269, |
|
0.1085263267159462, |
|
0.06897741556167603, |
|
0.6520460844039917, |
|
0.26198920607566833, |
|
0.38983526825904846, |
|
0.672316312789917, |
|
0.5977341532707214, |
|
0.5184259414672852, |
|
0.917797327041626, |
|
0.8716388940811157, |
|
0.8208816051483154, |
|
0.7306072115898132, |
|
0.357118159532547, |
|
0.29603147506713867, |
|
0.43579915165901184, |
|
0.5577803254127502, |
|
0.7746726870536804, |
|
0.4640718102455139, |
|
0.5277922749519348, |
|
0.3958097994327545, |
|
0.8787009119987488, |
|
0.8493514060974121, |
|
0.7555126547813416, |
|
0.46826183795928955, |
|
0.4495123326778412, |
|
0.4016748368740082, |
|
0.3987813889980316, |
|
0.48629680275917053, |
|
0.3910517990589142, |
|
0.6866589188575745, |
|
0.5479096174240112, |
|
0.5331591963768005, |
|
0.8098393678665161, |
|
0.4907962381839752, |
|
0.6415437459945679, |
|
0.400081604719162, |
|
0.669253945350647, |
|
0.8595155477523804, |
|
0.14572779834270477, |
|
0.3623894453048706, |
|
0.17939132452011108, |
|
0.8952731490135193, |
|
0.8710764050483704, |
|
0.8904998302459717, |
|
0.9170958399772644, |
|
0.8013655543327332, |
|
0.9452418088912964, |
|
0.5035410523414612, |
|
0.47102150321006775, |
|
0.46311095356941223, |
|
0.36252346634864807, |
|
0.16473020613193512, |
|
0.5620750188827515, |
|
0.45467886328697205, |
|
0.47786298394203186, |
|
0.5290535092353821, |
|
0.43917059898376465, |
|
0.48329001665115356, |
|
0.4765845537185669, |
|
0.5885331630706787, |
|
0.6011337637901306, |
|
0.68278568983078, |
|
0.672578752040863, |
|
0.9778881072998047, |
|
0.7362648248672485, |
|
0.44131791591644287, |
|
0.42657074332237244, |
|
0.45161327719688416, |
|
0.966640293598175, |
|
0.9280027151107788, |
|
0.958257794380188, |
|
0.4861025810241699, |
|
0.4898521602153778, |
|
0.47333472967147827, |
|
0.5379680395126343, |
|
0.5323953628540039, |
|
0.5555186867713928, |
|
0.3566206991672516, |
|
0.5559220910072327, |
|
0.15745283663272858, |
|
0.5299971699714661, |
|
0.5367245674133301, |
|
0.5590250492095947, |
|
0.5469828844070435, |
|
0.5144724249839783, |
|
0.3406384587287903, |
|
0.32951289415359497, |
|
0.2981869876384735, |
|
0.29177844524383545, |
|
0.5109080672264099, |
|
0.5420987010002136, |
|
0.5402225255966187, |
|
0.9998739957809448, |
|
0.9997929930686951, |
|
0.982799232006073, |
|
0.919344425201416 |
|
], |
|
"eval_alpaca_hard_runtime": 13.2707, |
|
"eval_alpaca_hard_samples_per_second": 7.535, |
|
"eval_alpaca_hard_steps_per_second": 0.151, |
|
"epoch": 0.88, |
|
"step": 50 |
|
}, |
|
{ |
|
"loss": 0.1789, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.89, |
|
"step": 51 |
|
}, |
|
{ |
|
"loss": 0.1617, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.91, |
|
"step": 52 |
|
}, |
|
{ |
|
"loss": 0.2522, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.93, |
|
"step": 53 |
|
}, |
|
{ |
|
"loss": 0.1301, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.95, |
|
"step": 54 |
|
}, |
|
{ |
|
"loss": 0.2092, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.96, |
|
"step": 55 |
|
}, |
|
{ |
|
"loss": 0.343, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 0.98, |
|
"step": 56 |
|
}, |
|
{ |
|
"loss": 0.1775, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"loss": 0.3167, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.02, |
|
"step": 58 |
|
}, |
|
{ |
|
"loss": 0.1593, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.04, |
|
"step": 59 |
|
}, |
|
{ |
|
"loss": 0.2358, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.05, |
|
"step": 60 |
|
}, |
|
{ |
|
"loss": 0.1818, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.07, |
|
"step": 61 |
|
}, |
|
{ |
|
"loss": 0.178, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.09, |
|
"step": 62 |
|
}, |
|
{ |
|
"loss": 0.1063, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.11, |
|
"step": 63 |
|
}, |
|
{ |
|
"loss": 0.1294, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.12, |
|
"step": 64 |
|
}, |
|
{ |
|
"loss": 0.2812, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.14, |
|
"step": 65 |
|
}, |
|
{ |
|
"loss": 0.2053, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.16, |
|
"step": 66 |
|
}, |
|
{ |
|
"loss": 0.2073, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.18, |
|
"step": 67 |
|
}, |
|
{ |
|
"loss": 0.1226, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.19, |
|
"step": 68 |
|
}, |
|
{ |
|
"loss": 0.1523, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.21, |
|
"step": 69 |
|
}, |
|
{ |
|
"loss": 0.0637, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.23, |
|
"step": 70 |
|
}, |
|
{ |
|
"loss": 0.1489, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.25, |
|
"step": 71 |
|
}, |
|
{ |
|
"loss": 0.1061, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.26, |
|
"step": 72 |
|
}, |
|
{ |
|
"loss": 0.1195, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.28, |
|
"step": 73 |
|
}, |
|
{ |
|
"loss": 0.139, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.3, |
|
"step": 74 |
|
}, |
|
{ |
|
"loss": 0.1934, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.32, |
|
"step": 75 |
|
}, |
|
{ |
|
"eval_alpaca_easy_loss": 0.17362338304519653, |
|
"eval_alpaca_easy_score": -0.05800193175673485, |
|
"eval_alpaca_easy_brier_score": 0.05800193175673485, |
|
"eval_alpaca_easy_average_probability": 0.8859497904777527, |
|
"eval_alpaca_easy_accuracy": 0.91, |
|
"eval_alpaca_easy_probabilities": [ |
|
0.9814141392707825, |
|
0.9669703245162964, |
|
0.938133955001831, |
|
0.9997879862785339, |
|
0.9779133200645447, |
|
0.9992324113845825, |
|
0.9836024045944214, |
|
0.8352786898612976, |
|
0.9876167178153992, |
|
0.8626598715782166, |
|
0.8485167622566223, |
|
0.9286569952964783, |
|
0.9994561076164246, |
|
0.9908925294876099, |
|
0.99991774559021, |
|
0.9943933486938477, |
|
0.9816949367523193, |
|
0.9974855184555054, |
|
0.6333666443824768, |
|
0.7378324270248413, |
|
0.9823349714279175, |
|
0.999976634979248, |
|
0.9999986886978149, |
|
0.9999675750732422, |
|
0.9795928001403809, |
|
0.9976372718811035, |
|
0.9981619715690613, |
|
0.9662743210792542, |
|
0.9859842658042908, |
|
0.9896988272666931, |
|
0.5837883353233337, |
|
0.40358614921569824, |
|
0.5803671479225159, |
|
0.9999978542327881, |
|
0.9999971389770508, |
|
0.9999898672103882, |
|
0.999998927116394, |
|
0.9999995231628418, |
|
0.9999985694885254, |
|
0.9962904453277588, |
|
0.9986306428909302, |
|
0.9968884587287903, |
|
0.8497913479804993, |
|
0.9532861113548279, |
|
0.9684224724769592, |
|
0.6850523352622986, |
|
0.7597206830978394, |
|
0.6006273031234741, |
|
0.9910978078842163, |
|
0.9928036332130432, |
|
0.815373957157135, |
|
0.9999027252197266, |
|
0.9793263673782349, |
|
0.3308352828025818, |
|
0.9976498484611511, |
|
0.9998941421508789, |
|
0.9801182746887207, |
|
0.47125259041786194, |
|
0.12768128514289856, |
|
0.8618684411048889, |
|
0.5930814146995544, |
|
0.22981221973896027, |
|
0.08031404763460159, |
|
0.9999855756759644, |
|
0.9999946355819702, |
|
0.9999954700469971, |
|
0.861335039138794, |
|
0.9934486746788025, |
|
0.6305707097053528, |
|
0.9997461438179016, |
|
0.9999979734420776, |
|
0.9999936819076538, |
|
0.9994065761566162, |
|
0.9860069751739502, |
|
0.9172288179397583, |
|
0.6882821321487427, |
|
0.4063083529472351, |
|
0.38757264614105225, |
|
0.9999996423721313, |
|
1.0, |
|
0.9999947547912598, |
|
0.999995231628418, |
|
0.9999725818634033, |
|
0.9836964011192322, |
|
0.9999876022338867, |
|
0.9999994039535522, |
|
0.9959157109260559, |
|
0.9998799562454224, |
|
0.9999147653579712, |
|
0.9999622106552124, |
|
0.9993690848350525, |
|
0.9999873638153076, |
|
0.9987542629241943, |
|
0.9999992847442627, |
|
0.9999996423721313, |
|
0.999994158744812, |
|
0.9951190948486328, |
|
0.9560932517051697, |
|
0.9992212057113647, |
|
0.4253509044647217 |
|
], |
|
"eval_alpaca_easy_runtime": 3.7813, |
|
"eval_alpaca_easy_samples_per_second": 26.446, |
|
"eval_alpaca_easy_steps_per_second": 0.529, |
|
"epoch": 1.32, |
|
"step": 75 |
|
}, |
|
{ |
|
"eval_alpaca_hard_loss": 0.6868531703948975, |
|
"eval_alpaca_hard_score": -0.22423477470874786, |
|
"eval_alpaca_hard_brier_score": 0.22423477470874786, |
|
"eval_alpaca_hard_average_probability": 0.6039663553237915, |
|
"eval_alpaca_hard_accuracy": 0.6, |
|
"eval_alpaca_hard_probabilities": [ |
|
0.7195684313774109, |
|
0.6870822310447693, |
|
0.5606399774551392, |
|
0.03213631734251976, |
|
0.06947893649339676, |
|
0.0764903798699379, |
|
0.6716647744178772, |
|
0.38889893889427185, |
|
0.6441338658332825, |
|
0.6481211185455322, |
|
0.6348515152931213, |
|
0.586635947227478, |
|
0.9901716113090515, |
|
0.9763527512550354, |
|
0.9396302103996277, |
|
0.8357298970222473, |
|
0.31430214643478394, |
|
0.15089230239391327, |
|
0.4317939579486847, |
|
0.40307217836380005, |
|
0.8377373814582825, |
|
0.7799301147460938, |
|
0.7532876133918762, |
|
0.6182923316955566, |
|
0.960372269153595, |
|
0.9502956867218018, |
|
0.8797224760055542, |
|
0.4285240173339844, |
|
0.41422519087791443, |
|
0.34578338265419006, |
|
0.39578521251678467, |
|
0.5920235514640808, |
|
0.3433470129966736, |
|
0.9059948325157166, |
|
0.6233101487159729, |
|
0.6643248200416565, |
|
0.9120030999183655, |
|
0.6137223839759827, |
|
0.7400955557823181, |
|
0.4130854606628418, |
|
0.6910074353218079, |
|
0.9417619705200195, |
|
0.05031133443117142, |
|
0.34697777032852173, |
|
0.06915117055177689, |
|
0.9617620706558228, |
|
0.9447910785675049, |
|
0.9539125561714172, |
|
0.9690456986427307, |
|
0.8752149343490601, |
|
0.9869959354400635, |
|
0.503294050693512, |
|
0.40295642614364624, |
|
0.510491132736206, |
|
0.43736109137535095, |
|
0.1717284917831421, |
|
0.5252802968025208, |
|
0.446821391582489, |
|
0.49211370944976807, |
|
0.5365777611732483, |
|
0.4596339762210846, |
|
0.4909523129463196, |
|
0.4946014881134033, |
|
0.6593226790428162, |
|
0.6939438581466675, |
|
0.751128077507019, |
|
0.9175297021865845, |
|
0.9999277591705322, |
|
0.9369798302650452, |
|
0.47428709268569946, |
|
0.3868890106678009, |
|
0.41381287574768066, |
|
0.9797312617301941, |
|
0.9649485945701599, |
|
0.96959388256073, |
|
0.4884977340698242, |
|
0.50208580493927, |
|
0.45323678851127625, |
|
0.5411497950553894, |
|
0.6031054258346558, |
|
0.5808246731758118, |
|
0.28195828199386597, |
|
0.44778403639793396, |
|
0.20863527059555054, |
|
0.5962353348731995, |
|
0.5934168100357056, |
|
0.6204326748847961, |
|
0.6265460252761841, |
|
0.6301620602607727, |
|
0.2525644898414612, |
|
0.49464210867881775, |
|
0.40197068452835083, |
|
0.39711490273475647, |
|
0.4450596868991852, |
|
0.394875168800354, |
|
0.49281904101371765, |
|
0.9999983310699463, |
|
0.9999977350234985, |
|
0.9998503923416138, |
|
0.9993195533752441 |
|
], |
|
"eval_alpaca_hard_runtime": 13.2686, |
|
"eval_alpaca_hard_samples_per_second": 7.537, |
|
"eval_alpaca_hard_steps_per_second": 0.151, |
|
"epoch": 1.32, |
|
"step": 75 |
|
}, |
|
{ |
|
"loss": 0.1722, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.33, |
|
"step": 76 |
|
}, |
|
{ |
|
"loss": 0.1066, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.35, |
|
"step": 77 |
|
}, |
|
{ |
|
"loss": 0.18, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.37, |
|
"step": 78 |
|
}, |
|
{ |
|
"loss": 0.2533, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.39, |
|
"step": 79 |
|
}, |
|
{ |
|
"loss": 0.1293, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.4, |
|
"step": 80 |
|
}, |
|
{ |
|
"loss": 0.2125, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.42, |
|
"step": 81 |
|
}, |
|
{ |
|
"loss": 0.05, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.44, |
|
"step": 82 |
|
}, |
|
{ |
|
"loss": 0.1155, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.46, |
|
"step": 83 |
|
}, |
|
{ |
|
"loss": 0.2425, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.47, |
|
"step": 84 |
|
}, |
|
{ |
|
"loss": 0.1105, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.49, |
|
"step": 85 |
|
}, |
|
{ |
|
"loss": 0.1689, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.51, |
|
"step": 86 |
|
}, |
|
{ |
|
"loss": 0.094, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.53, |
|
"step": 87 |
|
}, |
|
{ |
|
"loss": 0.0889, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.54, |
|
"step": 88 |
|
}, |
|
{ |
|
"loss": 0.0568, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.56, |
|
"step": 89 |
|
}, |
|
{ |
|
"loss": 0.1448, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.58, |
|
"step": 90 |
|
}, |
|
{ |
|
"loss": 0.1498, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.6, |
|
"step": 91 |
|
}, |
|
{ |
|
"loss": 0.1667, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.61, |
|
"step": 92 |
|
}, |
|
{ |
|
"loss": 0.1256, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.63, |
|
"step": 93 |
|
}, |
|
{ |
|
"loss": 0.1408, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.65, |
|
"step": 94 |
|
}, |
|
{ |
|
"loss": 0.1304, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.67, |
|
"step": 95 |
|
}, |
|
{ |
|
"loss": 0.1628, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.68, |
|
"step": 96 |
|
}, |
|
{ |
|
"loss": 0.0764, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.7, |
|
"step": 97 |
|
}, |
|
{ |
|
"loss": 0.131, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.72, |
|
"step": 98 |
|
}, |
|
{ |
|
"loss": 0.1613, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.74, |
|
"step": 99 |
|
}, |
|
{ |
|
"loss": 0.0609, |
|
"learning_rate": 7.2e-05, |
|
"epoch": 1.75, |
|
"step": 100 |
|
}, |
|
{ |
|
"eval_alpaca_easy_loss": 0.14305388927459717, |
|
"eval_alpaca_easy_score": -0.04583239182829857, |
|
"eval_alpaca_easy_brier_score": 0.04583239182829857, |
|
"eval_alpaca_easy_average_probability": 0.8988600969314575, |
|
"eval_alpaca_easy_accuracy": 0.94, |
|
"eval_alpaca_easy_probabilities": [ |
|
0.996820330619812, |
|
0.9908674955368042, |
|
0.9839729070663452, |
|
0.9997697472572327, |
|
0.9935408234596252, |
|
0.9989989399909973, |
|
0.9893901944160461, |
|
0.8882415294647217, |
|
0.9966920614242554, |
|
0.8718615174293518, |
|
0.8631781935691833, |
|
0.9655619263648987, |
|
0.9941714406013489, |
|
0.9529747366905212, |
|
0.9994912147521973, |
|
0.9851876497268677, |
|
0.982313871383667, |
|
0.9943521022796631, |
|
0.6093587875366211, |
|
0.5961812138557434, |
|
0.9631479978561401, |
|
0.9977989792823792, |
|
0.9998836517333984, |
|
0.9986874461174011, |
|
0.9446632862091064, |
|
0.9951596856117249, |
|
0.9964768290519714, |
|
0.9390566349029541, |
|
0.9299038648605347, |
|
0.9697977304458618, |
|
0.5837473273277283, |
|
0.4012439250946045, |
|
0.5686047077178955, |
|
0.9999639987945557, |
|
0.9999489784240723, |
|
0.9997902512550354, |
|
0.999981164932251, |
|
0.9999935626983643, |
|
0.9999730587005615, |
|
0.9976915121078491, |
|
0.9996391534805298, |
|
0.9988808035850525, |
|
0.8831126093864441, |
|
0.9712970852851868, |
|
0.9558087587356567, |
|
0.8308846950531006, |
|
0.9274584054946899, |
|
0.7136300206184387, |
|
0.9957448840141296, |
|
0.9969441294670105, |
|
0.8197059035301208, |
|
0.9999794960021973, |
|
0.995776355266571, |
|
0.7675074934959412, |
|
0.9992235898971558, |
|
0.9998378753662109, |
|
0.9934118390083313, |
|
0.5503733158111572, |
|
0.18256528675556183, |
|
0.6016505360603333, |
|
0.7381737232208252, |
|
0.32503536343574524, |
|
0.2153652012348175, |
|
0.9998658895492554, |
|
0.9999431371688843, |
|
0.9999639987945557, |
|
0.9294857382774353, |
|
0.9895972609519958, |
|
0.640900194644928, |
|
0.999634861946106, |
|
0.9999843835830688, |
|
0.9999666213989258, |
|
0.9959270358085632, |
|
0.9684234857559204, |
|
0.9475449919700623, |
|
0.7184933423995972, |
|
0.4138542115688324, |
|
0.3449597954750061, |
|
0.9999960660934448, |
|
0.999998927116394, |
|
0.9999078512191772, |
|
0.9999330043792725, |
|
0.9995478987693787, |
|
0.9607729911804199, |
|
0.9999856948852539, |
|
0.9999980926513672, |
|
0.9993178844451904, |
|
0.9984322190284729, |
|
0.9992688298225403, |
|
0.9996477365493774, |
|
0.9986903071403503, |
|
0.9999394416809082, |
|
0.9978135824203491, |
|
0.9999845027923584, |
|
0.9999935626983643, |
|
0.9998742341995239, |
|
0.9854878783226013, |
|
0.9131860136985779, |
|
0.9962959885597229, |
|
0.6889436841011047 |
|
], |
|
"eval_alpaca_easy_runtime": 3.7858, |
|
"eval_alpaca_easy_samples_per_second": 26.414, |
|
"eval_alpaca_easy_steps_per_second": 0.528, |
|
"epoch": 1.75, |
|
"step": 100 |
|
}, |
|
{ |
|
"eval_alpaca_hard_loss": 0.6546275615692139, |
|
"eval_alpaca_hard_score": -0.21590329706668854, |
|
"eval_alpaca_hard_brier_score": 0.21590329706668854, |
|
"eval_alpaca_hard_average_probability": 0.6073423624038696, |
|
"eval_alpaca_hard_accuracy": 0.65, |
|
"eval_alpaca_hard_probabilities": [ |
|
0.5967098474502563, |
|
0.6487985253334045, |
|
0.4901181161403656, |
|
0.04925023019313812, |
|
0.1195535808801651, |
|
0.11974883079528809, |
|
0.7021012902259827, |
|
0.4458252489566803, |
|
0.6431359648704529, |
|
0.640688419342041, |
|
0.6369943022727966, |
|
0.5759397745132446, |
|
0.9847426414489746, |
|
0.965976357460022, |
|
0.9159088134765625, |
|
0.840829610824585, |
|
0.31512972712516785, |
|
0.1900986284017563, |
|
0.5496805906295776, |
|
0.5141911506652832, |
|
0.8742411732673645, |
|
0.7793548107147217, |
|
0.757411539554596, |
|
0.6102295517921448, |
|
0.9421865344047546, |
|
0.9310181140899658, |
|
0.8424404263496399, |
|
0.42353448271751404, |
|
0.412136435508728, |
|
0.36248868703842163, |
|
0.3814578056335449, |
|
0.565723180770874, |
|
0.34751203656196594, |
|
0.9329924583435059, |
|
0.6731026768684387, |
|
0.7347862720489502, |
|
0.916222333908081, |
|
0.6413589715957642, |
|
0.7097361087799072, |
|
0.41041406989097595, |
|
0.718764066696167, |
|
0.9438978433609009, |
|
0.06302079558372498, |
|
0.33584702014923096, |
|
0.10254550725221634, |
|
0.9500865936279297, |
|
0.935807466506958, |
|
0.9368900656700134, |
|
0.9388522505760193, |
|
0.8173050284385681, |
|
0.97514408826828, |
|
0.49964869022369385, |
|
0.412986695766449, |
|
0.47653329372406006, |
|
0.33894622325897217, |
|
0.18001103401184082, |
|
0.5316197872161865, |
|
0.464274525642395, |
|
0.516484797000885, |
|
0.5388634204864502, |
|
0.4795582890510559, |
|
0.5031061768531799, |
|
0.5038958191871643, |
|
0.6659919023513794, |
|
0.7104204297065735, |
|
0.7587425112724304, |
|
0.8117100596427917, |
|
0.9964817762374878, |
|
0.8733975887298584, |
|
0.46874865889549255, |
|
0.38377025723457336, |
|
0.4227462708950043, |
|
0.9596135020256042, |
|
0.9397307634353638, |
|
0.9512374401092529, |
|
0.4855881631374359, |
|
0.49799981713294983, |
|
0.4557354152202606, |
|
0.5651748180389404, |
|
0.6250565052032471, |
|
0.591163158416748, |
|
0.2509346008300781, |
|
0.40784627199172974, |
|
0.1966390460729599, |
|
0.5992413759231567, |
|
0.5877768397331238, |
|
0.6198946833610535, |
|
0.5964317917823792, |
|
0.6344363689422607, |
|
0.25434669852256775, |
|
0.6021557450294495, |
|
0.513039231300354, |
|
0.515647828578949, |
|
0.4882362484931946, |
|
0.45470163226127625, |
|
0.5013281106948853, |
|
0.9999687671661377, |
|
0.9999552965164185, |
|
0.9947550296783447, |
|
0.9957408905029297 |
|
], |
|
"eval_alpaca_hard_runtime": 13.2701, |
|
"eval_alpaca_hard_samples_per_second": 7.536, |
|
"eval_alpaca_hard_steps_per_second": 0.151, |
|
"epoch": 1.75, |
|
"step": 100 |
|
}, |
|
{ |
|
"train_runtime": 438.2982, |
|
"train_samples_per_second": 7.301, |
|
"train_steps_per_second": 0.228, |
|
"total_flos": 0.0, |
|
"train_loss": 0.3524343585968018, |
|
"epoch": 1.75, |
|
"step": 100 |
|
} |
|
] |