|
[ |
|
{ |
|
"loss": 0.8525, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.02, |
|
"step": 1 |
|
}, |
|
{ |
|
"loss": 0.6634, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.04, |
|
"step": 2 |
|
}, |
|
{ |
|
"loss": 0.7302, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.05, |
|
"step": 3 |
|
}, |
|
{ |
|
"loss": 0.7507, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.07, |
|
"step": 4 |
|
}, |
|
{ |
|
"loss": 0.7715, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.09, |
|
"step": 5 |
|
}, |
|
{ |
|
"loss": 0.7064, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.11, |
|
"step": 6 |
|
}, |
|
{ |
|
"loss": 0.7124, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.12, |
|
"step": 7 |
|
}, |
|
{ |
|
"loss": 0.7223, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.14, |
|
"step": 8 |
|
}, |
|
{ |
|
"loss": 0.6888, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.16, |
|
"step": 9 |
|
}, |
|
{ |
|
"loss": 0.7404, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.18, |
|
"step": 10 |
|
}, |
|
{ |
|
"loss": 0.6575, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.19, |
|
"step": 11 |
|
}, |
|
{ |
|
"loss": 0.6896, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.21, |
|
"step": 12 |
|
}, |
|
{ |
|
"loss": 0.6336, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.23, |
|
"step": 13 |
|
}, |
|
{ |
|
"loss": 0.6474, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.25, |
|
"step": 14 |
|
}, |
|
{ |
|
"loss": 0.6993, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.26, |
|
"step": 15 |
|
}, |
|
{ |
|
"loss": 0.6219, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.28, |
|
"step": 16 |
|
}, |
|
{ |
|
"loss": 0.7434, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.3, |
|
"step": 17 |
|
}, |
|
{ |
|
"loss": 0.7488, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.32, |
|
"step": 18 |
|
}, |
|
{ |
|
"loss": 0.7165, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.33, |
|
"step": 19 |
|
}, |
|
{ |
|
"loss": 0.7253, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.35, |
|
"step": 20 |
|
}, |
|
{ |
|
"loss": 0.6785, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.37, |
|
"step": 21 |
|
}, |
|
{ |
|
"loss": 0.7281, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.39, |
|
"step": 22 |
|
}, |
|
{ |
|
"loss": 0.7451, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.4, |
|
"step": 23 |
|
}, |
|
{ |
|
"loss": 0.6618, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.42, |
|
"step": 24 |
|
}, |
|
{ |
|
"loss": 0.6452, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.44, |
|
"step": 25 |
|
}, |
|
{ |
|
"eval_commonsense_qa_loss": 0.7914362549781799, |
|
"eval_commonsense_qa_score": -0.29111722111701965, |
|
"eval_commonsense_qa_brier_score": 0.29111722111701965, |
|
"eval_commonsense_qa_average_probability": 0.47185489535331726, |
|
"eval_commonsense_qa_accuracy": 0.39, |
|
"eval_commonsense_qa_probabilities": [ |
|
0.3736628592014313, |
|
0.41951867938041687, |
|
0.40974757075309753, |
|
0.43108245730400085, |
|
0.39417126774787903, |
|
0.36827391386032104, |
|
0.3217296004295349, |
|
0.4527994692325592, |
|
0.44031405448913574, |
|
0.5761461853981018, |
|
0.6323458552360535, |
|
0.5307877659797668, |
|
0.3072277307510376, |
|
0.35649847984313965, |
|
0.24212150275707245, |
|
0.4968399107456207, |
|
0.7714088559150696, |
|
0.4102341830730438, |
|
0.5640316605567932, |
|
0.5279660820960999, |
|
0.26524466276168823, |
|
0.46642741560935974, |
|
0.667880654335022, |
|
0.5137766003608704, |
|
0.7010161280632019, |
|
0.5712583065032959, |
|
0.5321078300476074, |
|
0.4304255247116089, |
|
0.25722262263298035, |
|
0.49681854248046875, |
|
0.39940145611763, |
|
0.38804712891578674, |
|
0.413534939289093, |
|
0.36740759015083313, |
|
0.3980674743652344, |
|
0.3686825931072235, |
|
0.45741069316864014, |
|
0.4793001115322113, |
|
0.5210141539573669, |
|
0.5084275603294373, |
|
0.5377230644226074, |
|
0.4399715065956116, |
|
0.5001020431518555, |
|
0.4058927297592163, |
|
0.4787653684616089, |
|
0.35228681564331055, |
|
0.3852904140949249, |
|
0.38124364614486694, |
|
0.49779626727104187, |
|
0.47046318650245667, |
|
0.45446330308914185, |
|
0.49293678998947144, |
|
0.5469354391098022, |
|
0.4111570715904236, |
|
0.6435064077377319, |
|
0.3415040075778961, |
|
0.557502269744873, |
|
0.6114356517791748, |
|
0.4695909917354584, |
|
0.3526020348072052, |
|
0.6928711533546448, |
|
0.688827633857727, |
|
0.6876643896102905, |
|
0.4979974031448364, |
|
0.5267980098724365, |
|
0.5480849146842957, |
|
0.5159767866134644, |
|
0.6435903310775757, |
|
0.48293471336364746, |
|
0.4974452555179596, |
|
0.39486047625541687, |
|
0.3298123776912689, |
|
0.39702242612838745, |
|
0.2585730254650116, |
|
0.5650836229324341, |
|
0.6514415144920349, |
|
0.567798376083374, |
|
0.5929319858551025, |
|
0.35160502791404724, |
|
0.39691317081451416, |
|
0.589564859867096, |
|
0.4809286296367645, |
|
0.3265831768512726, |
|
0.34647417068481445, |
|
0.29951584339141846, |
|
0.540679931640625, |
|
0.477059006690979, |
|
0.42304784059524536, |
|
0.3258381187915802, |
|
0.33986833691596985, |
|
0.6030049920082092, |
|
0.5700676441192627, |
|
0.5453739166259766, |
|
0.5489950180053711, |
|
0.5006292462348938, |
|
0.5012415647506714, |
|
0.4897501468658447, |
|
0.405569463968277, |
|
0.41794145107269287, |
|
0.6035709977149963 |
|
], |
|
"eval_commonsense_qa_runtime": 5.072, |
|
"eval_commonsense_qa_samples_per_second": 19.716, |
|
"eval_commonsense_qa_steps_per_second": 0.789, |
|
"epoch": 0.44, |
|
"step": 25 |
|
}, |
|
{ |
|
"eval_trivia_qa_loss": 0.7058285474777222, |
|
"eval_trivia_qa_score": -0.2509709298610687, |
|
"eval_trivia_qa_brier_score": 0.2509709298610687, |
|
"eval_trivia_qa_average_probability": 0.508730411529541, |
|
"eval_trivia_qa_accuracy": 0.56, |
|
"eval_trivia_qa_probabilities": [ |
|
0.6108027696609497, |
|
0.47548747062683105, |
|
0.5868535041809082, |
|
0.32189613580703735, |
|
0.4967602789402008, |
|
0.5013786554336548, |
|
0.582642138004303, |
|
0.5476276278495789, |
|
0.6525075435638428, |
|
0.3721744418144226, |
|
0.46289870142936707, |
|
0.5139896273612976, |
|
0.6440446376800537, |
|
0.5662510395050049, |
|
0.5801640748977661, |
|
0.27492260932922363, |
|
0.5860297679901123, |
|
0.4890085756778717, |
|
0.6448793411254883, |
|
0.5299046039581299, |
|
0.3838443160057068, |
|
0.2839445173740387, |
|
0.5129914879798889, |
|
0.6394293308258057, |
|
0.5504795908927917, |
|
0.6178440451622009, |
|
0.35971587896347046, |
|
0.45417338609695435, |
|
0.42903023958206177, |
|
0.559572160243988, |
|
0.5979811549186707, |
|
0.4994199275970459, |
|
0.5579876899719238, |
|
0.7093907594680786, |
|
0.4760594666004181, |
|
0.4218994379043579, |
|
0.5057868361473083, |
|
0.6060559153556824, |
|
0.5037559866905212, |
|
0.5046707391738892, |
|
0.42048025131225586, |
|
0.6409589648246765, |
|
0.5305330753326416, |
|
0.3996846079826355, |
|
0.4467884302139282, |
|
0.6522667407989502, |
|
0.4101843535900116, |
|
0.3745155334472656, |
|
0.5663012266159058, |
|
0.48798033595085144, |
|
0.6159585118293762, |
|
0.47460001707077026, |
|
0.6789429187774658, |
|
0.3534046411514282, |
|
0.5315006375312805, |
|
0.4756757915019989, |
|
0.6186851263046265, |
|
0.5154322981834412, |
|
0.37286680936813354, |
|
0.5532978177070618, |
|
0.45223331451416016, |
|
0.41775307059288025, |
|
0.5951501727104187, |
|
0.640890896320343, |
|
0.4332258701324463, |
|
0.6166451573371887, |
|
0.626221239566803, |
|
0.545464277267456, |
|
0.36058875918388367, |
|
0.4321788251399994, |
|
0.4153941571712494, |
|
0.5391730070114136, |
|
0.396107017993927, |
|
0.5979107618331909, |
|
0.5979316830635071, |
|
0.40809130668640137, |
|
0.6226964592933655, |
|
0.38245290517807007, |
|
0.587360680103302, |
|
0.44519785046577454, |
|
0.659287691116333, |
|
0.570533812046051, |
|
0.3212454915046692, |
|
0.5290343761444092, |
|
0.3304256200790405, |
|
0.5141531229019165, |
|
0.4628876745700836, |
|
0.4035484790802002, |
|
0.5565099716186523, |
|
0.5873599052429199, |
|
0.5347287058830261, |
|
0.4785170257091522, |
|
0.44041702151298523, |
|
0.4931771457195282, |
|
0.5670301914215088, |
|
0.5181891322135925, |
|
0.6944608688354492, |
|
0.4809003174304962, |
|
0.5023355484008789, |
|
0.3813191056251526 |
|
], |
|
"eval_trivia_qa_runtime": 6.9532, |
|
"eval_trivia_qa_samples_per_second": 14.382, |
|
"eval_trivia_qa_steps_per_second": 0.575, |
|
"epoch": 0.44, |
|
"step": 25 |
|
}, |
|
{ |
|
"loss": 0.6417, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.46, |
|
"step": 26 |
|
}, |
|
{ |
|
"loss": 0.696, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.47, |
|
"step": 27 |
|
}, |
|
{ |
|
"loss": 0.688, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.49, |
|
"step": 28 |
|
}, |
|
{ |
|
"loss": 0.6599, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.51, |
|
"step": 29 |
|
}, |
|
{ |
|
"loss": 0.726, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.53, |
|
"step": 30 |
|
}, |
|
{ |
|
"loss": 0.6367, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.54, |
|
"step": 31 |
|
}, |
|
{ |
|
"loss": 0.7377, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.56, |
|
"step": 32 |
|
}, |
|
{ |
|
"loss": 0.5971, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.58, |
|
"step": 33 |
|
}, |
|
{ |
|
"loss": 0.7561, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.6, |
|
"step": 34 |
|
}, |
|
{ |
|
"loss": 0.6453, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.61, |
|
"step": 35 |
|
}, |
|
{ |
|
"loss": 0.6648, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.63, |
|
"step": 36 |
|
}, |
|
{ |
|
"loss": 0.6106, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.65, |
|
"step": 37 |
|
}, |
|
{ |
|
"loss": 0.6421, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.67, |
|
"step": 38 |
|
}, |
|
{ |
|
"loss": 0.6576, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.68, |
|
"step": 39 |
|
}, |
|
{ |
|
"loss": 0.6426, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.7, |
|
"step": 40 |
|
}, |
|
{ |
|
"loss": 0.6443, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.72, |
|
"step": 41 |
|
}, |
|
{ |
|
"loss": 0.6842, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.74, |
|
"step": 42 |
|
}, |
|
{ |
|
"loss": 0.6407, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.75, |
|
"step": 43 |
|
}, |
|
{ |
|
"loss": 0.7051, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.77, |
|
"step": 44 |
|
}, |
|
{ |
|
"loss": 0.6729, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.79, |
|
"step": 45 |
|
}, |
|
{ |
|
"loss": 0.6876, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.81, |
|
"step": 46 |
|
}, |
|
{ |
|
"loss": 0.6711, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.82, |
|
"step": 47 |
|
}, |
|
{ |
|
"loss": 0.6613, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.84, |
|
"step": 48 |
|
}, |
|
{ |
|
"loss": 0.6044, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.86, |
|
"step": 49 |
|
}, |
|
{ |
|
"loss": 0.5769, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.88, |
|
"step": 50 |
|
}, |
|
{ |
|
"eval_commonsense_qa_loss": 0.7653241157531738, |
|
"eval_commonsense_qa_score": -0.2791268527507782, |
|
"eval_commonsense_qa_brier_score": 0.2791268527507782, |
|
"eval_commonsense_qa_average_probability": 0.48683643341064453, |
|
"eval_commonsense_qa_accuracy": 0.42, |
|
"eval_commonsense_qa_probabilities": [ |
|
0.4020081162452698, |
|
0.48435282707214355, |
|
0.43516698479652405, |
|
0.4614918529987335, |
|
0.4766950011253357, |
|
0.4323585629463196, |
|
0.30255478620529175, |
|
0.29709967970848083, |
|
0.545942485332489, |
|
0.5383279323577881, |
|
0.6857153177261353, |
|
0.48378315567970276, |
|
0.16650518774986267, |
|
0.2533150315284729, |
|
0.19820035994052887, |
|
0.44554558396339417, |
|
0.7646204233169556, |
|
0.6391469240188599, |
|
0.5909577012062073, |
|
0.5142195224761963, |
|
0.46391570568084717, |
|
0.4665536880493164, |
|
0.5840488076210022, |
|
0.43470215797424316, |
|
0.7169751524925232, |
|
0.7758315205574036, |
|
0.5787749886512756, |
|
0.5941563844680786, |
|
0.46830788254737854, |
|
0.5470129251480103, |
|
0.46815431118011475, |
|
0.526231050491333, |
|
0.5080574750900269, |
|
0.29635000228881836, |
|
0.47670942544937134, |
|
0.32638877630233765, |
|
0.4723879396915436, |
|
0.6098498702049255, |
|
0.4592856168746948, |
|
0.5818286538124084, |
|
0.5597203373908997, |
|
0.3799402415752411, |
|
0.3624171018600464, |
|
0.40649276971817017, |
|
0.42219263315200806, |
|
0.3799329996109009, |
|
0.3508457839488983, |
|
0.46101444959640503, |
|
0.4670948088169098, |
|
0.6448565721511841, |
|
0.4881596565246582, |
|
0.44159600138664246, |
|
0.6058750748634338, |
|
0.590934157371521, |
|
0.46451979875564575, |
|
0.37275078892707825, |
|
0.5106647610664368, |
|
0.6377049684524536, |
|
0.5288455486297607, |
|
0.4052494764328003, |
|
0.6817584037780762, |
|
0.682870090007782, |
|
0.7456340193748474, |
|
0.4100780189037323, |
|
0.5996410846710205, |
|
0.46194082498550415, |
|
0.4774899482727051, |
|
0.6287940740585327, |
|
0.442749559879303, |
|
0.38106194138526917, |
|
0.3868009150028229, |
|
0.34707480669021606, |
|
0.5333090424537659, |
|
0.4054989218711853, |
|
0.6355715990066528, |
|
0.5217822790145874, |
|
0.3845539689064026, |
|
0.45426151156425476, |
|
0.43503573536872864, |
|
0.4364105463027954, |
|
0.6591715812683105, |
|
0.5792595744132996, |
|
0.22389499843120575, |
|
0.4098465144634247, |
|
0.3524221181869507, |
|
0.42123866081237793, |
|
0.48706841468811035, |
|
0.41214796900749207, |
|
0.22283338010311127, |
|
0.28352028131484985, |
|
0.688373863697052, |
|
0.5961286425590515, |
|
0.6101290583610535, |
|
0.5565056204795837, |
|
0.5037069320678711, |
|
0.41246306896209717, |
|
0.6470851302146912, |
|
0.5888364315032959, |
|
0.4137541353702545, |
|
0.6086077690124512 |
|
], |
|
"eval_commonsense_qa_runtime": 5.0497, |
|
"eval_commonsense_qa_samples_per_second": 19.803, |
|
"eval_commonsense_qa_steps_per_second": 0.792, |
|
"epoch": 0.88, |
|
"step": 50 |
|
}, |
|
{ |
|
"eval_trivia_qa_loss": 0.6967568397521973, |
|
"eval_trivia_qa_score": -0.24871157109737396, |
|
"eval_trivia_qa_brier_score": 0.24871157109737396, |
|
"eval_trivia_qa_average_probability": 0.5072451829910278, |
|
"eval_trivia_qa_accuracy": 0.5, |
|
"eval_trivia_qa_probabilities": [ |
|
0.5316422581672668, |
|
0.432894766330719, |
|
0.5510445833206177, |
|
0.4671865999698639, |
|
0.5174410343170166, |
|
0.5549404621124268, |
|
0.4334378242492676, |
|
0.39600223302841187, |
|
0.528852641582489, |
|
0.42653071880340576, |
|
0.5234686136245728, |
|
0.42779213190078735, |
|
0.5071923732757568, |
|
0.4844723641872406, |
|
0.5264027714729309, |
|
0.439650297164917, |
|
0.553399384021759, |
|
0.5268545746803284, |
|
0.42666733264923096, |
|
0.47586703300476074, |
|
0.4965582489967346, |
|
0.34971049427986145, |
|
0.6940780878067017, |
|
0.5421137809753418, |
|
0.48094817996025085, |
|
0.43245548009872437, |
|
0.5597365498542786, |
|
0.4554421305656433, |
|
0.4893034100532532, |
|
0.5171124935150146, |
|
0.5146493911743164, |
|
0.495728462934494, |
|
0.4713417887687683, |
|
0.6222578287124634, |
|
0.498948872089386, |
|
0.40657901763916016, |
|
0.527847409248352, |
|
0.7099897265434265, |
|
0.5247665047645569, |
|
0.4212891161441803, |
|
0.5828162431716919, |
|
0.540741503238678, |
|
0.5784919857978821, |
|
0.5161374807357788, |
|
0.4517030119895935, |
|
0.6503217220306396, |
|
0.5777159929275513, |
|
0.36965277791023254, |
|
0.4844502806663513, |
|
0.5828574299812317, |
|
0.6145827174186707, |
|
0.3802032172679901, |
|
0.7286924123764038, |
|
0.4977825880050659, |
|
0.5343342423439026, |
|
0.5175969004631042, |
|
0.48933130502700806, |
|
0.5237868428230286, |
|
0.39064183831214905, |
|
0.5096392035484314, |
|
0.5159491896629333, |
|
0.3979681730270386, |
|
0.6354355812072754, |
|
0.5407276153564453, |
|
0.4297018051147461, |
|
0.5711097121238708, |
|
0.6063168048858643, |
|
0.4898560345172882, |
|
0.43474793434143066, |
|
0.5227797627449036, |
|
0.4176207184791565, |
|
0.6359185576438904, |
|
0.4207601547241211, |
|
0.4994364082813263, |
|
0.5145835876464844, |
|
0.37604933977127075, |
|
0.5790051817893982, |
|
0.45702680945396423, |
|
0.48608124256134033, |
|
0.47564682364463806, |
|
0.6444876194000244, |
|
0.4628547132015228, |
|
0.4236738979816437, |
|
0.5284913778305054, |
|
0.3784801661968231, |
|
0.6344289779663086, |
|
0.523414671421051, |
|
0.4968854784965515, |
|
0.4976916015148163, |
|
0.4860800504684448, |
|
0.47529691457748413, |
|
0.4533490836620331, |
|
0.4189370274543762, |
|
0.6194345355033875, |
|
0.484923779964447, |
|
0.5832501649856567, |
|
0.6392970681190491, |
|
0.509278416633606, |
|
0.5340873003005981, |
|
0.4593735337257385 |
|
], |
|
"eval_trivia_qa_runtime": 6.9799, |
|
"eval_trivia_qa_samples_per_second": 14.327, |
|
"eval_trivia_qa_steps_per_second": 0.573, |
|
"epoch": 0.88, |
|
"step": 50 |
|
}, |
|
{ |
|
"loss": 0.6601, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.89, |
|
"step": 51 |
|
}, |
|
{ |
|
"loss": 0.6454, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.91, |
|
"step": 52 |
|
}, |
|
{ |
|
"loss": 0.6547, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.93, |
|
"step": 53 |
|
}, |
|
{ |
|
"loss": 0.7068, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.95, |
|
"step": 54 |
|
}, |
|
{ |
|
"loss": 0.6629, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.96, |
|
"step": 55 |
|
}, |
|
{ |
|
"loss": 0.7136, |
|
"learning_rate": 0.0002, |
|
"epoch": 0.98, |
|
"step": 56 |
|
}, |
|
{ |
|
"loss": 0.6437, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"loss": 0.5198, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.02, |
|
"step": 58 |
|
}, |
|
{ |
|
"loss": 0.4605, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.04, |
|
"step": 59 |
|
}, |
|
{ |
|
"loss": 0.4861, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.05, |
|
"step": 60 |
|
}, |
|
{ |
|
"loss": 0.483, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.07, |
|
"step": 61 |
|
}, |
|
{ |
|
"loss": 0.5581, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.09, |
|
"step": 62 |
|
}, |
|
{ |
|
"loss": 0.4564, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.11, |
|
"step": 63 |
|
}, |
|
{ |
|
"loss": 0.4056, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.12, |
|
"step": 64 |
|
}, |
|
{ |
|
"loss": 0.4167, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.14, |
|
"step": 65 |
|
}, |
|
{ |
|
"loss": 0.7253, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.16, |
|
"step": 66 |
|
}, |
|
{ |
|
"loss": 0.4609, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.18, |
|
"step": 67 |
|
}, |
|
{ |
|
"loss": 0.5302, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.19, |
|
"step": 68 |
|
}, |
|
{ |
|
"loss": 0.6734, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.21, |
|
"step": 69 |
|
}, |
|
{ |
|
"loss": 0.5224, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.23, |
|
"step": 70 |
|
}, |
|
{ |
|
"loss": 0.4326, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.25, |
|
"step": 71 |
|
}, |
|
{ |
|
"loss": 0.5751, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.26, |
|
"step": 72 |
|
}, |
|
{ |
|
"loss": 0.5653, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.28, |
|
"step": 73 |
|
}, |
|
{ |
|
"loss": 0.4586, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.3, |
|
"step": 74 |
|
}, |
|
{ |
|
"loss": 0.4466, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.32, |
|
"step": 75 |
|
}, |
|
{ |
|
"eval_commonsense_qa_loss": 1.0953351259231567, |
|
"eval_commonsense_qa_score": -0.3630787134170532, |
|
"eval_commonsense_qa_brier_score": 0.3630787134170532, |
|
"eval_commonsense_qa_average_probability": 0.46727100014686584, |
|
"eval_commonsense_qa_accuracy": 0.47, |
|
"eval_commonsense_qa_probabilities": [ |
|
0.6413354277610779, |
|
0.6861792206764221, |
|
0.9139598608016968, |
|
0.3129615783691406, |
|
0.4858136773109436, |
|
0.3958660662174225, |
|
0.015349932946264744, |
|
0.03121619112789631, |
|
0.3732204735279083, |
|
0.5287988781929016, |
|
0.9410275220870972, |
|
0.4859994351863861, |
|
0.00484914006665349, |
|
0.06006177142262459, |
|
0.016101302579045296, |
|
0.25591790676116943, |
|
0.9382115006446838, |
|
0.7453911304473877, |
|
0.49844464659690857, |
|
0.11961045861244202, |
|
0.21799755096435547, |
|
0.22540347278118134, |
|
0.6641191840171814, |
|
0.3415297865867615, |
|
0.9549143314361572, |
|
0.9577696323394775, |
|
0.3804933726787567, |
|
0.5659024715423584, |
|
0.6399015188217163, |
|
0.6054954528808594, |
|
0.16932412981987, |
|
0.24352750182151794, |
|
0.5880881547927856, |
|
0.10066776722669601, |
|
0.2192084938287735, |
|
0.08628320693969727, |
|
0.33604246377944946, |
|
0.27943921089172363, |
|
0.3816390931606293, |
|
0.7643809914588928, |
|
0.7210124731063843, |
|
0.5266180038452148, |
|
0.204672709107399, |
|
0.5603741407394409, |
|
0.6822576522827148, |
|
0.19128814339637756, |
|
0.20498374104499817, |
|
0.09368855506181717, |
|
0.6106529235839844, |
|
0.8263741135597229, |
|
0.6096042394638062, |
|
0.3612224757671356, |
|
0.3293110430240631, |
|
0.4423503577709198, |
|
0.1176837608218193, |
|
0.1601405143737793, |
|
0.3888700008392334, |
|
0.8851404786109924, |
|
0.578056275844574, |
|
0.2645527720451355, |
|
0.9431173205375671, |
|
0.8939391374588013, |
|
0.9614054560661316, |
|
0.15922978520393372, |
|
0.5092063546180725, |
|
0.7479695081710815, |
|
0.2555106580257416, |
|
0.9339343905448914, |
|
0.5020483136177063, |
|
0.2890869677066803, |
|
0.25056713819503784, |
|
0.07140284031629562, |
|
0.6142622828483582, |
|
0.6608532667160034, |
|
0.8618139028549194, |
|
0.8793162107467651, |
|
0.5890896916389465, |
|
0.8838966488838196, |
|
0.4716782569885254, |
|
0.398629367351532, |
|
0.8436529040336609, |
|
0.3973071277141571, |
|
0.06621242314577103, |
|
0.5546140670776367, |
|
0.16331911087036133, |
|
0.606509804725647, |
|
0.6033067107200623, |
|
0.2906716763973236, |
|
0.009353392757475376, |
|
0.061065930873155594, |
|
0.8538415431976318, |
|
0.5957357883453369, |
|
0.7623599767684937, |
|
0.6353998780250549, |
|
0.6718450784683228, |
|
0.06562834978103638, |
|
0.4774560332298279, |
|
0.20875515043735504, |
|
0.2653869092464447, |
|
0.7864195108413696 |
|
], |
|
"eval_commonsense_qa_runtime": 5.0892, |
|
"eval_commonsense_qa_samples_per_second": 19.649, |
|
"eval_commonsense_qa_steps_per_second": 0.786, |
|
"epoch": 1.32, |
|
"step": 75 |
|
}, |
|
{ |
|
"eval_trivia_qa_loss": 0.7178796529769897, |
|
"eval_trivia_qa_score": -0.25323230028152466, |
|
"eval_trivia_qa_brier_score": 0.25323230028152466, |
|
"eval_trivia_qa_average_probability": 0.5258346796035767, |
|
"eval_trivia_qa_accuracy": 0.56, |
|
"eval_trivia_qa_probabilities": [ |
|
0.6001328825950623, |
|
0.43153026700019836, |
|
0.7303735613822937, |
|
0.4660419523715973, |
|
0.6144695281982422, |
|
0.44857344031333923, |
|
0.13023461401462555, |
|
0.21497581899166107, |
|
0.7541935443878174, |
|
0.3930618166923523, |
|
0.5923050045967102, |
|
0.24356049299240112, |
|
0.5985594391822815, |
|
0.44693607091903687, |
|
0.5665600299835205, |
|
0.37776947021484375, |
|
0.6056777834892273, |
|
0.6600034832954407, |
|
0.506766676902771, |
|
0.3960486650466919, |
|
0.6760654449462891, |
|
0.325588583946228, |
|
0.84026700258255, |
|
0.6310024857521057, |
|
0.41549575328826904, |
|
0.48468682169914246, |
|
0.30185914039611816, |
|
0.4738370478153229, |
|
0.40581652522087097, |
|
0.538021445274353, |
|
0.40905657410621643, |
|
0.5703331232070923, |
|
0.5543002486228943, |
|
0.6837395429611206, |
|
0.7139797806739807, |
|
0.47817718982696533, |
|
0.5026704668998718, |
|
0.6653541922569275, |
|
0.5143362283706665, |
|
0.33803310990333557, |
|
0.5520848035812378, |
|
0.7011743783950806, |
|
0.8836812973022461, |
|
0.6092294454574585, |
|
0.2784689962863922, |
|
0.7969092726707458, |
|
0.6553284525871277, |
|
0.5456470251083374, |
|
0.44695183634757996, |
|
0.548007071018219, |
|
0.5680496692657471, |
|
0.41621971130371094, |
|
0.830963134765625, |
|
0.7895110845565796, |
|
0.4964308738708496, |
|
0.3645930290222168, |
|
0.5637221932411194, |
|
0.44506263732910156, |
|
0.2878129184246063, |
|
0.5505443811416626, |
|
0.494486927986145, |
|
0.36506953835487366, |
|
0.8300395011901855, |
|
0.7011266946792603, |
|
0.348209947347641, |
|
0.626815140247345, |
|
0.7101113796234131, |
|
0.6857610940933228, |
|
0.4028257131576538, |
|
0.6243658065795898, |
|
0.15364059805870056, |
|
0.7464989423751831, |
|
0.5398872494697571, |
|
0.4613312780857086, |
|
0.40898871421813965, |
|
0.2252740114927292, |
|
0.7652521133422852, |
|
0.2925339937210083, |
|
0.4388081431388855, |
|
0.5058109164237976, |
|
0.8158372044563293, |
|
0.7546953558921814, |
|
0.2097131758928299, |
|
0.5340847373008728, |
|
0.25506791472435, |
|
0.537187397480011, |
|
0.592171311378479, |
|
0.5829126238822937, |
|
0.7708747982978821, |
|
0.43876922130584717, |
|
0.45161038637161255, |
|
0.4637928605079651, |
|
0.32175856828689575, |
|
0.6439905762672424, |
|
0.35225629806518555, |
|
0.5903196930885315, |
|
0.8497107625007629, |
|
0.530751645565033, |
|
0.5443508625030518, |
|
0.38598567247390747 |
|
], |
|
"eval_trivia_qa_runtime": 6.9586, |
|
"eval_trivia_qa_samples_per_second": 14.371, |
|
"eval_trivia_qa_steps_per_second": 0.575, |
|
"epoch": 1.32, |
|
"step": 75 |
|
}, |
|
{ |
|
"loss": 0.6169, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.33, |
|
"step": 76 |
|
}, |
|
{ |
|
"loss": 0.4915, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.35, |
|
"step": 77 |
|
}, |
|
{ |
|
"loss": 0.3843, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.37, |
|
"step": 78 |
|
}, |
|
{ |
|
"loss": 0.3157, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.39, |
|
"step": 79 |
|
}, |
|
{ |
|
"loss": 0.4288, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.4, |
|
"step": 80 |
|
}, |
|
{ |
|
"loss": 0.4981, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.42, |
|
"step": 81 |
|
}, |
|
{ |
|
"loss": 0.3743, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.44, |
|
"step": 82 |
|
}, |
|
{ |
|
"loss": 0.4731, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.46, |
|
"step": 83 |
|
}, |
|
{ |
|
"loss": 0.5496, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.47, |
|
"step": 84 |
|
}, |
|
{ |
|
"loss": 0.5248, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.49, |
|
"step": 85 |
|
}, |
|
{ |
|
"loss": 0.3161, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.51, |
|
"step": 86 |
|
}, |
|
{ |
|
"loss": 0.4111, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.53, |
|
"step": 87 |
|
}, |
|
{ |
|
"loss": 0.6771, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.54, |
|
"step": 88 |
|
}, |
|
{ |
|
"loss": 0.3828, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.56, |
|
"step": 89 |
|
}, |
|
{ |
|
"loss": 0.5683, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.58, |
|
"step": 90 |
|
}, |
|
{ |
|
"loss": 0.3922, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.6, |
|
"step": 91 |
|
}, |
|
{ |
|
"loss": 0.3031, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.61, |
|
"step": 92 |
|
}, |
|
{ |
|
"loss": 0.4393, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.63, |
|
"step": 93 |
|
}, |
|
{ |
|
"loss": 0.5812, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.65, |
|
"step": 94 |
|
}, |
|
{ |
|
"loss": 0.3824, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.67, |
|
"step": 95 |
|
}, |
|
{ |
|
"loss": 0.438, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.68, |
|
"step": 96 |
|
}, |
|
{ |
|
"loss": 0.4176, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.7, |
|
"step": 97 |
|
}, |
|
{ |
|
"loss": 0.6267, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.72, |
|
"step": 98 |
|
}, |
|
{ |
|
"loss": 0.3414, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.74, |
|
"step": 99 |
|
}, |
|
{ |
|
"loss": 0.347, |
|
"learning_rate": 0.0002, |
|
"epoch": 1.75, |
|
"step": 100 |
|
}, |
|
{ |
|
"eval_commonsense_qa_loss": 1.1523878574371338, |
|
"eval_commonsense_qa_score": -0.396799772977829, |
|
"eval_commonsense_qa_brier_score": 0.396799772977829, |
|
"eval_commonsense_qa_average_probability": 0.4321339428424835, |
|
"eval_commonsense_qa_accuracy": 0.37, |
|
"eval_commonsense_qa_probabilities": [ |
|
0.5133974552154541, |
|
0.19955140352249146, |
|
0.12726318836212158, |
|
0.2381817102432251, |
|
0.3061620891094208, |
|
0.4511687159538269, |
|
0.031751181930303574, |
|
0.04943360388278961, |
|
0.0996626764535904, |
|
0.7441174387931824, |
|
0.9300025105476379, |
|
0.7207792401313782, |
|
0.00975093338638544, |
|
0.05811166390776634, |
|
0.01687975972890854, |
|
0.6953206658363342, |
|
0.8796400427818298, |
|
0.8870823979377747, |
|
0.7761392593383789, |
|
0.2514343857765198, |
|
0.7701127529144287, |
|
0.8666774034500122, |
|
0.9593339562416077, |
|
0.39567831158638, |
|
0.9251853227615356, |
|
0.956170380115509, |
|
0.5061752796173096, |
|
0.3293822705745697, |
|
0.29882335662841797, |
|
0.24122630059719086, |
|
0.23124100267887115, |
|
0.27979356050491333, |
|
0.7157328724861145, |
|
0.08215536922216415, |
|
0.899575412273407, |
|
0.2522304952144623, |
|
0.3705737888813019, |
|
0.41751977801322937, |
|
0.28646501898765564, |
|
0.7097967267036438, |
|
0.5926461219787598, |
|
0.5077208280563354, |
|
0.1959858238697052, |
|
0.39983221888542175, |
|
0.27750301361083984, |
|
0.40524882078170776, |
|
0.383944571018219, |
|
0.677962064743042, |
|
0.36190852522850037, |
|
0.9803575873374939, |
|
0.7406795620918274, |
|
0.16878190636634827, |
|
0.12593944370746613, |
|
0.17838409543037415, |
|
0.3623591661453247, |
|
0.13244767487049103, |
|
0.3018617331981659, |
|
0.8912862539291382, |
|
0.5757622122764587, |
|
0.3221542537212372, |
|
0.871790885925293, |
|
0.8111267685890198, |
|
0.7601278424263, |
|
0.2897421419620514, |
|
0.7055788040161133, |
|
0.5388709306716919, |
|
0.40989163517951965, |
|
0.4253596067428589, |
|
0.0980086550116539, |
|
0.38305872678756714, |
|
0.3864794969558716, |
|
0.19545888900756836, |
|
0.6149375438690186, |
|
0.4320893883705139, |
|
0.8303354382514954, |
|
0.24265244603157043, |
|
0.13059014081954956, |
|
0.2409209907054901, |
|
0.16863232851028442, |
|
0.040597084909677505, |
|
0.37653404474258423, |
|
0.1172540932893753, |
|
0.21813638508319855, |
|
0.34870657324790955, |
|
0.1515800803899765, |
|
0.12445370852947235, |
|
0.37387892603874207, |
|
0.2328016608953476, |
|
0.5161333680152893, |
|
0.2136731743812561, |
|
0.8099603056907654, |
|
0.08686374127864838, |
|
0.3295922577381134, |
|
0.4922294318675995, |
|
0.5317037105560303, |
|
0.21833769977092743, |
|
0.40150973200798035, |
|
0.6546261310577393, |
|
0.5003038048744202, |
|
0.8764225840568542 |
|
], |
|
"eval_commonsense_qa_runtime": 5.0489, |
|
"eval_commonsense_qa_samples_per_second": 19.806, |
|
"eval_commonsense_qa_steps_per_second": 0.792, |
|
"epoch": 1.75, |
|
"step": 100 |
|
}, |
|
{ |
|
"eval_trivia_qa_loss": 0.6934054493904114, |
|
"eval_trivia_qa_score": -0.24079853296279907, |
|
"eval_trivia_qa_brier_score": 0.24079853296279907, |
|
"eval_trivia_qa_average_probability": 0.5418477654457092, |
|
"eval_trivia_qa_accuracy": 0.65, |
|
"eval_trivia_qa_probabilities": [ |
|
0.6125680208206177, |
|
0.5168354511260986, |
|
0.8149361610412598, |
|
0.6358698010444641, |
|
0.6971873641014099, |
|
0.43288537859916687, |
|
0.07869447767734528, |
|
0.3557625114917755, |
|
0.5231017470359802, |
|
0.6738269925117493, |
|
0.6492495536804199, |
|
0.2834418714046478, |
|
0.32658106088638306, |
|
0.5844005942344666, |
|
0.6492470502853394, |
|
0.7314375042915344, |
|
0.6042511463165283, |
|
0.6276711821556091, |
|
0.576631486415863, |
|
0.386127233505249, |
|
0.6349095702171326, |
|
0.5288182497024536, |
|
0.729836642742157, |
|
0.9058018326759338, |
|
0.5136997103691101, |
|
0.5128685832023621, |
|
0.3874431252479553, |
|
0.5315768122673035, |
|
0.5235913991928101, |
|
0.3415122330188751, |
|
0.6356922388076782, |
|
0.6705965995788574, |
|
0.6255914568901062, |
|
0.5862753987312317, |
|
0.7883551716804504, |
|
0.44843554496765137, |
|
0.7369633913040161, |
|
0.661395788192749, |
|
0.4538457691669464, |
|
0.30895760655403137, |
|
0.7524657249450684, |
|
0.7141547799110413, |
|
0.7967507839202881, |
|
0.5967062711715698, |
|
0.43285977840423584, |
|
0.7750150561332703, |
|
0.5969040393829346, |
|
0.4807274043560028, |
|
0.5844646096229553, |
|
0.33085906505584717, |
|
0.3553105592727661, |
|
0.43021368980407715, |
|
0.6605834364891052, |
|
0.7358336448669434, |
|
0.5644637942314148, |
|
0.49804893136024475, |
|
0.2971603572368622, |
|
0.5024909377098083, |
|
0.3082197606563568, |
|
0.707648515701294, |
|
0.4596785306930542, |
|
0.28572654724121094, |
|
0.7529811859130859, |
|
0.7141095995903015, |
|
0.38188186287879944, |
|
0.5586990714073181, |
|
0.7269975543022156, |
|
0.5075417757034302, |
|
0.48148077726364136, |
|
0.6394890546798706, |
|
0.09881781041622162, |
|
0.7859747409820557, |
|
0.5184196829795837, |
|
0.19931602478027344, |
|
0.6356649398803711, |
|
0.21822792291641235, |
|
0.6228802800178528, |
|
0.13568221032619476, |
|
0.3445551097393036, |
|
0.5373251438140869, |
|
0.830248236656189, |
|
0.7611830830574036, |
|
0.4023577868938446, |
|
0.537299394607544, |
|
0.2600027620792389, |
|
0.6060169339179993, |
|
0.6491380333900452, |
|
0.4489743113517761, |
|
0.7347836494445801, |
|
0.3445013165473938, |
|
0.5936704874038696, |
|
0.6110560297966003, |
|
0.36522528529167175, |
|
0.6722674369812012, |
|
0.40544307231903076, |
|
0.5686450004577637, |
|
0.9408103823661804, |
|
0.5205425024032593, |
|
0.5648188591003418, |
|
0.35458648204803467 |
|
], |
|
"eval_trivia_qa_runtime": 6.9847, |
|
"eval_trivia_qa_samples_per_second": 14.317, |
|
"eval_trivia_qa_steps_per_second": 0.573, |
|
"epoch": 1.75, |
|
"step": 100 |
|
}, |
|
{ |
|
"train_runtime": 538.1504, |
|
"train_samples_per_second": 5.946, |
|
"train_steps_per_second": 0.186, |
|
"total_flos": 0.0, |
|
"train_loss": 0.593766241967678, |
|
"epoch": 1.75, |
|
"step": 100 |
|
} |
|
]] |