ai-forever commited on
Commit
014bf66
·
verified ·
1 Parent(s): b78764f

add new models

Browse files
results/LLaMA-3.1-8B-Instruct.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"passkey": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 1.0, "64k": 1.0, "128k": 1.0, "dataset_total_score": 1.0}, "matreshka_yes_no": {"4k": 0.8361204013377926, "8k": 0.7366666666666667, "16k": 0.7566666666666667, "32k": 0.66, "64k": 0.63, "128k": 0.55, "dataset_total_score": 0.6949089557785211}, "matreshka_names": {"4k": 0.56, "8k": 0.52, "16k": 0.38, "32k": 0.38666666666666666, "64k": 0.30666666666666664, "128k": 0.23333333333333334, "dataset_total_score": 0.3977777777777778}, "passkey_with_librusec": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 1.0, "64k": 1.0, "128k": 1.0, "dataset_total_score": 1.0}, "librusec_history": {"8k": 0.65625, "16k": 0.65625, "32k": 0.625, "64k": 0.65625, "dataset_total_score": 0.6484375}, "ru_gsm100": {"16k": 0.23, "dataset_total_score": 0.23}, "ru_sci_passage_count": {"4k": 0.185, "8k": 0.07, "16k": 0.05, "32k": 0.02, "64k": 0.0, "128k": 0.01, "dataset_total_score": 0.05583333333333334}, "ru_2wikimultihopqa": {"8k": 0.2857142857142857, "16k": 0.328125, "32k": 0.21951219512195122, "dataset_total_score": 0.2777838269454123}, "long_context_multiq": {"8k": 0.27, "16k": 0.035, "4k": 0.065, "64k": 0.005, "32k": 0.0, "128k": 0.1, "dataset_total_score": 0.07916666666666666}, "ru_sci_abstract_retrieval": {"4k": 0.9832142857142858, "8k": 0.9310686932115504, "16k": 0.9261840800423767, "32k": 0.8304544464405875, "64k": 0.6458243484988054, "128k": 0.3264052378026478, "dataset_total_score": 0.7738585152850423}, "ru_trec": {"4k": 0.5675675675675675, "8k": 0.46, "16k": 0.6263736263736264, "32k": 0.5983606557377049, "dataset_total_score": 0.5630754624197247}, "ru_sci_fi": {"32k": 0.2222222222222222, "64k": 0.32142857142857145, "dataset_total_score": 0.27182539682539686}, "librusec_mhqa": {"8k": 0.3177083333333333, "dataset_total_score": 0.3177083333333333}, "ru_babilong_qa1": {"4k": 0.6580833333333334, "8k": 0.635, "16k": 0.593125, "32k": 0.53625, "64k": 0.4474166666666667, "128k": 0.47875, "dataset_total_score": 0.5581041666666667}, "ru_babilong_qa2": {"4k": 0.396875, "8k": 0.246875, "16k": 0.24491666666666667, "32k": 0.2780416666666667, "64k": 0.14616666666666667, "128k": 0.13, "dataset_total_score": 0.24047916666666666}, "ru_babilong_qa3": {"4k": 0.28979166666666667, "8k": 0.3107857142857143, "16k": 0.24823809523809526, "32k": 0.2512261904761905, "64k": 0.16870833333333335, "128k": 0.1666190476190476, "dataset_total_score": 0.23922817460317458}, "ru_babilong_qa4": {"4k": 0.115, "8k": 0.14571428571428574, "16k": 0.14285714285714288, "32k": 0.15642857142857144, "64k": 0.16714285714285715, "128k": 0.11, "dataset_total_score": 0.13952380952380952}, "ru_babilong_qa5": {"4k": 0.6483333333333334, "8k": 0.5883333333333334, "16k": 0.5466666666666667, "32k": 0.5633333333333334, "64k": 0.6166666666666667, "128k": 0.59, "dataset_total_score": 0.5922222222222223}, "ru_quality": {"16k": 0.3954451345755694, "8k": 0.4471544715447155, "dataset_total_score": 0.4212998030601425}, "ru_tpo": {"8k": 0.8154050464807437, "dataset_total_score": 0.8154050464807437}, "ru_qasper": {"16k": 0.07217324214881427, "8k": 0.08332129217427023, "32k": 0.04010014013685399, "dataset_total_score": 0.0651982248199795}, "total_score": 0.4467541134480291}
results/LLaMA-3.1-8B.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"passkey": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 1.0, "64k": 1.0, "128k": 1.0, "dataset_total_score": 1.0}, "matreshka_yes_no": {"4k": 0.4080267558528428, "8k": 0.42, "16k": 0.44, "32k": 0.44666666666666666, "64k": 0.4, "128k": 0.28, "dataset_total_score": 0.39911557041991824}, "matreshka_names": {"4k": 0.46, "8k": 0.34, "16k": 0.23333333333333334, "32k": 0.16666666666666666, "64k": 0.08, "128k": 0.06666666666666667, "dataset_total_score": 0.22444444444444447}, "passkey_with_librusec": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 1.0, "64k": 1.0, "128k": 1.0, "dataset_total_score": 1.0}, "librusec_history": {"8k": 0.96875, "16k": 0.96875, "32k": 0.9375, "64k": 0.9375, "dataset_total_score": 0.953125}, "ru_gsm100": {"16k": 0.2, "dataset_total_score": 0.2}, "ru_sci_passage_count": {"4k": 0.13, "8k": 0.04, "16k": 0.04, "32k": 0.02, "64k": 0.0, "128k": 0.013333333333333332, "dataset_total_score": 0.04055555555555556}, "ru_2wikimultihopqa": {"8k": 0.3673469387755102, "16k": 0.3828125, "32k": 0.25203252032520324, "dataset_total_score": 0.33406398636690443}, "long_context_multiq": {"8k": 0.245, "16k": 0.03, "4k": 0.045, "64k": 0.005, "32k": 0.0, "128k": 0.035, "dataset_total_score": 0.06}, "ru_sci_abstract_retrieval": {"4k": 0.971547619047619, "8k": 0.9226294340580055, "16k": 0.8926773863981885, "32k": 0.737346388706189, "64k": 0.6493593352514232, "128k": 0.2430637216014266, "dataset_total_score": 0.7361039808438087}, "ru_trec": {"4k": 0.5405405405405406, "8k": 0.58, "16k": 0.6263736263736264, "32k": 0.6475409836065574, "dataset_total_score": 0.598613787630181}, "ru_sci_fi": {"32k": 0.027777777777777776, "64k": 0.03571428571428571, "dataset_total_score": 0.031746031746031744}, "librusec_mhqa": {"8k": 0.453125, "dataset_total_score": 0.453125}, "ru_babilong_qa1": {"4k": 0.6274583333333333, "8k": 0.6125, "16k": 0.618125, "32k": 0.514375, "64k": 0.57625, "128k": 0.2874583333333333, "dataset_total_score": 0.5393611111111111}, "ru_babilong_qa2": {"4k": 0.4447916666666667, "8k": 0.3418333333333334, "16k": 0.181125, "32k": 0.21991666666666668, "64k": 0.2025, "128k": 0.134375, "dataset_total_score": 0.2540902777777778}, "ru_babilong_qa3": {"4k": 0.3370833333333333, "8k": 0.34199999999999997, "16k": 0.34662499999999996, "32k": 0.26304166666666673, "64k": 0.2912916666666667, "128k": 0.19883333333333336, "dataset_total_score": 0.2964791666666667}, "ru_babilong_qa4": {"4k": 0.6778571428571429, "8k": 0.6585714285714286, "16k": 0.6308571428571429, "32k": 0.5071428571428572, "64k": 0.43071428571428577, "128k": 0.21857142857142858, "dataset_total_score": 0.5206190476190476}, "ru_babilong_qa5": {"4k": 0.695, "8k": 0.7083333333333335, "16k": 0.6783333333333332, "32k": 0.6916666666666668, "64k": 0.6916666666666668, "128k": 0.6100000000000001, "dataset_total_score": 0.6791666666666667}, "ru_quality": {"16k": 0.028985507246376812, "8k": 0.21138211382113825, "dataset_total_score": 0.12018381053375753}, "ru_tpo": {"8k": 0.4980079681274901, "dataset_total_score": 0.4980079681274901}, "ru_qasper": {"16k": 0.05029792951580615, "8k": 0.05657815571812267, "32k": 0.022286589880109883, "dataset_total_score": 0.04305422503801291}, "total_score": 0.4277074109784464}
results/Mistral-Nemo-Instruct-2407.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"passkey": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 1.0, "64k": 1.0, "128k": 0.87, "dataset_total_score": 0.9783333333333334}, "matreshka_yes_no": {"4k": 0.6555183946488294, "8k": 0.5366666666666666, "16k": 0.5, "32k": 0.5, "64k": 0.5, "128k": 0.5, "dataset_total_score": 0.5320308435525827}, "matreshka_names": {"4k": 0.6133333333333333, "8k": 0.5, "16k": 0.43333333333333335, "32k": 0.29333333333333333, "64k": 0.07333333333333333, "128k": 0.02, "dataset_total_score": 0.3222222222222222}, "passkey_with_librusec": {"4k": 1.0, "8k": 1.0, "16k": 1.0, "32k": 0.995, "64k": 1.0, "128k": 0.97, "dataset_total_score": 0.9941666666666666}, "librusec_history": {"8k": 0.59375, "16k": 0.5625, "32k": 0.5, "64k": 0.46875, "dataset_total_score": 0.53125}, "ru_gsm100": {"16k": 0.0, "dataset_total_score": 0.0}, "ru_sci_passage_count": {"4k": 0.57, "8k": 0.1, "16k": 0.05, "32k": 0.03, "64k": 0.0, "128k": 0.02, "dataset_total_score": 0.12833333333333333}, "ru_2wikimultihopqa": {"8k": 0.3469387755102041, "16k": 0.3125, "32k": 0.17886178861788618, "dataset_total_score": 0.27943352137603006}, "long_context_multiq": {"8k": 0.23, "16k": 0.025, "4k": 0.05, "64k": 0.005, "32k": 0.0, "128k": 0.0, "dataset_total_score": 0.05166666666666667}, "ru_sci_abstract_retrieval": {"4k": 0.9843253968253969, "8k": 0.9340110286538859, "16k": 0.882861713382518, "32k": 0.7548755510867896, "64k": 0.28580245595851705, "128k": 0.07901305389621614, "dataset_total_score": 0.6534815333005539}, "ru_trec": {"4k": 0.4864864864864865, "8k": 0.66, "16k": 0.7142857142857143, "32k": 0.47540983606557374, "dataset_total_score": 0.5840455092094436}, "ru_sci_fi": {"32k": 0.2222222222222222, "64k": 0.25, "dataset_total_score": 0.2361111111111111}, "librusec_mhqa": {"8k": 0.2994791666666667, "dataset_total_score": 0.2994791666666667}, "ru_babilong_qa1": {"4k": 0.71, "8k": 0.73, "16k": 0.59, "32k": 0.6, "64k": 0.46, "128k": 0.19, "dataset_total_score": 0.5466666666666666}, "ru_babilong_qa2": {"4k": 0.28, "8k": 0.26, "16k": 0.19, "32k": 0.16, "64k": 0.11, "128k": 0.04, "dataset_total_score": 0.17333333333333334}, "ru_babilong_qa3": {"4k": 0.22, "8k": 0.28, "16k": 0.14, "32k": 0.14, "64k": 0.11, "128k": 0.07, "dataset_total_score": 0.16}, "ru_babilong_qa4": {"4k": 0.16928571428571432, "8k": 0.18714285714285717, "16k": 0.20142857142857143, "32k": 0.1635714285714286, "64k": 0.02, "128k": 0.0, "dataset_total_score": 0.1235714285714286}, "ru_babilong_qa5": {"4k": 0.48, "8k": 0.5, "16k": 0.49, "32k": 0.4566666666666667, "64k": 0.43, "128k": 0.3966666666666667, "dataset_total_score": 0.45888888888888896}, "ru_quality": {"16k": 0.6811594202898551, "8k": 0.6585365853658537, "dataset_total_score": 0.6698480028278544}, "ru_tpo": {"8k": 0.7675962815405047, "dataset_total_score": 0.7675962815405047}, "ru_qasper": {"16k": 0.28639727595549347, "8k": 0.3018572041274937, "32k": 0.14594086192630237, "dataset_total_score": 0.2447317806697632}, "total_score": 0.41596144237795474}
results/Phi-3-mini-128k-instruct.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"passkey": {"4k": 0.1, "8k": 0.995, "16k": 1.0, "32k": 1.0, "64k": 0.99, "128k": 0.995, "dataset_total_score": 0.8466666666666667}, "matreshka_yes_no": {"4k": 0.8595317725752508, "8k": 0.6833333333333333, "16k": 0.74, "32k": 0.68, "64k": 0.6566666666666666, "128k": 0.62, "dataset_total_score": 0.7065886287625419}, "matreshka_names": {"4k": 0.30666666666666664, "8k": 0.21333333333333335, "16k": 0.19333333333333333, "32k": 0.18, "64k": 0.16, "128k": 0.07333333333333333, "dataset_total_score": 0.18777777777777774}, "passkey_with_librusec": {"4k": 0.235, "8k": 0.995, "16k": 1.0, "32k": 1.0, "64k": 0.97, "128k": 0.93, "dataset_total_score": 0.855}, "librusec_history": {"8k": 0.46875, "16k": 0.4375, "32k": 0.375, "64k": 0.375, "dataset_total_score": 0.4140625}, "ru_gsm100": {"16k": 0.24, "dataset_total_score": 0.24}, "ru_sci_passage_count": {"4k": 0.21, "8k": 0.08, "16k": 0.05, "32k": 0.02, "64k": 0.0, "128k": 0.01, "dataset_total_score": 0.06166666666666667}, "ru_2wikimultihopqa": {"8k": 0.1836734693877551, "16k": 0.203125, "32k": 0.17886178861788618, "dataset_total_score": 0.18855341933521375}, "long_context_multiq": {"8k": 0.225, "16k": 0.035, "4k": 0.035, "64k": 0.005, "32k": 0.0, "128k": 0.01, "dataset_total_score": 0.05166666666666667}, "ru_sci_abstract_retrieval": {"4k": 0.16666666666666666, "8k": 0.45602797995655137, "16k": 0.406087639083534, "32k": 0.4008502386409311, "64k": 0.23707393032062402, "128k": 0.0892775698342537, "dataset_total_score": 0.29266400408376014}, "ru_trec": {"4k": 0.08108108108108109, "8k": 0.06, "16k": 0.24175824175824176, "32k": 0.30327868852459017, "dataset_total_score": 0.17152950284097826}, "ru_sci_fi": {"32k": 0.0, "64k": 0.0, "dataset_total_score": 0.0}, "librusec_mhqa": {"8k": 0.13802083333333334, "dataset_total_score": 0.13802083333333334}, "ru_babilong_qa1": {"4k": 0.14, "8k": 0.44, "16k": 0.42, "32k": 0.35, "64k": 0.33, "128k": 0.15, "dataset_total_score": 0.305}, "ru_babilong_qa2": {"4k": 0.01, "8k": 0.12, "16k": 0.18, "32k": 0.08, "64k": 0.08, "128k": 0.06, "dataset_total_score": 0.08833333333333333}, "ru_babilong_qa3": {"4k": 0.05, "8k": 0.14, "16k": 0.07, "32k": 0.13, "64k": 0.1, "128k": 0.05, "dataset_total_score": 0.09000000000000001}, "ru_babilong_qa4": {"4k": 0.0, "8k": 0.05, "16k": 0.01, "32k": 0.0, "64k": 0.0, "128k": 0.0, "dataset_total_score": 0.01}, "ru_babilong_qa5": {"4k": 0.10666666666666666, "8k": 0.4866666666666667, "16k": 0.5000000000000001, "32k": 0.55, "64k": 0.5566666666666666, "128k": 0.44666666666666666, "dataset_total_score": 0.4411111111111112}, "ru_quality": {"16k": 0.33747412008281574, "8k": 0.43902439024390244, "dataset_total_score": 0.38824925516335906}, "ru_tpo": {"8k": 0.40903054448871184, "dataset_total_score": 0.40903054448871184}, "ru_qasper": {"16k": 0.043349249250495984, "8k": 0.04746257838212318, "32k": 0.01566630824976973, "dataset_total_score": 0.03549271196079629}, "total_score": 0.2819720772471865}
results/T-lite-instruct-0.1.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"passkey": {"4k": 1.0, "8k": 1.0, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.3333333333333333}, "matreshka_yes_no": {"4k": 0.8294314381270903, "8k": 0.7133333333333334, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.2571274619100706}, "matreshka_names": {"4k": 0.49333333333333335, "8k": 0.3466666666666667, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.14}, "passkey_with_librusec": {"4k": 1.0, "8k": 1.0, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.3333333333333333}, "librusec_history": {"8k": 0.90625, "16k": 0, "32k": 0, "64k": 0, "dataset_total_score": 0.2265625}, "ru_gsm100": {"16k": 0, "dataset_total_score": 0.0}, "ru_sci_passage_count": {"4k": 0.24, "8k": 0.065, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.050833333333333335}, "ru_2wikimultihopqa": {"8k": 0.3877551020408163, "16k": 0, "32k": 0, "dataset_total_score": 0.1292517006802721}, "long_context_multiq": {"8k": 0.245, "4k": 0.065, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.051666666666666666}, "ru_sci_abstract_retrieval": {"4k": 0.9474376417233561, "8k": 0.9261452832881405, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.31226382083524945}, "ru_trec": {"4k": 0.6486486486486487, "8k": 0.58, "16k": 0, "32k": 0, "dataset_total_score": 0.3071621621621622}, "ru_sci_fi": {"32k": 0, "64k": 0, "dataset_total_score": 0.0}, "librusec_mhqa": {"8k": 0.484375, "dataset_total_score": 0.484375}, "ru_babilong_qa1": {"4k": 0.68, "8k": 0.62, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.21666666666666667}, "ru_babilong_qa2": {"4k": 0.52, "8k": 0.35, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.145}, "ru_babilong_qa3": {"4k": 0.22620833333333334, "8k": 0.26740476190476187, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.0822688492063492}, "ru_babilong_qa4": {"4k": 0.6242857142857143, "8k": 0.7128571428571427, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.22285714285714286}, "ru_babilong_qa5": {"4k": 0.7433333333333334, "8k": 0.7233333333333334, "16k": 0, "32k": 0, "64k": 0, "128k": 0, "dataset_total_score": 0.24444444444444446}, "ru_quality": {"8k": 0.21951219512195122, "16k": 0, "dataset_total_score": 0.10975609756097561}, "ru_tpo": {"8k": 0.7503320053120849, "dataset_total_score": 0.7503320053120849}, "ru_qasper": {"8k": 0.08197389867894705, "16k": 0, "32k": 0, "dataset_total_score": 0.027324632892982353}, "total_score": 0.21069329291405084}