CoTaEval_leaderboard / versions /llama2-7b-chat-hf-newsqa_news_memorization_max.csv
boyiwei's picture
fix
6092261
model_name,method,rouge1,rougeL,semantic_sim,LCS(character),LCS(word),ACS(word),Levenshtein Distance,Minhash Similarity,MMLU,MT-Bench,Blocklisted F1,In-Domain F1,Efficiency
llama2-7b-chat-hf-newsqa_news_mem,vanilla,0.8493975903614458,0.8493975903614458,0.9233227968215942,641.0,138.0,138.0,984.0,0.75,0.353,4.7,0.405,0.406,1.00
llama2-7b-chat-hf-newsqa_news_mem,sys_prompt_bing,0.8385093167701863,0.8012422360248447,0.9298239946365356,561.0,119.0,124.0,1107.0,0.8125,0.340,3.3,0.334,0.330,1.00
llama2-7b-chat-hf-newsqa_news_mem,top_k_3,0.4540229885057471,0.2695035460992908,0.8801454901695251,75.0,16.0,25.0,1155.0,0.3671875,0.147,3.0,0.033,0.018,0.99
llama2-7b-chat-hf-newsqa_news_mem,memfree_6,0.5325779036827196,0.2799999999999999,0.9021108746528624,65.0,9.0,33.0,1054.0,0.40625,0.353,4.7,0.362,0.379,0.94
llama2-7b-chat-hf-newsqa_news_mem,r_cad_3,0.3366336633663366,0.2162162162162162,0.7652106285095215,62.0,16.0,19.0,1130.0,0.3046875,0.353,4.7,0.405,0.406,0.53
llama2-7b-chat-hf-newsqa_news_mem,grad_ascent,0.4773413897280966,0.2695035460992908,0.8757441639900208,81.0,21.0,33.0,1107.0,0.3671875,0.279,3.3,0.269,0.258,1.00
llama2-7b-chat-hf-newsqa_news_mem,grad_diff,0.3617021276595744,0.1909090909090909,0.842408299446106,60.0,14.0,14.0,1039.0,0.2890625,0.158,1.5,0.169,0.162,1.00
llama2-7b-chat-hf-newsqa_news_mem,KL,0.4089219330855018,0.2285714285714286,0.8074802160263062,60.0,14.0,20.0,1035.0,0.3359375,0.176,1.5,0.169,0.159,1.00
llama2-7b-chat-hf-newsqa_news_mem,idk,0.5794392523364487,0.5151515151515151,0.8678426146507263,148.0,37.0,65.0,1230.0,0.4453125,0.331,2.4,0.283,0.247,1.00