Junetheriver committed on
Commit
32e04fa
·
1 Parent(s): 9c18c8e

update 05-15

Browse files
app.py CHANGED
@@ -25,7 +25,7 @@ def create_lang_tabs(lang, lang_cates):
25
  for dataset, cates in lang_cates:
26
  dataset_dt = {}
27
  for cat in cates:
28
- leaderboard_df = pd.read_csv(f'./data/{dataset}_{lang}_{cat}.csv')
29
  dataset_dt[cat] = leaderboard_df
30
  df_dict[dataset] = dataset_dt
31
  return df_dict
@@ -56,6 +56,8 @@ def process_mc_df(df, shot=None):
56
  df = df.sort_values(by="BestScore", ascending=False)
57
  # reset_index
58
  df = df.reset_index()
 
 
59
  return df
60
 
61
  def process_qa_df(df):
@@ -141,6 +143,7 @@ def launch_gradio():
141
  with demo:
142
  gr.HTML(TITLE)
143
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
 
144
  for key, dict in dict_lang.items():
145
  with gr.Tab(key):
146
  create_lang_leader_board(dict)
 
25
  for dataset, cates in lang_cates:
26
  dataset_dt = {}
27
  for cat in cates:
28
+ leaderboard_df = pd.read_csv(f'./data_v2/{dataset}_{lang}_{cat}_gen.csv')
29
  dataset_dt[cat] = leaderboard_df
30
  df_dict[dataset] = dataset_dt
31
  return df_dict
 
56
  df = df.sort_values(by="BestScore", ascending=False)
57
  # reset_index
58
  df = df.reset_index()
59
+ # 对于所有空的值,填充为'/'
60
+ df = df.fillna('/')
61
  return df
62
 
63
  def process_qa_df(df):
 
143
  with demo:
144
  gr.HTML(TITLE)
145
  gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
146
+ gr.Markdown("""# 🏅 Leaderboard \n Latest update: 2024-05-15\n""", elem_classes="markdown-text")
147
  for key, dict in dict_lang.items():
148
  with gr.Tab(key):
149
  create_lang_leader_board(dict)
data_v2/bosc_zh_mc_gen.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,37.5,40.0,47.5,52.5,37.5,37.5,42.5,45.0
3
+ Chatglm3-6B,35.0,35.0,50.0,50.0,47.5,47.5,45.0,45.0
4
+ Devops-Model-14B-Chat,35.0,27.5,37.5,52.5,50.0,50.0,55.0,62.5
5
+ Ernie-Bot-4.0,57.5,57.5,60.0,60.0,52.5,52.5,57.5,57.5
6
+ Gpt-3.5-Turbo,50.0,47.5,55.0,55.0,40.0,40.0,50.0,55.0
7
+ GPT-4,57.5,57.5,57.5,57.5,52.5,52.5,62.5,62.5
8
+ Internlm2-Chat-20B,47.5,47.5,,,47.5,47.5,,
9
+ Internlm2-Chat-7B,60.0,60.0,57.5,57.5,55.0,55.0,62.5,62.5
10
+ Llama-2-13B,42.5,42.5,50.0,50.0,50.0,50.0,42.5,42.5
11
+ Llama-2-70B-Chat,0.0,0.0,57.5,57.5,25.0,25.0,45.0,45.0
12
+ Llama-2-7B,32.5,32.5,45.0,45.0,45.0,45.0,45.0,45.0
13
+ Mistral-7B,0.0,0.0,37.5,37.5,20.0,20.0,50.0,50.0
14
+ Qwen-14B-Chat,47.5,45.0,50.0,47.5,50.0,47.5,55.0,57.5
15
+ Qwen-72B-Chat,50.0,50.0,47.5,47.5,45.0,45.0,60.0,60.0
16
+ Yi-34B-Chat,55.0,55.0,60.0,67.5,50.0,50.0,52.5,55.0
17
+ gemma_2b,37.5,37.5,40.0,40.0,32.5,32.5,40.0,40.0
18
+ gemma_7b,32.5,32.5,62.5,62.5,40.0,40.0,50.0,50.0
19
+ Qwen1.5-14B-Base,47.5,52.85714285714286,50.0,47.14285714285714,47.5,52.85714285714286,45.0,30.0
20
+ Qwen1.5-14B-Chat,45.0,47.5,60.0,50.0,52.5,47.5,60.0,45.0
21
+ Qwen1.5-14B-Chat,,47.5,,72.5,,55.0,,60.0
data_v2/bosc_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,54.285714285714285,65.71428571428571
3
+ Meta-Llama-3-8B-Instruct,42.857142857142854,51.42857142857142
data_v2/bosc_zh_qa_gen.csv ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,Faithfulness,Answer_Relevancy,Answer_Correctness,Answer_Similarity
2
+ Yi-6B-Chat,0.7727272727272728,0.39624895698624696,0.7542070590484817,0.8510823274180981
3
+ Internlm2-Chat-20B,0.4974937343358396,0.23934623278394898,0.6204328631578558,0.8259980417246064
4
+ Yi-6B,0.14285714285714285,0.5190011151439459,0.613405924657078,0.7862767598528019
5
+ Qwen1.5-0.5B-Chat,0.42857142857142855,0.488269773660657,0.5753016779392393,0.8382536957664072
6
+ Baichuan2-13B-Chat,0.2857142857142857,0.7405192357284925,0.5708584068359933,0.8691557081776934
7
+ Internlm2-Chat-7B,0.5,0.22600361749502174,0.570841424928178,0.835409602287096
8
+ Gemma-2B,0.011904761904761904,0.2803251363458862,0.557515859867883,0.8041128629884543
9
+ Qwen1.5-4B-Chat,0.42857142857142855,0.7503365683315548,0.5549539275574193,0.8483871388011062
10
+ Vicuna-13B-V1.5,0.7223381250590647,0.5398926752022207,0.5186514512565293,0.6634719608098028
11
+ Yi-9B,0.25,0.31590509156704094,0.4925135259219739,0.7803641425669777
12
+ Baichuan2-7B-Chat,0.42857142857142855,0.5020120070047182,0.4880968089375992,0.8529672546477044
13
+ Vicuna-7B-V1.5,0.14285714285714285,0.5019194353900974,0.4722927782884535,0.8320120749350508
14
+ Gpt-3.5-Turbo,0.6428571428571429,0.7368772770427375,0.46705283670036873,0.8518465563087741
15
+ Qwen1.5-1.8B-Chat,0.40816326530612246,0.6187149363151768,0.46468776283363905,0.8536228462063511
16
+ Qwen1.5-14B-Chat,0.5,0.62189263577569,0.44546489401960593,0.8573697801600568
17
+ Qwen1.5-7B-Chat,0.5714285714285714,0.6304668631060011,0.4380453591507842,0.8549025250385108
18
+ Gemma-7B,0.25,0.09449475661345018,0.4189708246631338,0.7939624827076257
19
+ Mistral-7B,0.14285714285714285,0.10603847794715389,0.3972735247665132,0.7962369562089103
data_v2/dfcdata_zh_mc_gen.csv ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,64.79,66.2,68.31,73.24,62.68,64.08,68.31,66.2
3
+ Chatglm3-6B,55.63380282,55.63380282,61.97183099,61.97183099,51.4084507,51.4084507,57.04225352,57.04225352
4
+ Devops-Model-14B-Chat,33.8,34.51,54.23,56.34,80.99,78.87,51.41,63.38
5
+ Ernie-Bot-4.0,81.0,81.0,82.0,82.0,83.0,83.0,85.0,85.0
6
+ Gpt-3.5-Turbo,77.46,76.06,82.39,81.69,71.13,73.24,80.28,78.87
7
+ GPT-4,85.21,85.21,86.62,86.62,82.39,82.39,90.14,90.14
8
+ Internlm2-Chat-20B,74.64788732,74.64788732,74.64788732,74.64788732,78.16901408,78.16901408,,
9
+ Internlm2-Chat-7B,76.05633803,76.05633803,73.94366197,73.94366197,74.64788732,74.64788732,57.04225352,57.04225352
10
+ Llama-2-13B,45.77,45.77,70.42,70.42,61.97,61.97,61.27,61.27
11
+ Llama-2-70B-Chat,14.79,14.79,67.61,67.61,41.55,40.85,72.54,72.54
12
+ Llama-2-7B,30.28,30.28,45.77,45.77,45.07,45.07,61.97,61.97
13
+ Mistral-7B,2.82,2.82,64.79,64.79,16.9,16.9,64.08,64.08
14
+ Qwen-14B-Chat,73.94,73.94,73.24,76.76,76.06,74.65,69.01,71.83
15
+ Qwen-72B-Chat,86.62,86.62,83.8,83.8,83.8,83.8,83.8,83.8
16
+ Yi-34B-Chat,78.87,80.28,85.92,86.62,86.62,86.62,76.06,85.21
17
+ gemma_2b,28.16901,28.16901,38.02817,38.02817,27.46479,27.46479,41.5493,41.5493
18
+ gemma_7b,35.91549,35.91549,59.15493,59.15493,50.70423,50.70423,66.90141,66.90141
19
+ Qwen1.5-14B-Base,73.23944,73.23944,76.05634,76.05634,81.69014,81.69014,57.04225,57.04225
20
+ Qwen1.5-14B-Chat,75.35211,76.05634,80.28169,83.09859,83.80282,80.98592,78.87324,80.98592
data_v2/gtja_zh_mc_gen.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,41.76,41.76,53.85,60.44,38.46,38.46,49.45,51.65
3
+ Chatglm3-6B,43.95604396,43.95604396,53.84615385,53.84615385,43.95604396,43.95604396,47.25274725,47.25274725
4
+ Devops-Model-14B-Chat,41.76,38.46,45.05,49.45,61.54,59.34,52.75,63.74
5
+ Ernie-Bot-4.0,68.13,68.13,64.84,64.84,65.93,65.93,68.13,68.13
6
+ Gpt-3.5-Turbo,49.45,52.75,59.34,62.64,47.25,52.75,57.14,58.24
7
+ GPT-4,68.13,68.13,67.03,67.03,70.33,70.33,71.43,71.43
8
+ Internlm2-Chat-20B,56.04395604,56.04395604,,,65.93406593,65.93406593,,
9
+ Internlm2-Chat-7B,56.04395604,56.04395604,59.34065934,59.34065934,54.94505495,54.94505495,51.64835165,51.64835165
10
+ Llama-2-13B,30.77,30.77,47.25,47.25,47.25,47.25,42.86,42.86
11
+ Llama-2-70B-Chat,6.59,6.59,48.35,48.35,19.78,19.78,49.45,49.45
12
+ Llama-2-7B,28.57,28.57,45.05,45.05,42.86,42.86,45.05,45.05
13
+ Mistral-7B,5.49,5.49,47.25,47.25,14.29,14.29,38.46,38.46
14
+ Qwen-14B-Chat,47.25,47.25,53.85,54.95,54.95,54.95,59.34,61.54
15
+ Qwen-72B-Chat,71.43,71.43,67.03,67.03,70.33,70.33,74.73,74.73
16
+ Yi-34B-Chat,71.43,74.73,71.43,73.63,69.23,70.33,49.45,47.25
17
+ gemma_2b,30.76923,30.76923,43.95604,43.95604,32.96703,32.96703,29.67033,29.67033
18
+ gemma_7b,29.67033,29.67033,56.04396,56.04396,34.06593,34.06593,50.54945,50.54945
19
+ Qwen1.5-14B-Base,53.84615,36.550632911392405,63.73626,38.08016877637131,68.13187,43.24894514767932,42.85714,34.28270042194093
20
+ Qwen1.5-14B-Chat,56.04396,53.84615,67.03297,63.73626,59.34066,68.13187,60.43956,42.85714
21
+ Qwen1.5-14B-Chat,,54.94505,,68.13187,,57.14286,,62.63736
data_v2/gtja_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,67.08860759493672,70.88607594936708
3
+ Meta-Llama-3-8B-Instruct,43.037974683544306,50.63291139240506
data_v2/huaweicloud_zh_mc_gen.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,6.67,10.0,23.33,23.33,16.67,20.0,26.67,30.0
3
+ Chatglm3-6B,13.33333333,13.33333333,16.66666667,16.66666667,6.666666667,6.666666667,13.33333333,13.33333333
4
+ Devops-Model-14B-Chat,16.67,16.67,33.33,13.33,40.0,40.0,20.0,23.33
5
+ Ernie-Bot-4.0,16.67,16.67,20.0,20.0,36.67,36.67,23.33,23.33
6
+ Gpt-3.5-Turbo,13.33,13.33,20.0,26.67,20.0,20.0,16.67,23.33
7
+ GPT-4,20.0,20.0,20.0,20.0,43.33,43.33,46.67,46.67
8
+ Internlm2-Chat-20B,13.33333333,13.33333333,20.0,20.0,16.66666667,16.66666667,,
9
+ Internlm2-Chat-7B,43.33333333,43.33333333,23.33333333,23.33333333,30.0,30.0,40.0,40.0
10
+ Llama-2-13B,10.0,10.0,20.0,20.0,26.67,26.67,13.33,13.33
11
+ Llama-2-70B-Chat,3.33,3.33,20.0,20.0,23.33,23.33,16.67,16.67
12
+ Llama-2-7B,10.0,10.0,26.67,26.67,16.67,16.67,33.33,33.33
13
+ Mistral-7B,0.0,0.0,23.33,23.33,0.0,0.0,16.67,16.67
14
+ Qwen-14B-Chat,13.33,13.33,20.0,26.67,40.0,30.0,26.67,33.33
15
+ Qwen-72B-Chat,36.67,36.67,33.33,33.33,43.33,43.33,33.33,36.67
16
+ Yi-34B-Chat,36.67,40.0,36.67,30.0,50.0,46.67,30.0,43.33
17
+ gemma_2b,26.66667,26.66667,10.0,10.0,26.66667,26.66667,20.0,20.0
18
+ gemma_7b,3.333333,3.333333,23.33333,23.33333,13.33333,13.33333,30.0,30.0
19
+ Qwen1.5-14B-Base,20.0,27.5,33.33333,22.5,20.0,30.0,30.0,30.0
20
+ Qwen1.5-14B-Chat,23.33333,20.0,13.33333,33.33333,26.66667,20.0,20.0,30.0
21
+ Qwen1.5-14B-Chat,,26.66667,,13.33333,,26.66667,,30.0
data_v2/huaweicloud_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,45.0,45.0
3
+ Meta-Llama-3-8B-Instruct,25.0,20.0
data_v2/lenovo_en_mc_gen.csv ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Chatglm3-6B,60.0,60.0,60.0,60.0,55.0,55.0,60.0,60.0
3
+ Internlm2-Chat-20B,62.5,62.5,,,75.0,75.0,,
4
+ Internlm2-Chat-7B,65.0,65.0,67.5,67.5,75.0,75.0,57.5,57.5
5
+ gemma_2b,22.5,22.5,47.5,47.5,30.0,30.0,37.5,37.5
6
+ gemma_7b,32.5,32.5,65.0,65.0,35.0,35.0,65.0,65.0
7
+ Qwen1.5-14B-Base,67.5,67.5,70.0,70.0,72.5,72.5,50.0,50.0
8
+ Qwen1.5-14B-Chat,67.5,67.5,70.0,70.0,72.5,65.0,77.5,77.5
data_v2/lenovo_zh_mc_gen.csv ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,65.0,60.0,72.5,67.5,62.5,60.0,70.0,67.5
3
+ Chatglm3-6B,60.0,60.0,60.0,60.0,55.0,55.0,60.0,60.0
4
+ Devops-Model-14B-Chat,60.0,67.5,65.0,57.5,67.5,70.0,62.5,70.0
5
+ Ernie-Bot-4.0,75.0,75.0,77.5,77.5,75.0,75.0,82.5,82.5
6
+ Gpt-3.5-Turbo,60.0,62.5,65.0,70.0,57.5,57.5,62.5,62.5
7
+ GPT-4,77.5,77.5,82.5,82.5,77.5,77.5,82.5,82.5
8
+ Llama-2-13B,45.0,45.0,62.5,62.5,60.0,60.0,55.0,55.0
9
+ Llama-2-70B-Chat,22.5,22.5,75.0,75.0,20.0,20.0,57.5,57.5
10
+ Llama-2-7B,32.5,32.5,45.0,45.0,60.0,60.0,55.0,55.0
11
+ Mistral-7B,47.5,47.5,62.5,62.5,35.0,35.0,60.0,60.0
12
+ Qwen-14B-Chat,70.0,67.5,70.0,67.5,70.0,65.0,65.0,67.5
13
+ Qwen-72B-Chat,72.5,72.5,75.0,75.0,75.0,75.0,75.0,75.0
14
+ Yi-34B-Chat,75.0,75.0,87.5,82.5,62.5,57.5,52.5,52.5
15
+ Meta-Llama-3-8B-Instruct,47.14285714285714,47.14285714285714,44.285714285714285,44.285714285714285,45.714285714285715,45.714285714285715,32.857142857142854,32.857142857142854
data_v2/lenovo_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,82.85714285714286,85.71428571428571
3
+ Meta-Llama-3-8B-Instruct,62.857142857142854,80.0
data_v2/network_en_mc_gen.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Aquilachat2-34B,36.63,36.63,44.83,44.83,46.65,46.65,,
3
+ Baichuan-13B-Chat,18.3,20.4,28.6,37.0,24.1,26.7,18.2,17.8
4
+ Baichuan2-13B-Chat,14.1,15.3,24.1,25.8,32.3,33.1,25.6,27.7
5
+ Chatglm2-6B,24.8,24.7,36.6,36.5,37.6,37.6,40.5,40.5
6
+ Chatglm3-6B,43.38487973,43.38487973,44.58762887,44.58762887,42.09621993,42.09621993,43.47079038,43.47079038
7
+ Chinese-Alpaca-2-13B,37.7,37.7,49.7,49.7,48.6,48.6,50.5,50.5
8
+ Chinese-Llama-2-13B,29.4,29.4,37.8,37.8,40.4,40.4,28.8,28.8
9
+ Devops-Model-14B-Chat,30.69,30.59,55.77,63.63,63.85,61.96,41.15,44.01
10
+ Ernie-Bot-4.0,61.15,61.15,70.0,70.0,60.0,60.0,70.0,70.0
11
+ Gpt-3.5-Turbo,66.6,66.8,69.6,72.0,68.3,68.3,70.9,72.5
12
+ Gpt-4,,,,,,,88.7,88.7
13
+ Internlm-7B,38.7,38.7,43.9,43.9,45.2,45.2,51.4,51.4
14
+ Internlm2-Chat-20B,56.35738832,56.35738832,26.18025751,26.18025751,60.48109966,60.48109966,45.10309278,45.10309278
15
+ Internlm2-Chat-7B,49.74226804,49.74226804,56.18556701,56.18556701,48.19587629,48.19587629,49.74226804,49.74226804
16
+ Llama-2-13B,41.8,46.5,53.1,58.7,53.3,53.0,56.8,61.0
17
+ Llama-2-70B-Chat,25.29,25.29,57.97,58.06,52.97,52.97,58.55,58.55
18
+ Llama-2-7B,39.5,40.0,45.4,49.5,48.2,46.8,52.0,55.2
19
+ Mistral-7B,29.27,29.27,46.3,46.3,47.22,47.22,45.58,45.58
20
+ Qwen-14B-Chat,43.78,47.81,56.58,59.4,62.09,59.7,49.06,55.88
21
+ Qwen-72B-Chat,70.41,70.5,72.38,72.56,70.32,70.32,70.13,70.22
22
+ Qwen-7B-Chat,45.9,46.0,47.3,50.1,52.1,51.0,48.3,49.8
23
+ Yi-34B-Chat,57.75,59.14,65.11,68.79,68.16,68.37,78.09,80.06
24
+ gemma_2b,26.46048,26.46048,33.41924,33.41924,26.6323,26.6323,37.54296,37.54296
25
+ gemma_7b,25.08591,25.08591,50.85911,50.85911,30.24055,30.24055,51.55747,51.55747
26
+ Qwen1.5-14B-Base,34.87973,38.279481659390655,60.82474,76.69172932330827,65.54983,23.734458771084668,47.07904,33.241749376506874
27
+ Qwen1.5-14B-Chat,54.89691,34.87973,64.08935,60.82474,52.23368,65.54983,59.53608,47.07904
28
+ Qwen1.5-14B-Chat,,56.4433,,67.09622,,53.52234,,64.17526
data_v2/network_en_mc_ppl.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-8B-Instruct,57.03544575725027,65.41353383458647
data_v2/network_en_qa_gen.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,rouge1,rouge2,rouge_l,rouge_lsum,score,bp,sys_len,gpt4_score
2
+ GPT-3.5-turbo,13.38,5.65,12.13,12.26,6.78,1,2966,8.47
3
+ LLaMA-2-70B,8.69,2.51,7.62,7.74,4.2,1,4970,7.28
4
+ LLaMA-2-13B,5.75,1.68,5.03,4.98,3.43,1,8239,7.16
5
+ Chinese-Alpaca-2-13B,3.48,0.96,3.19,3.25,1.85,1,14716,6.66
6
+ Baichuan-13B-Chat,5.58,1.85,4.66,4.76,0.35,1,9577,5.85
7
+ Qwen-7B-Chat,13.03,4.76,11.61,11.82,4.33,1,3091,5.63
8
+ ChatGLM2-6B,10.43,3.24,9.82,9.71,5.07,0.91,2492,4.88
9
+ InternLM-7B,14.34,5.39,13.3,13.27,0.54,1,3112,4.52
10
+ Chinese-LLaMA-2-13B,9.18,2.9,9.22,9.19,0.24,1,32006,2.39
11
+
data_v2/network_zh_mc_gen.csv ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Aquilachat2-34B,34.66,34.66,47.74,47.74,44.48,44.48,,
3
+ Baichuan-13B-Chat,15.2,16.0,43.9,49.7,34.3,36.1,51.3,55.6
4
+ Baichuan2-13B-Chat,35.6,35.9,30.5,30.5,34.6,35.6,30.2,32.0
5
+ Chatglm2-6B,33.8,33.7,42.1,42.2,36.0,36.0,39.5,39.5
6
+ Chatglm3-6B,41.39414802,41.39414802,49.22547332,49.22547332,38.81239243,38.81239243,42.85714286,42.85714286
7
+ Chinese-Alpaca-2-13B,33.1,33.1,44.2,44.2,44.0,44.0,42.7,42.7
8
+ Chinese-Llama-2-13B,22.5,22.5,38.8,38.8,41.8,41.8,32.2,32.2
9
+ Devops-Model-14B-Chat,47.59,46.57,52.52,56.01,62.07,60.08,50.59,55.79
10
+ Ernie-Bot-4.0,67.54,67.54,71.96,71.96,72.0,72.0,78.0,78.0
11
+ Glm3-Turbo,59.63855422,59.63855422,,,,,,
12
+ Glm4,67.383821,67.383821,,,,,,
13
+ Gpt-3.5-Turbo,58.4,58.6,64.8,67.6,59.2,59.7,65.2,67.4
14
+ Gpt-4,,,,,,,86.0,86.0
15
+ Hunyuan-13B,60.0,60.0,70.0,70.0,,,,
16
+ Internlm-7B,41.7,41.7,38.4,38.4,42.6,42.6,41.3,41.3
17
+ Internlm2-Chat-20B,57.48709122,57.48709122,57.14285714,57.14285714,59.1222031,59.1222031,50.77452668,50.77452668
18
+ Internlm2-Chat-7B,54.30292599,54.30292599,59.81067126,59.81067126,58.51979346,58.51979346,51.63511188,51.63511188
19
+ Llama-2-13B,29.7,31.6,51.6,57.0,39.6,38.9,48.0,50.6
20
+ Llama-2-70B-Chat,38.55,38.55,57.49,57.49,49.09,49.09,48.57,48.57
21
+ Llama-2-7B,29.8,30.2,50.1,55.6,38.6,40.8,45.6,50.4
22
+ Mistral-7B,1.9,1.9,45.61,45.61,15.0,15.0,35.97,35.97
23
+ Qwen-14B-Chat,48.35,48.81,55.35,57.4,58.53,56.12,52.12,54.99
24
+ Qwen-72B-Chat,65.77,65.86,68.13,68.3,69.4,69.4,69.99,70.08
25
+ Qwen-7B-Chat,29.6,29.9,50.6,53.5,50.4,46.9,46.9,47.7
26
+ Yi-34B-Chat,61.61,62.56,68.11,69.75,65.73,65.37,69.88,71.21
27
+ gemma_2b,29.69019,29.69019,39.15663,39.15663,29.77625,29.77625,38.64028,38.64028
28
+ gemma_7b,31.58348,31.58348,47.59036,47.59036,34.68158,34.68158,48.88124,48.88124
29
+ Qwen1.5-14B-Base,45.18072,35.904696806952444,59.1222,38.94801939914722,61.10155,41.717931191615406,52.4957,31.059792337987826
30
+ Qwen1.5-14B-Chat,54.04475,45.18072,62.56454,59.1222,58.77797,61.10155,63.42513,52.4957
31
+ Qwen1.5-14B-Chat,,53.87263,,63.85542,,58.0895,,65.57659
data_v2/network_zh_mc_ppl.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-8B-Instruct,48.97959183673469,55.85392051557465
data_v2/network_zh_qa_gen.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ name,rouge1,rouge2,rouge_l,rouge_lsum,score,bp,sys_len,gpt4_score
2
+ GPT-3.5-turbo,17.28,6.39,16.84,16.87,1.89,0.74,368,6.98
3
+ ChatGLM2-6B,6.92,1.97,6.83,6.75,0.11,1,1867,4.46
4
+ InternLM-7B,2.76,1.03,2.76,2.76,0.01,1,6053,2.22
5
+ Baichuan-13B-Chat,9.09,3.67,9.04,9.2,0.53,1,1125,5.14
6
+ LLaMA-2-13B,4.29,1.29,4.2,4.22,0.23,1,1581,5.03
7
+ Chinese-LLaMA-2-13B,4.96,4.11,4.7,4.73,0.01,1,11371,1.77
8
+ Chinese-Alpaca-2-13B,10.03,2.19,9.86,9.97,0.02,1,2605,4.71
9
+ Qwen-7B-Chat,10,2.45,9.94,10.05,0.23,0.42,257,5.07
10
+
data_v2/oracle_en_mc_gen.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Aquilachat2-34B,36.63,36.63,44.83,44.83,46.65,46.65,,
3
+ Baichuan-13B-Chat,12.47,11.67,16.5,19.52,24.55,22.54,26.36,28.77
4
+ Baichuan2-13B-Chat,17.1,19.1,18.7,22.9,25.9,26.5,20.9,24.5
5
+ Chatglm2-6B,20.72,20.52,19.92,19.72,20.12,20.12,22.94,22.74
6
+ Chatglm3-6B,20.92555332,20.92555332,25.15090543,25.15090543,24.74849095,24.74849095,29.1750503,29.1750503
7
+ Chinese-Alpaca-2-13B,23.14,23.14,28.97,28.97,16.3,16.3,14.29,14.29
8
+ Chinese-Llama-2-13B,13.88,13.88,20.52,20.52,16.9,16.9,23.34,23.34
9
+ Devops-Model-14B-Chat,25.15,26.96,35.41,38.83,33.2,34.81,27.36,27.36
10
+ Ernie-Bot-4.0,43.8,43.8,47.14,47.14,46.0,46.0,54.0,54.0
11
+ Gpt-3.5-Turbo,38.63,38.83,40.04,42.05,36.62,37.63,42.66,43.86
12
+ Gpt-4,,,59.02,64.56,,,58.35,62.58
13
+ Internlm-7B,26.36,26.36,25.55,25.55,25.55,25.55,27.97,27.97
14
+ Internlm2-Chat-20B,,,59.21052632,59.21052632,,,,
15
+ Internlm2-Chat-7B,27.16297787,27.16297787,28.16901408,28.16901408,29.97987928,29.97987928,30.18108652,30.18108652
16
+ Llama-2-13B,16.1,20.32,23.94,29.58,20.12,22.33,24.35,33.8
17
+ Llama-2-70B-Chat,19.72,19.72,27.97,27.97,26.56,26.56,32.6,32.6
18
+ Llama-2-7B,22.13,23.74,23.74,26.56,19.32,20.52,28.77,33.6
19
+ Mistral-7B,17.1,17.1,26.76,26.76,31.19,31.19,27.97,27.97
20
+ Qwen-14B-Chat,24.95,28.37,33.0,36.62,27.97,28.37,27.97,24.14
21
+ Qwen-72B-Chat,47.28,47.48,48.09,48.09,49.7,49.7,43.46,43.66
22
+ Qwen-7B-Chat,18.91,19.11,22.13,23.94,26.76,25.55,34.81,33.4
23
+ Yi-34B-Chat,47.08,48.69,47.08,46.28,58.15,58.35,56.94,58.95
24
+ gemma_2b,16.90141,16.90141,19.5171,19.5171,16.09658,16.09658,24.74849,24.74849
25
+ gemma_7b,14.28571,14.28571,30.98592,30.98592,2.60223,2.60223,43.85965,43.85965
26
+ Qwen1.5-14B-Base,29.17505,28.468825409248026,33.60161,40.47805387073632,36.82093,23.33528989760647,27.7666,34.6197743429205
27
+ Qwen1.5-14B-Chat,32.79678,29.17505,39.43662,33.60161,32.39437,36.82093,36.82093,27.7666
28
+ Qwen1.5-14B-Chat,,35.41247,,43.05835,,33.60161,,38.833
data_v2/oracle_en_mc_ppl.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ name,zero_self_con
2
+ Meta-Llama-3-8B-Instruct,48.68421052631579
data_v2/oracle_zh_mc_gen.csv ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Aquilachat2-34B,34.66,34.66,47.74,47.74,44.48,44.48,,
3
+ Baichuan-13B-Chat,12.88,12.07,25.96,27.57,18.91,19.52,27.97,30.58
4
+ Baichuan2-13B-Chat,25.7,25.5,20.1,21.3,27.7,26.7,22.7,24.7
5
+ Chatglm2-6B,23.34,23.34,24.35,24.14,22.94,22.94,26.16,26.16
6
+ Chatglm3-6B,21.32796781,21.32796781,28.97384306,28.97384306,21.73038229,21.73038229,29.57746479,29.57746479
7
+ Chinese-Alpaca-2-13B,22.94,22.94,25.75,25.75,25.15,25.15,22.33,22.33
8
+ Chinese-Llama-2-13B,14.69,14.69,19.92,19.92,19.72,19.72,20.93,20.93
9
+ Devops-Model-14B-Chat,24.75,22.74,28.37,27.77,36.62,37.02,27.57,26.36
10
+ Ernie-Bot-4.0,48.56,48.56,50.64,50.64,48.0,48.0,54.0,54.0
11
+ Gpt-3.5-Turbo,36.42,35.81,39.24,43.26,39.84,39.44,27.16,27.77
12
+ Gpt-4,,,59.38,65.17,,,44.06,48.09
13
+ Internlm-7B,25.96,25.96,25.96,25.96,29.18,29.18,28.37,28.37
14
+ Internlm2-Chat-7B,28.57142857,28.57142857,31.79074447,31.79074447,30.78470825,30.78470825,31.18712274,31.18712274
15
+ Llama-2-13B,23.94,24.35,29.58,31.99,24.55,26.76,21.13,20.72
16
+ Llama-2-70B-Chat,15.29,15.29,34.81,34.81,26.76,26.76,33.8,33.8
17
+ Llama-2-7B,20.72,20.72,27.16,27.97,21.53,18.51,18.31,17.91
18
+ Mistral-7B,1.9,1.9,45.61,45.61,15.0,15.0,35.97,35.97
19
+ Qwen-14B-Chat,27.57,27.57,32.39,36.02,40.04,35.41,30.38,33.4
20
+ Qwen-72B-Chat,48.29,48.49,49.5,49.7,49.7,49.7,45.27,44.87
21
+ Qwen-7B-Chat,18.51,17.71,27.36,28.37,29.78,29.58,33.6,31.79
22
+ Yi-34B-Chat,49.9,49.3,52.72,53.72,56.34,56.34,51.31,54.33
23
+ gemma_2b,18.51107,18.51107,24.9497,24.9497,21.52918,21.52918,27.7666,27.7666
24
+ gemma_7b,19.3159,19.3159,53.94737,53.94737,18.51107,18.51107,5.204461,5.204461
25
+ Qwen1.5-14B-Base,20.92555,33.91785690993282,35.61368,27.773429857170807,41.44869,41.359323028761494,30.78471,32.62733972477663
26
+ Qwen1.5-14B-Chat,24.14487,20.92555,40.64386,35.61368,38.22938,41.44869,39.43662,30.78471
27
+ Qwen1.5-14B-Chat,,23.34004,,41.04628,,38.02817,,40.04024
data_v2/oracle_zh_mc_ppl.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-8B-Instruct,48.24561403508772,64.03508771929825
data_v2/owl_en_qa_gen.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,Faithfulness,Answer_Relevancy,Answer_Correctness,Answer_Similarity
2
+ Yi-6B-Chat,0.7030488252628491,0.8539968939451946,0.8345303291521267,0.8525487106316666
3
+ Qwen1.5-7B-Chat,0.7730288486126379,0.800263654571703,0.7701434694510604,0.8096226126029853
4
+ Vicuna-13B-V1.5,0.7252900133287701,0.8001237927827157,0.7513908712676198,0.8129508434301099
5
+ Gpt-3.5-Turbo,0.7601832993890021,0.9508419461436827,0.5945237008802382,0.8766794953769677
6
+ Qwen1.5-14B-Chat,0.8108575380359613,0.9186608285564491,0.5796170633787928,0.8627921465440817
7
+ Internlm2-Chat-20B,0.8823450852329868,0.8990122482812408,0.5663255561571012,0.817104818105292
8
+ Internlm2-Chat-7B,0.8716367920317769,0.9049173556355747,0.5566486218868514,0.8194293421446569
9
+ Vicuna-7B-V1.5,0.6687478686911705,0.8847336678547908,0.5491987169778965,0.8538950235036584
10
+ Qwen1.5-4B-Chat,0.7161414565826331,0.916949622281115,0.5415164042157119,0.8588077047327288
11
+ Qwen1.5-1.8B-Chat,0.7559747023809523,0.9469277644039529,0.5355121893517637,0.8511550798429494
12
+ Baichuan2-13B-Chat,0.724778459441036,0.9033782254193811,0.5324917996259314,0.8430175816264579
13
+ Baichuan2-7B-Chat,0.663319530710835,0.8543448236955469,0.5222686618152338,0.8364213008907668
14
+ Gemma-7B,0.5647578582126265,0.6814204309035338,0.5202336438594105,0.7806024397207423
15
+ Qwen1.5-0.5B-Chat,0.5679874805086168,0.8611226406276706,0.513748281764636,0.812332476681601
16
+ Mistral-7B,0.6586367313915859,0.7039079054469578,0.5078017923324171,0.7902698697096028
17
+ Gemma-2B,0.5049161881111284,0.6528267517862424,0.5059908632023802,0.7736166726699579
18
+ Yi-6B,0.5063160585604476,0.6749962990823568,0.49929516708962135,0.7789524853407436
data_v2/owl_zh_qa_gen.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,Faithfulness,Answer_Relevancy,Answer_Correctness,Answer_Similarity
2
+ Yi-6B-Chat,0.7600815667858686,0.7912540642980099,0.7610564816532298,0.6761740477117962
3
+ Qwen1.5-7B-Chat,0.7231431514350063,0.6963952727655642,0.7350526525675435,0.6957602523500673
4
+ Internlm2-Chat-7B,0.46693736812083075,0.30772894154056324,0.6038537202079961,0.7424094998006903
5
+ Gpt-3.5-Turbo,0.42171467364950055,0.8691295573045116,0.6038112679436712,0.870169867075605
6
+ Vicuna-13B-V1.5,0.613175326243208,0.682444153024839,0.5924435357123905,0.6493806776292096
7
+ Qwen1.5-14B-Chat,0.4991210277214334,0.7953872813217998,0.5891222801836271,0.8588771162081673
8
+ Internlm2-Chat-20B,0.7918762142818747,0.6187939679354695,0.5756624792803415,0.8195634463645642
9
+ Baichuan2-13B-Chat,0.4038672142368241,0.8192887757169468,0.5623602404354114,0.8562630521329339
10
+ Qwen1.5-4B-Chat,0.38204865489701556,0.8389697689558571,0.5521128189924648,0.8573317706502359
11
+ Gemma-7B,0.4059392201442353,0.35160449208958283,0.5377158689736348,0.7911868222195938
12
+ Qwen1.5-1.8B-Chat,0.5491781930806321,0.769059886385716,0.5344330706846868,0.8437705436461957
13
+ Baichuan2-7B-Chat,0.3683127572016461,0.7749038932071436,0.5101570739448591,0.8504995667294786
14
+ Yi-6B,0.366171888675488,0.35434640725576727,0.48557644112672105,0.7941020553273606
15
+ Qwen1.5-0.5B-Chat,0.2617687074829932,0.7710346032394836,0.4777335192926036,0.8346118169208395
16
+ Gemma-2B,0.45988486660889083,0.33416817486815076,0.4592030663597664,0.744093765199015
17
+ Mistral-7B,0.6409280685903916,0.4271051397084787,0.453115837113527,0.6903975251587856
18
+ Vicuna-7B-V1.5,0.39214979579762405,0.7771452487068145,0.44308513423549384,0.819478102998531
data_v2/pufa_zh_mc_gen.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,65.33,66.67,66.67,66.67,62.67,61.33,62.67,62.67
3
+ Chatglm3-6B,60.0,60.0,61.33333333,61.33333333,56.0,56.0,58.66666667,58.66666667
4
+ Devops-Model-14B-Chat,29.33,29.33,62.67,61.33,82.67,81.33,53.33,70.67
5
+ Ernie-Bot-4.0,86.67,86.67,86.67,86.67,82.67,82.67,86.67,86.67
6
+ Gpt-3.5-Turbo,77.33,77.33,84.0,81.33,76.0,78.67,84.0,82.67
7
+ GPT-4,88.0,88.0,86.67,86.67,84.0,84.0,90.67,90.67
8
+ Internlm2-Chat-20B,76.0,76.0,80.0,80.0,80.0,80.0,,
9
+ Internlm2-Chat-7B,78.66666667,78.66666667,72.0,72.0,72.0,72.0,53.33333333,53.33333333
10
+ Llama-2-13B,44.0,44.0,68.0,68.0,61.33,61.33,53.33,53.33
11
+ Llama-2-70B-Chat,6.67,6.67,65.33,65.33,49.33,49.33,66.67,66.67
12
+ Llama-2-7B,25.33,25.33,40.0,40.0,48.0,48.0,52.0,52.0
13
+ Mistral-7B,4.0,4.0,58.67,58.67,22.67,22.67,54.67,54.67
14
+ Qwen-14B-Chat,73.33,73.33,69.33,72.0,73.33,73.33,72.0,80.0
15
+ Qwen-72B-Chat,90.67,90.67,85.33,85.33,88.0,88.0,82.67,82.67
16
+ Yi-34B-Chat,84.0,84.0,88.0,88.0,90.67,92.0,78.67,89.33
17
+ gemma_2b,36.0,36.0,41.33333,41.33333,36.0,36.0,30.66667,30.66667
18
+ gemma_7b,34.66667,34.66667,56.0,56.0,46.66667,46.66667,56.0,56.0
19
+ Qwen1.5-14B-Base,78.66667,85.8108108108108,72.0,31.756756756756754,92.0,83.1081081081081,42.66667,27.7027027027027
20
+ Qwen1.5-14B-Chat,86.66667,78.66667,85.33333,72.0,78.66667,92.0,86.66667,42.66667
21
+ Qwen1.5-14B-Chat,,89.33333,,85.33333,,80.0,,85.33333
data_v2/pufa_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,85.13513513513513,89.1891891891892
3
+ Meta-Llama-3-8B-Instruct,,74.32432432432432
data_v2/rzy_zh_mc_gen.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,60.17,60.17,62.79,67.5,59.06,59.34,64.45,64.32
3
+ Chatglm3-6B,54.21853389,54.21853389,62.10235131,62.10235131,55.32503458,55.32503458,59.33609959,59.33609959
4
+ Devops-Model-14B-Chat,53.67,56.85,54.5,59.2,65.28,64.18,55.19,61.83
5
+ Ernie-Bot-4.0,76.0,76.0,79.0,79.0,73.0,73.0,77.0,77.0
6
+ Gpt-3.5-Turbo,65.28,65.42,66.39,67.5,65.28,66.25,68.05,68.74
7
+ GPT-4,65.56,65.56,68.05,68.05,65.28,65.28,68.19,68.19
8
+ Internlm2-Chat-20B,63.90041494,63.90041494,64.03872752,64.03872752,,,,
9
+ Internlm2-Chat-7B,61.2724758,61.2724758,63.62378976,63.62378976,65.00691563,65.00691563,54.21853389,54.21853389
10
+ Llama-2-13B,51.18,51.18,59.06,59.06,57.12,57.12,53.39,53.39
11
+ Llama-2-70B-Chat,5.26,5.26,62.52,62.52,48.82,48.82,59.75,59.75
12
+ Llama-2-7B,34.85,34.85,44.95,44.95,46.2,46.2,53.39,53.39
13
+ Mistral-7B,18.53,18.53,60.3,60.3,29.88,29.88,59.75,59.75
14
+ Qwen-14B-Chat,61.96,61.55,61.55,64.45,65.28,63.49,62.93,65.98
15
+ Qwen-72B-Chat,66.67,66.67,65.28,65.28,65.98,65.98,70.12,70.12
16
+ Yi-34B-Chat,64.45,64.59,67.77,67.36,60.17,60.03,57.68,57.54
17
+ gemma_2b,36.37621,36.37621,45.22822,45.22822,33.60996,33.60996,37.75934,37.75934
18
+ gemma_7b,39.41909,39.41909,54.77178,54.77178,42.04703,42.04703,56.70816,56.70816
19
+ Qwen1.5-14B-Base,51.17566,36.55172413793103,62.6556,28.27586206896552,65.42185,38.62068965517241,50.89903,34.48275862068966
20
+ Qwen1.5-14B-Chat,62.93223,51.17566,64.59198,62.6556,63.34716,65.42185,65.42185,50.89903
21
+ Qwen1.5-14B-Chat,,64.03873,,64.31535,,63.7621,,65.9751
data_v2/rzy_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,83.39100346020761,84.77508650519032
3
+ Meta-Llama-3-8B-Instruct,,77.33564013840831
data_v2/rzy_zh_qa_gen.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,Faithfulness,Answer_Relevancy,Answer_Correctness,Answer_Similarity
2
+ Qwen1.5-7B-Chat,0.736588808239606,0.7333071013844119,0.7653953121660069,0.7581706400802051
3
+ Yi-6B-Chat,0.8623512542497523,0.8477785816251643,0.7511472076832977,0.6674107034023887
4
+ Internlm2-Chat-20B,0.7579980374579645,0.5592058560996795,0.6230274906657811,0.8809475791391357
5
+ Qwen1.5-1.8B-Chat,0.6993731684981686,0.6912058643011517,0.5921887447381617,0.895549081535767
6
+ Gemma-2B,0.4793590757810048,0.269158909387365,0.5715789320903422,0.8084277426065398
7
+ Yi-6B,0.39791261211082174,0.35010016811319283,0.5554912360774231,0.8151911650862558
8
+ Internlm2-Chat-7B,0.6057084170408346,0.28950531392496315,0.5513185635050407,0.8527933874140172
9
+ Vicuna-13B-V1.5,0.6149588477366256,0.7175132054894446,0.5484350782035007,0.8846316742953054
10
+ Gpt-3.5-Turbo,0.6702526487367563,0.8535199907928265,0.5380443637081317,0.9113351056689803
11
+ Baichuan2-7B-Chat,0.6457107843137256,0.7989283627012825,0.5355149927949222,0.8918899008657395
12
+ Qwen1.5-14B-Chat,0.7039449112978525,0.7891124698018288,0.5351538957435175,0.9060753469650263
13
+ Qwen1.5-4B-Chat,0.6079656862745099,0.798414770802262,0.5349164010626877,0.8926774424126845
14
+ Vicuna-7B-V1.5,0.5618038576473784,0.7385375964159062,0.5346381268062822,0.8785135365491068
15
+ Qwen1.5-0.5B-Chat,0.5161804573314475,0.7335961705843393,0.5329134165403151,0.878000802003553
16
+ Baichuan2-13B-Chat,0.6229674796747967,0.8122416536307804,0.5111467259298673,0.8969644779921856
17
+ Gemma-7B,0.6952392516403653,0.42448628847691194,0.4304401424621823,0.6676771540611001
18
+ Mistral-7B,0.8060009447278426,0.5415825155389061,0.39330883346357015,0.5908077476385994
data_v2/tencent_zh_qa_gen.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,gpt4_score
2
+ Baichuan2-13B-Chat,8.727272727272727
3
+ DevOps-Model-14B-Chat,8.25974026
4
+ LLaMA-2-13B,7.636363636363637
5
+ LLaMA-2-70B-Chat,7.740259740259741
6
+ Mistral-7B,7.8441558441558445
7
+ Qwen-14B-Chat,8.642857142857142
8
+ Qwen-72B-Chat,8.811688311688311
9
+ GPT4,9.019480519480519
10
+ Yi-34B-Chat,8.844155844155845
11
+ ChatGLM3-6B,8.577922077922079
12
+ LLaMA-2-7B,5.318181818181818
13
+ GPT-3.5-turbo,8.850649351
14
+
data_v2/zabbix_zh_mc_gen.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan2-13B-Chat,31.0,29.0,47.0,47.0,29.0,27.0,40.0,43.0
3
+ Chatglm3-6B,29.0,29.0,36.0,36.0,29.0,29.0,34.0,34.0
4
+ Devops-Model-14B-Chat,27.0,28.0,36.0,33.0,46.0,44.0,44.0,46.0
5
+ Ernie-Bot-4.0,44.0,44.0,48.0,48.0,47.0,47.0,51.0,51.0
6
+ Gpt-3.5-Turbo,36.0,36.0,42.0,42.0,40.0,40.0,48.0,48.0
7
+ GPT-4,51.0,51.0,53.0,53.0,60.0,60.0,59.0,59.0
8
+ Internlm2-Chat-20B,41.0,41.0,,,44.0,44.0,,
9
+ Internlm2-Chat-7B,43.0,43.0,39.0,39.0,45.0,45.0,35.0,35.0
10
+ Llama-2-13B,28.0,28.0,45.0,45.0,40.0,40.0,43.0,43.0
11
+ Llama-2-70B-Chat,1.0,1.0,47.0,47.0,29.0,29.0,46.0,46.0
12
+ Llama-2-7B,18.0,18.0,35.0,35.0,22.0,22.0,28.0,28.0
13
+ Mistral-7B,6.0,6.0,42.0,42.0,11.0,11.0,44.0,44.0
14
+ Qwen-14B-Chat,36.0,36.0,39.0,41.0,44.0,40.0,47.0,43.0
15
+ Qwen-72B-Chat,46.0,46.0,44.0,44.0,45.0,45.0,61.0,61.0
16
+ Yi-34B-Chat,40.0,40.0,40.0,40.0,42.0,42.0,42.0,42.0
17
+ gemma_2b,25.0,25.0,32.0,32.0,24.0,24.0,30.0,30.0
18
+ gemma_7b,22.0,22.0,44.0,44.0,28.0,28.0,40.0,40.0
19
+ Qwen1.5-14B-Base,38.0,39.670138888888886,39.0,37.58680555555556,48.0,30.381944444444443,36.0,33.072916666666664
20
+ Qwen1.5-14B-Chat,34.0,38.0,45.0,39.0,42.0,48.0,48.0,36.0
21
+ Qwen1.5-14B-Chat,,34.0,,43.0,,39.0,,49.0
data_v2/zabbix_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,70.3125,76.5625
3
+ Meta-Llama-3-8B-Instruct,51.5625,56.25
data_v2/zabbix_zh_qa_gen.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,Faithfulness,Answer_Relevancy,Answer_Correctness,Answer_Similarity
2
+ Qwen1.5-7B-Chat,0.7784179666191972,0.6446005816609537,0.754576907135137,0.685739970384688
3
+ Gemma-2B,0.24509853619131058,0.3864916275690845,0.7478997831333918,0.5331677452195527
4
+ Qwen1.5-1.8B-Chat,0.6049019607843137,0.548904353980019,0.7198411238809446,0.9040869829649206
5
+ Yi-6B-Chat,0.7810660719751629,0.7167976589186875,0.6766795202404136,0.9015276627615246
6
+ Gpt-3.5-Turbo,0.7246376811594203,0.8043651144702015,0.6509014034090643,0.9218230132938211
7
+ Vicuna-7B-V1.5,0.4022032693674485,0.6008780718971581,0.6017002573832742,0.8863580258321797
8
+ Qwen1.5-14B-Chat,0.6328502415458936,0.7221033096305126,0.5966395914029399,0.9145067669966427
9
+ Qwen1.5-4B-Chat,0.5223151244890376,0.7184562339362471,0.5920241633149731,0.9031485610681586
10
+ Qwen1.5-0.5B-Chat,0.4166666666666667,0.6888304890847555,0.5800664845337345,0.8918967698708089
11
+ Internlm2-Chat-20B,0.7642667437926058,0.4826757943830156,0.5799585238847701,0.8876497593181047
12
+ Baichuan2-7B-Chat,0.48357487922705317,0.6222193378376162,0.578165574028535,0.9011250283968237
13
+ Vicuna-13B-V1.5,0.5991387785360396,0.6932977841508635,0.5747798091575604,0.8409686000423541
14
+ Baichuan2-13B-Chat,0.4896135265700483,0.7843769138572264,0.5653592173980313,0.9083387421281699
15
+ Internlm2-Chat-7B,0.558064058956916,0.10309630273051296,0.5526199329356135,0.8526944078477274
16
+ Yi-6B,0.3099052131839017,0.3558360880812697,0.5458649977309493,0.8362572777967558
17
+ Mistral-7B,0.6256150793650794,0.29009866821782115,0.5221570098966367,0.8399808519337731
18
+ Gemma-7B,0.4451515151515152,0.3045735267275342,0.5168775971677172,0.8368893516841295
data_v2/zjyd_zh_mc_gen.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan-13B-Chat,11.04,11.13,26.92,28.61,14.35,13.22,31.69,33.97
3
+ Chatglm2-6B,23.09,23.12,24.22,24.08,30.46,30.46,35.97,35.9
4
+ Chatglm3-6B,32.6,32.6,35.4,35.4,28.3,28.3,40.9,40.9
5
+ Chinese-Alpaca-2-13B,22.69,22.69,24.59,24.59,40.52,40.52,40.73,40.73
6
+ Chinese-Llama-2-13B,17.98,17.98,17.83,17.83,31.66,31.66,36.24,36.24
7
+ Devops-Model-14B-Chat,41.04,42.7,48.71,53.57,56.85,57.25,51.3,54.29
8
+ Ernie-Bot-4.0,45.99,45.99,48.98,48.98,46.0,46.0,54.0,54.0
9
+ Glm3-Turbo,43.0,43.0,,,,,,
10
+ Glm4,50.0,50.0,,,,,,
11
+ Gpt-3.5-Turbo,37.06,36.83,37.56,39.25,39.42,39.77,41.96,42.15
12
+ Gpt-4,,,57.35,62.11,,,61.2,65.68
13
+ Internlm-7B,27.81,27.81,19.95,19.95,24.18,24.18,35.35,35.35
14
+ Internlm2-Chat-20B,44.6,44.6,47.0,47.0,62.2,62.2,38.3,38.3
15
+ Internlm2-Chat-7B,38.8,38.8,44.6,44.6,46.0,46.0,35.8,35.8
16
+ Llama-2-13B,25.43,27.16,29.17,29.99,36.56,36.15,37.7,39.02
17
+ Llama-2-70B-Chat,24.38,24.38,43.63,43.63,44.65,44.65,48.84,48.84
18
+ Llama-2-7B,24.09,23.47,28.69,29.26,29.94,30.03,31.35,31.93
19
+ Mistral-7B,1.27,1.27,42.05,42.05,30.72,30.72,46.44,46.44
20
+ Qwen-14B-Chat,41.71,41.44,45.58,47.98,53.52,49.92,54.72,58.85
21
+ Qwen-72B-Chat,64.79,64.79,65.79,65.72,70.19,70.19,68.31,68.38
22
+ Qwen-7B-Chat,36.28,36.5,33.18,33.51,41.58,40.59,31.48,31.46
23
+ Yi-34B-Chat,64.91,64.58,62.77,65.51,70.85,70.92,48.77,47.97
24
+ gemma_2b,25.6,25.6,28.3,28.3,19.1,19.1,35.5,35.5
25
+ gemma_7b,27.3,27.3,35.4,35.4,17.3,17.3,44.5,44.5
26
+ Qwen1.5-14B-Base,49.1,63.425925925925924,49.9,0.0,62.5,66.2037037037037,41.3,25.0
27
+ Qwen1.5-14B-Chat,38.6,49.1,48.8,49.9,54.6,62.5,52.1,41.3
28
+ Qwen1.5-14B-Chat,,38.9,,50.5,,55.2,,52.7
data_v2/zjyd_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,84.25925925925925,88.88888888888889
3
+ Meta-Llama-3-8B-Instruct,71.29629629629629,79.62962962962963
data_v2/zjyd_zh_qa_gen.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,Faithfulness,Answer_Relevancy,Answer_Correctness,Answer_Similarity
2
+ Qwen1.5-7B-Chat,0.9001343784903166,0.8310295209988445,0.9078620109210616,0.8729269504884732
3
+ Qwen1.5-4B-Chat,0.8871057360749349,0.8993513260808733,0.8307124664634027,0.820558540099676
4
+ Gpt-3.5-Turbo,0.844,0.9381622494626971,0.7014364361923093,0.9646698567096922
5
+ Qwen1.5-14B-Chat,0.9143835616438356,0.9096799403053909,0.6924722613000067,0.9576526234420509
6
+ Vicuna-13B-V1.5,0.8201754385964912,0.8811313951272425,0.68730238555884,0.9481117257306625
7
+ Baichuan2-13B-Chat,0.902,0.9045473946630944,0.6857149093288882,0.9578530098977669
8
+ Baichuan2-7B-Chat,0.8784722222222222,0.896849978755001,0.6751955501292016,0.9500810985536641
9
+ Qwen1.5-1.8B-Chat,0.9148888888888888,0.8586071776868396,0.6748854449858851,0.947046701753897
10
+ Yi-6B-Chat,0.9511929511929511,0.7986143744572479,0.6694793902546,0.9285801614165997
11
+ Qwen1.5-0.5B-Chat,0.8277777777777777,0.8901546106376419,0.6588250813657541,0.9469939028778743
12
+ Vicuna-7B-V1.5,0.7171052631578947,0.8301247992194959,0.6521358982668551,0.9382112592746454
13
+ Internlm2-Chat-20B,0.8146430093452255,0.6294665932615476,0.5592223065723815,0.9031372380769384
14
+ Internlm2-Chat-7B,0.7936354405828091,0.6059388264548148,0.5497547973508542,0.9071347182079667
15
+ Gemma-7B,0.5690370087428911,0.294443307376398,0.5182431858082619,0.8437500469275063
16
+ Yi-6B,0.4679211960033877,0.29049526322106994,0.4910372529026469,0.8409424204038982
17
+ Mistral-7B,0.7985507246376812,0.40909012863946165,0.4698180894318443,0.85180274226269
18
+ Gemma-2B,0.5461295296041059,0.32955654240675497,0.4138425436194475,0.8085919354670669
data_v2/zte_en_mc_gen.csv ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan-13B-Chat,11.6,14.31,14.68,18.46,14.56,15.68,16.21,16.82
3
+ Chatglm2-6B,15.94,16.06,19.83,19.91,26.27,26.22,28.25,28.37
4
+ Chatglm3-6B,30.4,30.4,30.7,30.7,26.9,26.9,37.2,37.2
5
+ Chinese-Alpaca-2-13B,20.86,20.86,23.08,23.08,29.75,29.75,32.83,32.83
6
+ Chinese-Llama-2-13B,10.02,10.02,19.51,19.51,34.51,34.51,33.34,33.34
7
+ Devops-Model-14B-Chat,31.04,30.51,42.84,47.37,52.25,49.38,45.9,47.23
8
+ Ernie-Bot-4.0,43.66,43.66,51.99,51.99,44.0,44.0,50.0,50.0
9
+ Gpt-3.5-Turbo,35.04,34.82,38.46,43.5,39.29,39.19,41.01,42.58
10
+ Gpt-4,,,56.9,65.49,,,59.39,63.54
11
+ Internlm-7B,20.48,20.48,23.85,23.85,23.69,23.69,26.06,26.06
12
+ Internlm2-Chat-20B,39.1,39.1,37.7,37.7,47.7,47.7,33.5,33.5
13
+ Internlm2-Chat-7B,36.8,36.8,31.7,31.7,46.3,46.3,36.9,36.9
14
+ Llama-2-13B,15.62,18.32,29.88,34.45,23.16,29.14,37.59,44.3
15
+ Llama-2-70B-Chat,23.64,23.64,39.31,39.31,38.98,39.12,47.9,47.9
16
+ Llama-2-7B,19.42,21.62,25.46,27.11,21.45,24.85,33.6,34.83
17
+ Mistral-7B,26.91,26.91,30.65,30.65,40.52,40.52,46.84,46.84
18
+ Qwen-14B-Chat,33.71,36.25,41.24,42.51,51.19,50.39,57.18,59.18
19
+ Qwen-72B-Chat,53.19,53.19,55.25,55.52,58.13,58.13,58.72,58.99
20
+ Qwen-7B-Chat,33.37,33.74,32.97,34.1,32.98,32.7,36.6,36.65
21
+ Yi-34B-Chat,38.24,37.04,48.24,52.1,61.33,61.19,53.53,53.39
22
+ gemma_2b,20.1,20.1,24.2,24.2,31.2,31.2,35.5,35.5
23
+ gemma_7b,23.1,23.1,34.4,34.4,21.4,21.4,33.1,33.1
24
+ Qwen1.5-14B-Base,34.0,38.9,42.8,63.4,57.9,37.6,40.2,59.0
25
+ Qwen1.5-14B-Chat,34.5,24.7,41.7,35.4,33.2,19.7,46.2,32.9
26
+ Qwen1.5-14B-Base,,34.0,,42.8,,57.9,,40.2
27
+ Qwen1.5-14B-Chat,,35.6,,41.1,,34.7,,47.4
data_v2/zte_en_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,56.99999999999999,64.0
3
+ Meta-Llama-3-8B-Instruct,35.199999999999996,48.4
data_v2/zte_zh_mc_gen.csv ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
2
+ Baichuan-13B-Chat,11.04,31.1,37.4,37.4,14.35,51.10000000000001,36.900000000000006,36.900000000000006
3
+ Chatglm2-6B,23.09,31.1,34.3,34.3,30.46,36.0,37.1,37.1
4
+ Chatglm3-6B,32.6,,,,28.3,,,
5
+ Chinese-Alpaca-2-13B,22.69,,,,40.52,,,
6
+ Chinese-Llama-2-13B,17.98,,,,31.66,,,
7
+ Devops-Model-14B-Chat,41.04,,,,56.85,,,
8
+ Ernie-Bot-4.0,45.99,,,,46.0,,,
9
+ Glm3-Turbo,43.0,,,,,,,
10
+ Glm4,50.0,,,,,,,
11
+ Gpt-3.5-Turbo,37.06,,,,39.42,,,
12
+ Gpt-4,,,,,,,,
13
+ Internlm-7B,27.81,,,,24.18,,,
14
+ Internlm2-Chat-20B,44.6,,,,62.2,,,
15
+ Internlm2-Chat-7B,38.8,,,,46.0,,,
16
+ Llama-2-13B,25.43,,,,36.56,,,
17
+ Llama-2-70B-Chat,24.38,,,,44.65,,,
18
+ Llama-2-7B,24.09,,,,29.94,,,
19
+ Mistral-7B,1.27,,,,30.72,,,
20
+ Qwen-14B-Chat,41.71,,,,53.52,,,
21
+ Qwen-72B-Chat,64.79,,,,70.19,,,
22
+ Qwen-7B-Chat,36.28,,,,41.58,,,
23
+ Yi-34B-Chat,64.91,,,,70.85,,,
24
+ gemma_2b,25.6,,,,19.1,,,
25
+ gemma_7b,27.3,,,,17.3,,,
26
+ Qwen1.5-14B-Base,49.1,,,,62.5,,,
27
+ Qwen1.5-14B-Chat,38.6,,,,54.6,,,
data_v2/zte_zh_mc_ppl.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ name,zero_self_con,few_self_con
2
+ Meta-Llama-3-70B-Instruct,56.8,66.0
3
+ Meta-Llama-3-8B-Instruct,38.2,44.2