dh-mc commited on
Commit
db6381e
·
1 Parent(s): 42a612f

fixed bugs in metrics

Browse files
eval_modules/calc_repetitions_v2e.py CHANGED
@@ -172,9 +172,18 @@ def load_for_repetition_penalty(
172
  )
173
 
174
 
175
- def calc_adjusted_performance(f, r, l=1):
 
 
 
 
 
 
 
 
 
176
  n = 1 - r / l if l > 0 else 0
177
- return f * n * n * n
178
 
179
 
180
  def calculate_adjusted_performance(row):
@@ -1138,7 +1147,9 @@ webqsp_csv_result_files = [
1138
  ]
1139
 
1140
 
1141
- def calc_rap_scores(result, precision="precision", recall="recall"):
 
 
1142
  newline_score = [
1143
  df["newline_score"].mean() for df in result["df_list_repetition_penalty"]
1144
  ]
@@ -1165,7 +1176,10 @@ def calc_rap_scores(result, precision="precision", recall="recall"):
1165
  )
1166
  ]
1167
 
1168
- rap = [calc_adjusted_performance(f, 1 - n) for f, n in zip(f1, nrr)]
 
 
 
1169
 
1170
  return newline_score, repetition_score, f1, rap, nrr
1171
 
@@ -1177,7 +1191,9 @@ def get_model_name(csv_result_file):
1177
  return model_name
1178
 
1179
 
1180
- def load_webqsp_result(csv_result_files, force_recalculate=False, save=False):
 
 
1181
  result = {}
1182
  for i, csv_result_file in enumerate(csv_result_files):
1183
  try:
@@ -1205,7 +1221,7 @@ def load_webqsp_result(csv_result_files, force_recalculate=False, save=False):
1205
  "file": csv_result_file,
1206
  }
1207
  newline_score, repetition_score, perf, rap, nrr = calc_rap_scores(
1208
- result[model_name]
1209
  )
1210
  df["newline_score"] = newline_score
1211
  df["repetition_score"] = repetition_score
@@ -1214,6 +1230,7 @@ def load_webqsp_result(csv_result_files, force_recalculate=False, save=False):
1214
  df["nrr"] = nrr
1215
  df["rap"] = rap
1216
  df["rr"] = df["nrr"].apply(lambda x: 1 - x)
 
1217
  if save:
1218
  df.to_csv(csv_result_file, index=False)
1219
  except Exception as e:
@@ -1224,7 +1241,11 @@ def load_webqsp_result(csv_result_files, force_recalculate=False, save=False):
1224
 
1225
 
1226
  def load_ms_marco_result(
1227
- csv_result_files, force_recalculate=False, calc_bertscore=False, save=False
 
 
 
 
1228
  ):
1229
  result = {}
1230
  for csv_result_file in csv_result_files:
@@ -1291,6 +1312,7 @@ def load_ms_marco_result(
1291
  result[model_name],
1292
  precision=col,
1293
  recall=col,
 
1294
  )
1295
  df["newline_score"] = newline_score
1296
  df["repetition_score"] = repetition_score
@@ -1299,6 +1321,7 @@ def load_ms_marco_result(
1299
  df["nrr"] = nrr
1300
  df["rap"] = rap
1301
  df["rr"] = df["nrr"].apply(lambda x: 1 - x)
 
1302
 
1303
  if save:
1304
  df.to_csv(csv_result_file, index=False)
 
172
  )
173
 
174
 
175
+ rap_penalty_functions = {
176
+ "linear": lambda x: x,
177
+ "quadratic": lambda x: x * x,
178
+ "cubic": lambda x: x * x * x,
179
+ "logarithmic": lambda x: math.log(x + 1, 2),
180
+ "exponential": lambda x: math.exp(x - 1),
181
+ }
182
+
183
+
184
+ def calc_adjusted_performance(f, r, l=1, penalty_function="cubic"):
185
  n = 1 - r / l if l > 0 else 0
186
+ return f * rap_penalty_functions[penalty_function](n)
187
 
188
 
189
  def calculate_adjusted_performance(row):
 
1147
  ]
1148
 
1149
 
1150
+ def calc_rap_scores(
1151
+ result, precision="precision", recall="recall", penalty_function="cubic"
1152
+ ):
1153
  newline_score = [
1154
  df["newline_score"].mean() for df in result["df_list_repetition_penalty"]
1155
  ]
 
1176
  )
1177
  ]
1178
 
1179
+ rap = [
1180
+ calc_adjusted_performance(f, 1 - n, penalty_function=penalty_function)
1181
+ for f, n in zip(f1, nrr)
1182
+ ]
1183
 
1184
  return newline_score, repetition_score, f1, rap, nrr
1185
 
 
1191
  return model_name
1192
 
1193
 
1194
+ def load_webqsp_result(
1195
+ csv_result_files, force_recalculate=False, save=False, penalty_function="cubic"
1196
+ ):
1197
  result = {}
1198
  for i, csv_result_file in enumerate(csv_result_files):
1199
  try:
 
1221
  "file": csv_result_file,
1222
  }
1223
  newline_score, repetition_score, perf, rap, nrr = calc_rap_scores(
1224
+ result[model_name], penalty_function=penalty_function
1225
  )
1226
  df["newline_score"] = newline_score
1227
  df["repetition_score"] = repetition_score
 
1230
  df["nrr"] = nrr
1231
  df["rap"] = rap
1232
  df["rr"] = df["nrr"].apply(lambda x: 1 - x)
1233
+ df["rrp"] = df["rr"].apply(lambda x: x * 100)
1234
  if save:
1235
  df.to_csv(csv_result_file, index=False)
1236
  except Exception as e:
 
1241
 
1242
 
1243
  def load_ms_marco_result(
1244
+ csv_result_files,
1245
+ force_recalculate=False,
1246
+ calc_bertscore=True,
1247
+ save=False,
1248
+ penalty_function="cubic",
1249
  ):
1250
  result = {}
1251
  for csv_result_file in csv_result_files:
 
1312
  result[model_name],
1313
  precision=col,
1314
  recall=col,
1315
+ penalty_function=penalty_function,
1316
  )
1317
  df["newline_score"] = newline_score
1318
  df["repetition_score"] = repetition_score
 
1321
  df["nrr"] = nrr
1322
  df["rap"] = rap
1323
  df["rr"] = df["nrr"].apply(lambda x: 1 - x)
1324
+ df["rrp"] = df["rr"].apply(lambda x: x * 100)
1325
 
1326
  if save:
1327
  df.to_csv(csv_result_file, index=False)
llm_toolkit/translation_utils_v2.py CHANGED
@@ -294,6 +294,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
294
  rr = []
295
  num_max_output_tokens = []
296
  translation_completeness = []
 
297
  columns = df.columns[2:]
298
 
299
  new_col = f"count_chinese_characters-ground_truth"
@@ -349,6 +350,9 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
349
 
350
  rr.append(df["total_repetitions"].mean() / df["answer_len"].mean())
351
 
 
 
 
352
  model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
353
 
354
  new_col = f"ground_truth_tokens-{model}"
@@ -372,22 +376,28 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
372
  num_max_output_tokens.append(
373
  count_entries_with_max_tokens(df[new_col], max_output_tokens)
374
  )
375
-
376
  metrics_df["comet"] = comet
377
  metrics_df["meteor"] = meteor
378
  metrics_df["spbleu"] = spbleu
379
  metrics_df["bleu_1"] = bleu_1
380
  metrics_df["rouge_l"] = rouge_l
381
  metrics_df["ews_score"] = ews_score
 
382
  metrics_df["repetition_score"] = repetition_score
383
  metrics_df["total_repetitions"] = total_repetitions
384
  metrics_df["rr"] = rr
 
 
 
 
385
  metrics_df["rap"] = metrics_df.apply(
386
  lambda x: calc_adjusted_performance(x["comet"], x["rr"]), axis=1
387
  )
388
 
389
  metrics_df["translation_completeness"] = translation_completeness
390
  metrics_df["num_max_output_tokens"] = num_max_output_tokens
 
391
 
392
  if variant != "rpp":
393
  metrics_df[variant] = metrics_df[variant].astype(int)
 
294
  rr = []
295
  num_max_output_tokens = []
296
  translation_completeness = []
297
+ percentage_of_repeated_entries = []
298
  columns = df.columns[2:]
299
 
300
  new_col = f"count_chinese_characters-ground_truth"
 
350
 
351
  rr.append(df["total_repetitions"].mean() / df["answer_len"].mean())
352
 
353
+ r, t = df[df["total_repetitions"] > 0].shape[0], df.shape[0]
354
+ percentage_of_repeated_entries.append(100 * r / t)
355
+
356
  model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
357
 
358
  new_col = f"ground_truth_tokens-{model}"
 
376
  num_max_output_tokens.append(
377
  count_entries_with_max_tokens(df[new_col], max_output_tokens)
378
  )
379
+
380
  metrics_df["comet"] = comet
381
  metrics_df["meteor"] = meteor
382
  metrics_df["spbleu"] = spbleu
383
  metrics_df["bleu_1"] = bleu_1
384
  metrics_df["rouge_l"] = rouge_l
385
  metrics_df["ews_score"] = ews_score
386
+ metrics_df["newline_score"] = ews_score
387
  metrics_df["repetition_score"] = repetition_score
388
  metrics_df["total_repetitions"] = total_repetitions
389
  metrics_df["rr"] = rr
390
+ metrics_df["rrp"] = metrics_df["rr"].apply(
391
+ lambda x: x * 100
392
+ )
393
+ metrics_df["perf"] = metrics_df["comet"]
394
  metrics_df["rap"] = metrics_df.apply(
395
  lambda x: calc_adjusted_performance(x["comet"], x["rr"]), axis=1
396
  )
397
 
398
  metrics_df["translation_completeness"] = translation_completeness
399
  metrics_df["num_max_output_tokens"] = num_max_output_tokens
400
+ metrics_df["percentage_of_repeated_entries"] = percentage_of_repeated_entries
401
 
402
  if variant != "rpp":
403
  metrics_df[variant] = metrics_df[variant].astype(int)
notebooks/00e_Data Analysis_Fine_Tuned_RPP_MNT_2048.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8bc1bf611e23912d5feea081cf815248a9b780e82063fbf06fd4b4d0b36501a
3
- size 1874906
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e2555b6c94d217415635bcfede10150bbccf4b1ab1df0baeea9687303963c8
3
+ size 2683424
notebooks/00f_Data Analysis_Fine_Tuned_RPP_Generic_Prompt.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb7b93faf022fee08c2a9dea2f30e373c9f9d9fa0d6e445011d774a89155165d
3
- size 2620163
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba968d834c1dcff62626ce158426ae25a8b9ed6f1749b827ff4f8c16815e632e
3
+ size 65147929
results/mac-results_rpp_with_mnt_2048_generic_prompt_metrics.csv CHANGED
@@ -1,27 +1,32 @@
1
- model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rr,rap,translation_completeness,num_max_output_tokens
2
- internlm/internlm2_5-7b-chat,1.00,0.7357995069773978,0.4297612514398102,15.060226683930628,0.1506022668393063,0.4097577795330234,0.04942630185348632,9.235657546337158,9.285083848190645,0.07525035765379114,0.581878095297299,1.0,2
3
- internlm/internlm2_5-7b-chat,1.02,0.7377187550620283,0.4246676977198055,14.728605282752795,0.147286052827528,0.4063246630867048,0.06972639011473963,5.35657546337158,5.426301853486319,0.04625547346404442,0.6400103546749837,1.0,1
4
- internlm/internlm2_5-7b-chat,1.04,0.7371160490183523,0.4173352728374962,13.846403511622256,0.1384640351162226,0.3988121301027288,0.06884377758164166,5.315092674315975,5.383936451897617,0.04501878242643857,0.6419783129560218,1.0,1
5
- internlm/internlm2_5-7b-chat,1.06,0.7338597697698218,0.3997609847704189,12.213374588416173,0.1221337458841617,0.3841365748920261,0.05825242718446602,5.275375110326567,5.333627537511033,0.043830827367611756,0.6415304775228277,1.0,1
6
- internlm/internlm2_5-7b-chat,1.08,0.7318234702626478,0.3881614120395272,11.369735763522288,0.1136973576352228,0.372963223209074,0.06707855251544571,5.283318623124448,5.350397175639894,0.04300663332269164,0.6414061446416202,1.0,1
7
- internlm/internlm2_5-7b-chat,1.10,0.7288648442604431,0.3784182249483568,10.377989030628608,0.103779890306286,0.3618424457502351,0.05207413945278023,5.288614298323036,5.340688437775817,0.042176064682512025,0.6404777687452995,1.0,1
8
- microsoft/Phi-3.5-mini-instruct,1.00,0.710605339281136,0.3788926591792472,9.70032874202361,0.097003287420236,0.3556134739443916,5.390997352162401,12.997352162400706,18.388349514563107,0.13770903562694164,0.4556065638568846,1.0,4
9
- microsoft/Phi-3.5-mini-instruct,1.02,0.7150978385770836,0.3741049510326346,9.910633597905436,0.0991063359790543,0.3453160556383774,3.586054721977052,7.001765225066196,10.587819947043249,0.08180522500528503,0.5535666483700645,1.0,2
10
- microsoft/Phi-3.5-mini-instruct,1.04,0.7074641684778791,0.3538698731015666,9.19721270538052,0.0919721270538052,0.3225824135517728,0.05119152691968226,0.05560458958517211,0.10679611650485436,0.000859149229250836,0.7056422827612971,1.0,0
11
- microsoft/Phi-3.5-mini-instruct,1.06,0.6962301708225224,0.3252854575717334,6.967166383106307,0.069671663831063,0.2948764736589108,0.0353045013239188,0.06796116504854369,0.10326566637246248,0.0007865281839265906,0.6945886486476809,1.0,0
12
- microsoft/Phi-3.5-mini-instruct,1.08,0.6823413657174107,0.301599095293242,5.452744292893752,0.0545274429289375,0.2726387617958179,0.07678729037952339,0.04766107678729038,0.12444836716681378,0.0009016671249608319,0.6804972951227785,1.0,0
13
- microsoft/Phi-3.5-mini-instruct,1.10,0.6717851540206916,0.2885734336603344,4.751039447225815,0.0475103944722581,0.2604284999048123,0.08031774051191527,0.02383053839364519,0.10414827890556046,0.0007188284314919954,0.6703375003284932,1.0,0
14
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.739080294072365,0.4490104515425626,6.7013404492782405,0.0670134044927823,0.4196181637680596,0.36716681376875554,139.80935569285083,140.1765225066196,0.48362195756964893,0.10176417668651536,0.999117387466902,15
15
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.743018615750854,0.4514907128972251,8.545954556237808,0.085459545562378,0.4214940415288087,1.0035304501323918,67.00353045013239,68.00706090026479,0.2929644725635723,0.2626173445806161,1.0,6
16
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7432195577780335,0.4517500968367987,10.080425294411064,0.1008042529441106,0.4200973007348334,0.01059135039717564,35.19770520741395,35.208296557811124,0.17564306911947306,0.4163542580947422,1.0,6
17
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.7430821573139815,0.4484154407825542,10.37470506193322,0.1037470506193321,0.4160289393328045,1.8005295675198587,26.880847308031775,28.68137687555163,0.1522966823356282,0.45265620897504616,1.0,3
18
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7435937259684909,0.4407733547418294,10.930453247368872,0.1093045324736887,0.4113063412348818,0.09267431597528684,12.007943512797882,12.100617828773169,0.06721477842655646,0.6035047423944578,1.0,3
19
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7427059700687901,0.4358940590119784,11.381344076286156,0.1138134407628615,0.4062980635945339,0.03971756398940865,0.6681376875551632,0.707855251544572,0.003961824217515018,0.7339134850401315,1.0,1
20
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.3888604919913587,0.2055875758168277,0.2434587181959752,0.0024345871819597,0.1844552188025856,638.2797881729921,3889.9232127096207,4528.203000882612,0.9210262088655917,0.00019153263422509847,0.9240953221535746,570
21
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.401959364669889,0.2020993340187826,0.2473696547531083,0.002473696547531,0.1795542969510355,611.315975286849,3759.7599293909975,4371.075904677847,0.8883366762655638,0.0005596465146821489,0.912621359223301,562
22
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7222260562908512,0.4039898602650971,13.461179673541356,0.1346117967354136,0.3819960428004565,0.05736981465136805,5.87378640776699,5.931156222418358,0.05150372482295595,0.6162827926700337,1.0,1
23
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.723643534970515,0.4051102919608809,13.18537912294539,0.1318537912294539,0.3824621732976229,0.06266548984995587,5.840247131509267,5.902912621359223,0.05148734372113075,0.617524335498735,1.0,1
24
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7238812581796301,0.4039456988919502,13.314773371306682,0.1331477337130668,0.3813737464821349,0.05736981465136805,5.845542806707855,5.902912621359223,0.05127418810757766,0.6181437496179476,1.0,1
25
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7252625281686607,0.4012797167602334,13.19924345265053,0.1319924345265053,0.3798291332004637,0.06266548984995587,5.847308031774051,5.909973521624007,0.05081388730791121,0.6202251404786316,1.0,1
26
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.7261167238322592,0.3987395126194482,12.656486100206328,0.1265648610020633,0.376975448872996,0.05648720211827008,5.820829655781112,5.877316857899382,0.05012721880128273,0.6223042523816,1.0,1
27
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7264630642225547,0.3964859769229444,12.284961706379857,0.1228496170637985,0.3744555065346823,0.04942630185348632,0.09267431597528684,0.14210061782877317,0.001266948385624464,0.7237053873903428,1.0,0
 
 
 
 
 
 
1
+ model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,newline_score,repetition_score,total_repetitions,rr,rrp,perf,rap,translation_completeness,num_max_output_tokens,percentage_of_repeated_entries
2
+ Qwen/Qwen2-72B-Instruct,1.00,0.7457879471336964,0.4655033970145371,11.09789843749336,0.1109789843749336,0.4305484344779789,20.488967343336274,20.488967343336274,27.70079435127979,48.18976169461607,0.27753124825269027,27.753124825269026,0.7457879471336964,0.2812370692049126,0.9902912621359223,10,4.148278905560459
3
+ Qwen/Qwen2-72B-Instruct,1.02,0.7498899340261324,0.4659242900672017,15.078064414763237,0.1507806441476323,0.4322160855974918,2.8102383053839364,2.8102383053839364,15.191526919682259,18.001765225066197,0.1255625257792252,12.55625257792252,0.7498899340261324,0.5013993970649349,0.9947043248014121,3,3.795233892321271
4
+ internlm/internlm2_5-7b-chat,1.00,0.7357995069773978,0.4297612514398102,15.060226683930628,0.1506022668393063,0.4097577795330234,0.04942630185348632,0.04942630185348632,9.235657546337158,9.285083848190645,0.07525035765379114,7.525035765379114,0.7357995069773978,0.581878095297299,1.0,2,1.853486319505737
5
+ internlm/internlm2_5-7b-chat,1.02,0.7377187550620283,0.4246676977198055,14.728605282752795,0.147286052827528,0.4063246630867048,0.06972639011473963,0.06972639011473963,5.35657546337158,5.426301853486319,0.04625547346404442,4.625547346404442,0.7377187550620283,0.6400103546749837,1.0,1,2.294792586054722
6
+ internlm/internlm2_5-7b-chat,1.04,0.7371160490183523,0.4173352728374962,13.846403511622256,0.1384640351162226,0.3988121301027288,0.06884377758164166,0.06884377758164166,5.315092674315975,5.383936451897617,0.04501878242643857,4.5018782426438575,0.7371160490183523,0.6419783129560218,1.0,1,2.118270079435128
7
+ internlm/internlm2_5-7b-chat,1.06,0.7338597697698218,0.3997609847704189,12.213374588416173,0.1221337458841617,0.3841365748920261,0.05825242718446602,0.05825242718446602,5.275375110326567,5.333627537511033,0.043830827367611756,4.3830827367611755,0.7338597697698218,0.6415304775228277,1.0,1,1.853486319505737
8
+ internlm/internlm2_5-7b-chat,1.08,0.7318234702626478,0.3881614120395272,11.369735763522288,0.1136973576352228,0.372963223209074,0.06707855251544571,0.06707855251544571,5.283318623124448,5.350397175639894,0.04300663332269164,4.300663332269164,0.7318234702626478,0.6414061446416202,1.0,1,2.030008826125331
9
+ internlm/internlm2_5-7b-chat,1.10,0.7288648442604431,0.3784182249483568,10.377989030628608,0.103779890306286,0.3618424457502351,0.05207413945278023,0.05207413945278023,5.288614298323036,5.340688437775817,0.042176064682512025,4.217606468251202,0.7288648442604431,0.6404777687452995,1.0,1,1.941747572815534
10
+ microsoft/Phi-3.5-mini-instruct,1.00,0.710605339281136,0.3788926591792472,9.70032874202361,0.097003287420236,0.3556134739443916,5.390997352162401,5.390997352162401,12.997352162400706,18.388349514563107,0.13770903562694164,13.770903562694164,0.710605339281136,0.4556065638568846,1.0,4,2.118270079435128
11
+ microsoft/Phi-3.5-mini-instruct,1.02,0.7150978385770836,0.3741049510326346,9.910633597905436,0.0991063359790543,0.3453160556383774,3.586054721977052,3.586054721977052,7.001765225066196,10.587819947043249,0.08180522500528503,8.180522500528504,0.7150978385770836,0.5535666483700645,1.0,2,1.147396293027361
12
+ microsoft/Phi-3.5-mini-instruct,1.04,0.7074641684778791,0.3538698731015666,9.19721270538052,0.0919721270538052,0.3225824135517728,0.05119152691968226,0.05119152691968226,0.05560458958517211,0.10679611650485436,0.000859149229250836,0.0859149229250836,0.7074641684778791,0.7056422827612971,1.0,0,1.147396293027361
13
+ microsoft/Phi-3.5-mini-instruct,1.06,0.6962301708225224,0.3252854575717334,6.967166383106307,0.069671663831063,0.2948764736589108,0.0353045013239188,0.0353045013239188,0.06796116504854369,0.10326566637246248,0.0007865281839265906,0.07865281839265906,0.6962301708225224,0.6945886486476809,1.0,0,1.235657546337158
14
+ microsoft/Phi-3.5-mini-instruct,1.08,0.6823413657174107,0.301599095293242,5.452744292893752,0.0545274429289375,0.2726387617958179,0.07678729037952339,0.07678729037952339,0.04766107678729038,0.12444836716681378,0.0009016671249608319,0.0901667124960832,0.6823413657174107,0.6804972951227785,1.0,0,1.500441306266549
15
+ microsoft/Phi-3.5-mini-instruct,1.10,0.6717851540206916,0.2885734336603344,4.751039447225815,0.0475103944722581,0.2604284999048123,0.08031774051191527,0.08031774051191527,0.02383053839364519,0.10414827890556046,0.0007188284314919954,0.07188284314919954,0.6717851540206916,0.6703375003284932,1.0,0,1.676963812886143
16
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.739080294072365,0.4490104515425626,6.7013404492782405,0.0670134044927823,0.4196181637680596,0.36716681376875554,0.36716681376875554,139.80935569285083,140.1765225066196,0.48362195756964893,48.362195756964894,0.739080294072365,0.10176417668651536,0.999117387466902,15,4.766107678729038
17
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.743018615750854,0.4514907128972251,8.545954556237808,0.085459545562378,0.4214940415288087,1.0035304501323918,1.0035304501323918,67.00353045013239,68.00706090026479,0.2929644725635723,29.296447256357226,0.743018615750854,0.2626173445806161,1.0,6,3.353927625772286
18
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7432195577780335,0.4517500968367987,10.080425294411064,0.1008042529441106,0.4200973007348334,0.01059135039717564,0.01059135039717564,35.19770520741395,35.208296557811124,0.17564306911947306,17.564306911947305,0.7432195577780335,0.4163542580947422,1.0,6,2.912621359223301
19
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.7430821573139815,0.4484154407825542,10.37470506193322,0.1037470506193321,0.4160289393328045,1.8005295675198587,1.8005295675198587,26.880847308031775,28.68137687555163,0.1522966823356282,15.22966823356282,0.7430821573139815,0.45265620897504616,1.0,3,2.5595763459841128
20
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7435937259684909,0.4407733547418294,10.930453247368872,0.1093045324736887,0.4113063412348818,0.09267431597528684,0.09267431597528684,12.007943512797882,12.100617828773169,0.06721477842655646,6.721477842655646,0.7435937259684909,0.6035047423944578,1.0,3,2.471315092674316
21
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7427059700687901,0.4358940590119784,11.381344076286156,0.1138134407628615,0.4062980635945339,0.03971756398940865,0.03971756398940865,0.6681376875551632,0.707855251544572,0.003961824217515018,0.3961824217515018,0.7427059700687901,0.7339134850401315,1.0,1,2.64783759929391
22
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.3888604919913587,0.2055875758168277,0.2434587181959752,0.0024345871819597,0.1844552188025856,638.2797881729921,638.2797881729921,3889.9232127096207,4528.203000882612,0.9210262088655917,92.10262088655917,0.3888604919913587,0.00019153263422509847,0.9240953221535746,570,89.40864960282435
23
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.401959364669889,0.2020993340187826,0.2473696547531083,0.002473696547531,0.1795542969510355,611.315975286849,611.315975286849,3759.7599293909975,4371.075904677847,0.8883366762655638,88.83366762655638,0.401959364669889,0.0005596465146821489,0.912621359223301,562,88.87908208296558
24
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.4185898168652013,0.1966595434176072,0.2555659721821757,0.0025556597218217,0.1694446848890552,565.2135922330098,565.2135922330098,3662.4289496910856,4227.642541924095,0.8709795612734481,87.09795612734482,0.4185898168652013,0.0008990093364229676,0.8702559576345984,524,87.99646954986761
25
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.4381090624082014,0.1859205661699343,0.2641725452341745,0.0026417254523417,0.1557244147977393,515.5233892321271,515.5233892321271,3521.3830538393645,4036.9064430714916,0.8049867903962987,80.49867903962988,0.4381090624082014,0.0032491841590587176,0.8402471315092674,514,86.31950573698147
26
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.4538654954503935,0.1738923185096792,0.2607684997123511,0.0026076849971235,0.1432980302984092,480.180052956752,480.180052956752,3378.7625772285965,3858.9426301853487,0.7472985052942835,74.72985052942835,0.4538654954503935,0.007324038178749702,0.8331862312444837,477,85.3486319505737
27
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7222260562908512,0.4039898602650971,13.461179673541356,0.1346117967354136,0.3819960428004565,0.05736981465136805,0.05736981465136805,5.87378640776699,5.931156222418358,0.05150372482295595,5.1503724822955945,0.7222260562908512,0.6162827926700337,1.0,1,2.471315092674316
28
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.723643534970515,0.4051102919608809,13.18537912294539,0.1318537912294539,0.3824621732976229,0.06266548984995587,0.06266548984995587,5.840247131509267,5.902912621359223,0.05148734372113075,5.148734372113075,0.723643534970515,0.617524335498735,1.0,1,2.471315092674316
29
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7238812581796301,0.4039456988919502,13.314773371306682,0.1331477337130668,0.3813737464821349,0.05736981465136805,0.05736981465136805,5.845542806707855,5.902912621359223,0.05127418810757766,5.127418810757766,0.7238812581796301,0.6181437496179476,1.0,1,2.206531332744925
30
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7252625281686607,0.4012797167602334,13.19924345265053,0.1319924345265053,0.3798291332004637,0.06266548984995587,0.06266548984995587,5.847308031774051,5.909973521624007,0.05081388730791121,5.081388730791121,0.7252625281686607,0.6202251404786316,1.0,1,2.383053839364519
31
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.7261167238322592,0.3987395126194482,12.656486100206328,0.1265648610020633,0.376975448872996,0.05648720211827008,0.05648720211827008,5.820829655781112,5.877316857899382,0.05012721880128273,5.0127218801282725,0.7261167238322592,0.6223042523816,1.0,1,2.118270079435128
32
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7264630642225547,0.3964859769229444,12.284961706379857,0.1228496170637985,0.3744555065346823,0.04942630185348632,0.04942630185348632,0.09267431597528684,0.14210061782877317,0.001266948385624464,0.1266948385624464,0.7264630642225547,0.7237053873903428,1.0,0,1.7652250661959399
results/mac-results_rpp_with_mnt_2048_metrics.csv CHANGED
@@ -1,31 +1,43 @@
1
- model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rr,rap,translation_completeness,num_max_output_tokens
2
- internlm/internlm2_5-7b-chat,1.00,0.739699612254078,0.4289996929258777,14.734881589173108,0.1473488158917311,0.4096466800937898,0.05383936451897617,12.606354810238305,12.660194174757281,0.10371655820679682,0.5325881640465967,1.0,2
3
- internlm/internlm2_5-7b-chat,1.02,0.740223803961056,0.4266246904302194,14.583816688798017,0.1458381668879802,0.4071727106228415,0.06266548984995587,9.849073256840247,9.911738746690203,0.0832234063051179,0.5703659582906754,1.0,1
4
- internlm/internlm2_5-7b-chat,1.04,0.7398856264610577,0.4154585167056314,13.534659133050225,0.1353465913305021,0.3968657713589718,0.07237422771403354,6.529567519858782,6.601941747572815,0.05613508442776736,0.6221485884888651,1.0,1
5
- internlm/internlm2_5-7b-chat,1.06,0.7379362287241489,0.4039588647855378,12.346740971499404,0.1234674097149939,0.3872447044295494,0.06796116504854369,6.533980582524272,6.601941747572815,0.05513987689359035,0.6224742543197805,0.999117387466902,1
6
- internlm/internlm2_5-7b-chat,1.08,0.7319988705684732,0.3873176839854818,11.075674965706344,0.1107567496570634,0.3724352909668609,0.05207413945278023,9.83495145631068,9.88702559576346,0.07906717392378437,0.5717343310562308,0.999117387466902,1
7
- internlm/internlm2_5-7b-chat,1.10,0.7295350462119345,0.3769306874386757,10.305163787094209,0.1030516378709421,0.3634496155759507,0.07855251544571933,6.527802294792586,6.606354810238305,0.053004659594657756,0.6195690090849183,0.999117387466902,1
8
- microsoft/Phi-3.5-mini-instruct,1.00,0.7107840433177544,0.3796831545348129,8.71296896471494,0.0871296896471493,0.3589874395901284,10.670785525154457,17.93821712268314,28.6090026478376,0.20225504327262062,0.3608526271635592,1.0,6
9
- microsoft/Phi-3.5-mini-instruct,1.02,0.7164765837070485,0.3780585837553919,10.291240080163629,0.1029124008016362,0.3546952732427276,3.585172109443954,7.1403353927625774,10.725507502206531,0.08530053839296368,0.5483240204881398,1.0,2
10
- microsoft/Phi-3.5-mini-instruct,1.04,0.7111233387336411,0.3547161333845742,8.966881655527896,0.0896688165552789,0.3300979657678754,3.6125330979699912,0.07325684024713151,3.685789938217123,0.02973427131098516,0.6495566110355127,1.0,1
11
- microsoft/Phi-3.5-mini-instruct,1.06,0.7024363270136286,0.3298733737040869,7.076233088011138,0.0707623308801113,0.3019513312669543,0.04589585172109444,0.05207413945278023,0.09796999117387467,0.0007571675113745661,0.7008419489376413,1.0,0
12
- microsoft/Phi-3.5-mini-instruct,1.08,0.6882111219210848,0.3054541022592767,5.105510599247868,0.0510551059924786,0.2736030007297014,3.3609885260370698,0.06443071491615181,3.4254192409532216,0.023581380370521147,0.6406632969877,1.0,1
13
- microsoft/Phi-3.5-mini-instruct,1.10,0.6712992989638161,0.2903831801547132,4.091958857999118,0.0409195885799911,0.251653275009876,0.32215357458075905,0.06531332744924978,0.38746690203000883,0.0023407216247487324,0.6665963500989894,1.0,0
14
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.7501818982248062,0.4611110508507017,17.87914973742753,0.1787914973742752,0.4340662057009564,0.00706090026478376,0.1262135922330097,0.13327449249779347,0.0011265209898463904,0.7476494662426587,1.0,0
15
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.7485114382045625,0.4571517219079576,17.436884594979905,0.174368845949799,0.4311385932640979,0.00706090026478376,0.11562224183583407,0.12268314210061783,0.0010382199383043404,0.7461824993322019,1.0,0
16
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7500591586357918,0.4560467960364254,17.440173470996626,0.1744017347099662,0.4302844557731285,0.00706090026478376,0.13062665489849956,0.13768755516328332,0.0011593944393659004,0.7474533388920676,1.0,0
17
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.748812871571673,0.4520416361219855,16.89523258317781,0.168952325831778,0.4260026774745837,0.00706090026478376,0.0997352162400706,0.10679611650485436,0.0008902491962006224,0.7468147612728927,1.0,0
18
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7473851635144647,0.4442106511292453,16.16623784482793,0.1616623784482792,0.4195129470585874,0.01059135039717564,0.13062665489849956,0.1412180052956752,0.001176591707969938,0.7447501647073623,1.0,0
19
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7465709781131172,0.4379837926138161,15.60172257624066,0.1560172257624066,0.4132562932940978,0.01059135039717564,0.07855251544571933,0.08914386584289496,0.000734476013176936,0.7449271706150111,1.0,0
20
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.7426396049131678,0.433632501662176,15.209540658023398,0.1520954065802339,0.4089208235151474,0.00353045013239188,3.901147396293027,3.904677846425419,0.03237065275450547,0.6728297734832243,1.0,1
21
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.7436477056353469,0.4329054166518245,15.19102241646024,0.1519102241646024,0.4068967964789407,0.0,3.8905560458958517,3.8905560458958517,0.03219656852361788,0.6741068111074712,1.0,1
22
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.7440943776351209,0.4320478700956207,15.05135166158296,0.1505135166158296,0.4062008380201262,0.00353045013239188,0.1526919682259488,0.1562224183583407,0.001352332200022921,0.7410796698393737,1.0,0
23
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.7426502735395928,0.4275429314912545,14.449130821290163,0.1444913082129016,0.4001409979222783,0.00706090026478376,0.13768755516328332,0.14474845542806708,0.0012399256044637321,0.7398912041420567,1.0,0
24
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.7408098006080129,0.4206626658729054,13.933703757385222,0.1393370375738522,0.3964824268676203,0.00353045013239188,0.1297440423654016,0.13327449249779347,0.001134996993385448,0.7382902118097237,1.0,0
25
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.10,0.7392685912871718,0.4111211240399151,13.303738403756984,0.1330373840375698,0.3870959581563503,0.00353045013239188,0.12180052956751986,0.12533097969991175,0.0010529672171262895,0.7369357726201563,1.0,0
26
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7240239171358935,0.4068335357738006,13.565136550617618,0.1356513655061761,0.3866395067055498,0.0529567519858782,0.1209179170344219,0.17387466902030008,0.001578993772192076,0.7205996419729696,1.0,0
27
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.7263097057327799,0.4064914781094827,13.42987641622816,0.1342987641622816,0.3863697821025159,0.06001765225066196,6.236540158870256,6.296557811120918,0.0541899611084103,0.6145165811709306,1.0,1
28
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7276128307708258,0.4054859896994975,13.295092218891954,0.1329509221889195,0.3851203729935697,0.05207413945278023,0.1297440423654016,0.18181818181818182,0.0016533037985858635,0.7240098989116803,1.0,0
29
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7276865132383193,0.4014727027723293,13.10860799057166,0.1310860799057166,0.3804952786306688,0.05207413945278023,0.13415710503089143,0.18623124448367168,0.001691057431836761,0.7240010735018495,1.0,0
30
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.726393195584298,0.3987018836449559,12.850537785783194,0.1285053778578319,0.3788945955746495,0.05648720211827008,0.15357458075904679,0.21006178287731686,0.0018871365478087807,0.7222885419382362,1.0,0
31
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7244012304511832,0.3932239948456176,12.361161644811926,0.1236116164481192,0.3733413807007665,0.05030891438658429,0.08561341571050309,0.13592233009708737,0.0012217374057913526,0.721749388705754,1.0,0
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,newline_score,repetition_score,total_repetitions,rr,rrp,perf,rap,translation_completeness,num_max_output_tokens,percentage_of_repeated_entries
2
+ Qwen/Qwen2-72B-Instruct,1.00,0.7570170721487987,0.4789208192601603,18.953782447211417,0.1895378244721141,0.4522514493620528,0.07855251544571933,0.07855251544571933,0.06266548984995587,0.1412180052956752,0.0012165820128349403,0.12165820128349403,0.7570170721487987,0.7542575120443845,1.0,0,2.206531332744925
3
+ Qwen/Qwen2-72B-Instruct,1.02,0.7573583127909388,0.4796836675632054,19.0137490702917,0.190137490702917,0.4526922175986438,0.0997352162400706,0.0997352162400706,0.07413945278022947,0.17387466902030008,0.0014956534942869072,0.1495653494286907,0.7573583127909388,0.7539651560209267,1.0,0,2.64783759929391
4
+ Qwen/Qwen2-72B-Instruct,1.04,0.7574985287458835,0.476362681282195,18.52063321160408,0.1852063321160408,0.4491791286896135,0.08384819064430715,0.08384819064430715,0.08649602824360106,0.1703442188879082,0.0014611139290337722,0.1461113929033772,0.7574985287458835,0.7541830028744213,1.0,0,2.471315092674316
5
+ Qwen/Qwen2-72B-Instruct,1.06,0.7563852697268905,0.4679636383950649,17.982473951038504,0.1798247395103851,0.4414302384776113,0.07325684024713151,0.07325684024713151,0.05472197705207414,0.12797881729920565,0.0010829866531231096,0.10829866531231096,0.7563852697268905,0.7539304647132251,1.0,0,2.118270079435128
6
+ Qwen/Qwen2-72B-Instruct,1.08,0.7554471646908207,0.4597578197711645,17.067954025424825,0.1706795402542483,0.4319052863160809,0.06354810238305383,0.06354810238305383,0.03795233892321271,0.10150044130626655,0.0008500258703525761,0.0850025870352576,0.7554471646908207,0.7535223528572281,1.0,0,1.853486319505737
7
+ Qwen/Qwen2-72B-Instruct,1.10,0.75500979801247,0.4515778511124262,16.22452191616505,0.1622452191616504,0.4244761354058755,0.05736981465136805,0.05736981465136805,0.02824360105913504,0.08561341571050309,0.0007121356728580868,0.07121356728580867,0.75500979801247,0.7533979381889341,1.0,0,1.676963812886143
8
+ Qwen/Qwen2-7B-Instruct,1.00,0.7457723188010971,0.442240791493943,14.38814929350883,0.1438814929350883,0.4162759057739747,0.05825242718446602,0.05825242718446602,10.774933804060018,10.833186231244484,0.08396037978493448,8.396037978493448,0.7457723188010971,0.5732565603116205,0.9947043248014121,2,2.294792586054722
9
+ Qwen/Qwen2-7B-Instruct,1.02,0.7474082109944966,0.4400998640836595,15.16172261831792,0.1516172261831792,0.4165035906118025,0.05648720211827008,0.05648720211827008,7.036187113857017,7.0926743159752865,0.057428303949803826,5.742830394980382,0.7474082109944966,0.6258943742595823,0.9947043248014121,1,2.294792586054722
10
+ Qwen/Qwen2-7B-Instruct,1.04,0.7484377450576842,0.4390136558190875,14.958631815014014,0.1495863181501401,0.4142970757686492,0.0529567519858782,0.0529567519858782,0.10856134157105031,0.1615180935569285,0.001379851157039126,0.1379851157039126,0.7484377450576842,0.745343820078089,0.999117387466902,0,1.853486319505737
11
+ Qwen/Qwen2-7B-Instruct,1.06,0.7471614604436078,0.4328321576515084,14.28087386760537,0.1428087386760537,0.4073319764254861,0.06707855251544571,0.06707855251544571,0.11032656663724624,0.17740511915269197,0.0015113918339724791,0.1511391833972479,0.7471614604436078,0.743778816908725,0.9982347749338041,0,2.206531332744925
12
+ Qwen/Qwen2-7B-Instruct,1.08,0.7451942060389355,0.423560805217557,13.659683698817108,0.1365968369881711,0.3968597388257214,0.06443071491615181,0.06443071491615181,0.13062665489849956,0.19505736981465135,0.0016449082275183469,0.1644908227518347,0.7451942060389355,0.7415229233479677,1.0,0,2.206531332744925
13
+ Qwen/Qwen2-7B-Instruct,1.10,0.7432072967790653,0.4135053136541433,12.922649874705083,0.1292264987470507,0.3878186285063445,0.06707855251544571,0.06707855251544571,0.0997352162400706,0.16681376875551632,0.0013848487290898832,0.1384848729089883,0.7432072967790653,0.7401238817462625,1.0,0,2.030008826125331
14
+ internlm/internlm2_5-7b-chat,1.00,0.739699612254078,0.4289996929258777,14.734881589173108,0.1473488158917311,0.4096466800937898,0.05383936451897617,0.05383936451897617,12.606354810238305,12.660194174757281,0.10371655820679682,10.371655820679681,0.739699612254078,0.5325881640465967,1.0,2,2.030008826125331
15
+ internlm/internlm2_5-7b-chat,1.02,0.740223803961056,0.4266246904302194,14.583816688798017,0.1458381668879802,0.4071727106228415,0.06266548984995587,0.06266548984995587,9.849073256840247,9.911738746690203,0.0832234063051179,8.32234063051179,0.740223803961056,0.5703659582906754,1.0,1,2.118270079435128
16
+ internlm/internlm2_5-7b-chat,1.04,0.7398856264610577,0.4154585167056314,13.534659133050225,0.1353465913305021,0.3968657713589718,0.07237422771403354,0.07237422771403354,6.529567519858782,6.601941747572815,0.05613508442776736,5.613508442776736,0.7398856264610577,0.6221485884888651,1.0,1,2.294792586054722
17
+ internlm/internlm2_5-7b-chat,1.06,0.7379362287241489,0.4039588647855378,12.346740971499404,0.1234674097149939,0.3872447044295494,0.06796116504854369,0.06796116504854369,6.533980582524272,6.601941747572815,0.05513987689359035,5.513987689359035,0.7379362287241489,0.6224742543197805,0.999117387466902,1,2.383053839364519
18
+ internlm/internlm2_5-7b-chat,1.08,0.7319988705684732,0.3873176839854818,11.075674965706344,0.1107567496570634,0.3724352909668609,0.05207413945278023,0.05207413945278023,9.83495145631068,9.88702559576346,0.07906717392378437,7.906717392378438,0.7319988705684732,0.5717343310562308,0.999117387466902,1,1.941747572815534
19
+ internlm/internlm2_5-7b-chat,1.10,0.7295350462119345,0.3769306874386757,10.305163787094209,0.1030516378709421,0.3634496155759507,0.07855251544571933,0.07855251544571933,6.527802294792586,6.606354810238305,0.053004659594657756,5.300465959465775,0.7295350462119345,0.6195690090849183,0.999117387466902,1,2.383053839364519
20
+ microsoft/Phi-3.5-mini-instruct,1.00,0.7107840433177544,0.3796831545348129,8.71296896471494,0.0871296896471493,0.3589874395901284,10.670785525154457,10.670785525154457,17.93821712268314,28.6090026478376,0.20225504327262062,20.225504327262062,0.7107840433177544,0.3608526271635592,1.0,6,2.030008826125331
21
+ microsoft/Phi-3.5-mini-instruct,1.02,0.7164765837070485,0.3780585837553919,10.291240080163629,0.1029124008016362,0.3546952732427276,3.585172109443954,3.585172109443954,7.1403353927625774,10.725507502206531,0.08530053839296368,8.530053839296368,0.7164765837070485,0.5483240204881398,1.0,2,1.323918799646955
22
+ microsoft/Phi-3.5-mini-instruct,1.04,0.7111233387336411,0.3547161333845742,8.966881655527896,0.0896688165552789,0.3300979657678754,3.6125330979699912,3.6125330979699912,0.07325684024713151,3.685789938217123,0.02973427131098516,2.973427131098516,0.7111233387336411,0.6495566110355127,1.0,1,1.412180052956752
23
+ microsoft/Phi-3.5-mini-instruct,1.06,0.7024363270136286,0.3298733737040869,7.076233088011138,0.0707623308801113,0.3019513312669543,0.04589585172109444,0.04589585172109444,0.05207413945278023,0.09796999117387467,0.0007571675113745661,0.0757167511374566,0.7024363270136286,0.7008419489376413,1.0,0,1.412180052956752
24
+ microsoft/Phi-3.5-mini-instruct,1.08,0.6882111219210848,0.3054541022592767,5.105510599247868,0.0510551059924786,0.2736030007297014,3.3609885260370698,3.3609885260370698,0.06443071491615181,3.4254192409532216,0.023581380370521147,2.3581380370521146,0.6882111219210848,0.6406632969877,1.0,1,3.089143865842895
25
+ microsoft/Phi-3.5-mini-instruct,1.10,0.6712992989638161,0.2903831801547132,4.091958857999118,0.0409195885799911,0.251653275009876,0.32215357458075905,0.32215357458075905,0.06531332744924978,0.38746690203000883,0.0023407216247487324,0.23407216247487322,0.6712992989638161,0.6665963500989894,1.0,0,6.443071491615181
26
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.7501818982248062,0.4611110508507017,17.87914973742753,0.1787914973742752,0.4340662057009564,0.00706090026478376,0.00706090026478376,0.1262135922330097,0.13327449249779347,0.0011265209898463904,0.11265209898463904,0.7501818982248062,0.7476494662426587,1.0,0,1.853486319505737
27
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.7485114382045625,0.4571517219079576,17.436884594979905,0.174368845949799,0.4311385932640979,0.00706090026478376,0.00706090026478376,0.11562224183583407,0.12268314210061783,0.0010382199383043404,0.10382199383043404,0.7485114382045625,0.7461824993322019,1.0,0,1.588702559576346
28
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7500591586357918,0.4560467960364254,17.440173470996626,0.1744017347099662,0.4302844557731285,0.00706090026478376,0.00706090026478376,0.13062665489849956,0.13768755516328332,0.0011593944393659004,0.11593944393659004,0.7500591586357918,0.7474533388920676,1.0,0,1.7652250661959399
29
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.748812871571673,0.4520416361219855,16.89523258317781,0.168952325831778,0.4260026774745837,0.00706090026478376,0.00706090026478376,0.0997352162400706,0.10679611650485436,0.0008902491962006224,0.08902491962006225,0.748812871571673,0.7468147612728927,1.0,0,1.500441306266549
30
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7473851635144647,0.4442106511292453,16.16623784482793,0.1616623784482792,0.4195129470585874,0.01059135039717564,0.01059135039717564,0.13062665489849956,0.1412180052956752,0.001176591707969938,0.1176591707969938,0.7473851635144647,0.7447501647073623,1.0,0,1.676963812886143
31
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7465709781131172,0.4379837926138161,15.60172257624066,0.1560172257624066,0.4132562932940978,0.01059135039717564,0.01059135039717564,0.07855251544571933,0.08914386584289496,0.000734476013176936,0.0734476013176936,0.7465709781131172,0.7449271706150111,1.0,0,1.412180052956752
32
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.7426396049131678,0.433632501662176,15.209540658023398,0.1520954065802339,0.4089208235151474,0.00353045013239188,0.00353045013239188,3.901147396293027,3.904677846425419,0.03237065275450547,3.237065275450547,0.7426396049131678,0.6728297734832243,1.0,1,1.853486319505737
33
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.7436477056353469,0.4329054166518245,15.19102241646024,0.1519102241646024,0.4068967964789407,0.0,0.0,3.8905560458958517,3.8905560458958517,0.03219656852361788,3.219656852361788,0.7436477056353469,0.6741068111074712,1.0,1,1.676963812886143
34
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.7440943776351209,0.4320478700956207,15.05135166158296,0.1505135166158296,0.4062008380201262,0.00353045013239188,0.00353045013239188,0.1526919682259488,0.1562224183583407,0.001352332200022921,0.1352332200022921,0.7440943776351209,0.7410796698393737,1.0,0,1.588702559576346
35
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.7426502735395928,0.4275429314912545,14.449130821290163,0.1444913082129016,0.4001409979222783,0.00706090026478376,0.00706090026478376,0.13768755516328332,0.14474845542806708,0.0012399256044637321,0.12399256044637322,0.7426502735395928,0.7398912041420567,1.0,0,1.588702559576346
36
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.7408098006080129,0.4206626658729054,13.933703757385222,0.1393370375738522,0.3964824268676203,0.00353045013239188,0.00353045013239188,0.1297440423654016,0.13327449249779347,0.001134996993385448,0.1134996993385448,0.7408098006080129,0.7382902118097237,1.0,0,1.235657546337158
37
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.10,0.7392685912871718,0.4111211240399151,13.303738403756984,0.1330373840375698,0.3870959581563503,0.00353045013239188,0.00353045013239188,0.12180052956751986,0.12533097969991175,0.0010529672171262895,0.10529672171262895,0.7392685912871718,0.7369357726201563,1.0,0,1.500441306266549
38
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7240239171358935,0.4068335357738006,13.565136550617618,0.1356513655061761,0.3866395067055498,0.0529567519858782,0.0529567519858782,0.1209179170344219,0.17387466902030008,0.001578993772192076,0.1578993772192076,0.7240239171358935,0.7205996419729696,1.0,0,2.471315092674316
39
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.7263097057327799,0.4064914781094827,13.42987641622816,0.1342987641622816,0.3863697821025159,0.06001765225066196,0.06001765225066196,6.236540158870256,6.296557811120918,0.0541899611084103,5.4189961108410305,0.7263097057327799,0.6145165811709306,1.0,1,2.471315092674316
40
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7276128307708258,0.4054859896994975,13.295092218891954,0.1329509221889195,0.3851203729935697,0.05207413945278023,0.05207413945278023,0.1297440423654016,0.18181818181818182,0.0016533037985858635,0.16533037985858634,0.7276128307708258,0.7240098989116803,1.0,0,2.294792586054722
41
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7276865132383193,0.4014727027723293,13.10860799057166,0.1310860799057166,0.3804952786306688,0.05207413945278023,0.05207413945278023,0.13415710503089143,0.18623124448367168,0.001691057431836761,0.1691057431836761,0.7276865132383193,0.7240010735018495,1.0,0,2.383053839364519
42
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.726393195584298,0.3987018836449559,12.850537785783194,0.1285053778578319,0.3788945955746495,0.05648720211827008,0.05648720211827008,0.15357458075904679,0.21006178287731686,0.0018871365478087807,0.18871365478087807,0.726393195584298,0.7222885419382362,1.0,0,2.206531332744925
43
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7244012304511832,0.3932239948456176,12.361161644811926,0.1236116164481192,0.3733413807007665,0.05030891438658429,0.05030891438658429,0.08561341571050309,0.13592233009708737,0.0012217374057913526,0.12217374057913527,0.7244012304511832,0.721749388705754,1.0,0,1.853486319505737