dh-mc committed on
Commit
a5c9876
1 Parent(s): f0814f8

Chinese-English Machine Translation - Repetition Metrics for Different Models

Browse files
.gitattributes CHANGED
@@ -79,3 +79,4 @@ notebooks/00c_Data[[:space:]]Analysis_Fine_Tuned.ipynb filter=lfs diff=lfs merge
79
  notebooks/00d_Data[[:space:]]Analysis_Fine_Tuned_RPP.ipynb filter=lfs diff=lfs merge=lfs -text
80
  notebooks/00f_Data[[:space:]]Analysis_Fine_Tuned_RPP_Generic_Prompt.ipynb filter=lfs diff=lfs merge=lfs -text
81
  notebooks/03a_RAPGeT_v2_Data[[:space:]]Analysis_Chat_Template.ipynb filter=lfs diff=lfs merge=lfs -text
 
 
79
  notebooks/00d_Data[[:space:]]Analysis_Fine_Tuned_RPP.ipynb filter=lfs diff=lfs merge=lfs -text
80
  notebooks/00f_Data[[:space:]]Analysis_Fine_Tuned_RPP_Generic_Prompt.ipynb filter=lfs diff=lfs merge=lfs -text
81
  notebooks/03a_RAPGeT_v2_Data[[:space:]]Analysis_Chat_Template.ipynb filter=lfs diff=lfs merge=lfs -text
82
+ notebooks/03c_RAPGeT_v2_Data[[:space:]]Analysis.ipynb filter=lfs diff=lfs merge=lfs -text
eval_modules/calc_repetitions_v2e.py ADDED
@@ -0,0 +1 @@
 
 
1
+ /Users/inflaton/code/engd/papers/rapget-v2/eval_modules/calc_repetitions_v2e.py
llm_toolkit/translation_utils_v2.py CHANGED
@@ -9,7 +9,7 @@ from datasets import load_dataset
9
  from langchain_openai import ChatOpenAI
10
  from langchain_core.prompts import ChatPromptTemplate
11
  from tqdm import tqdm
12
- from eval_modules.calc_repetitions_v2d import *
13
  from llm_toolkit.llm_utils import load_tokenizer, print_row_details
14
 
15
  print(f"loading {__file__}")
@@ -228,14 +228,11 @@ def count_entries_with_max_tokens(entries, max_tokens):
228
 
229
 
230
  def detect_repetition_scores(row, col, debug=False):
231
- # print(f"row: {row}")
232
  text = row[col] if isinstance(row[col], str) else ""
233
- newline_score, repetition_score, total_repetitions = detect_repetitions(
234
- text, debug=debug
 
235
  )
236
- newline_score -= row["ground_truth_ews_score"]
237
- repetition_score -= row["ground_truth_repetition_score"]
238
- total_repetitions -= row["ground_truth_total_repetitions"]
239
 
240
  return pd.Series(
241
  [
@@ -294,19 +291,11 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
294
  ews_score = []
295
  repetition_score = []
296
  total_repetitions = []
297
- nrr = []
298
  num_max_output_tokens = []
299
  translation_completeness = []
300
  columns = df.columns[2:]
301
 
302
- df[
303
- [
304
- "ground_truth_ews_score",
305
- "ground_truth_repetition_score",
306
- "ground_truth_total_repetitions",
307
- ]
308
- ] = df["english"].apply(detect_scores)
309
-
310
  new_col = f"count_chinese_characters-ground_truth"
311
  df[new_col] = df["chinese"].apply(count_chinese_characters)
312
 
@@ -358,7 +347,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
358
  repetition_score.append(df["repetition_score"].mean())
359
  total_repetitions.append(df["total_repetitions"].mean())
360
 
361
- nrr.append(1 - df["total_repetitions"].mean() / df["answer_len"].mean())
362
 
363
  model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
364
 
@@ -392,9 +381,9 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
392
  metrics_df["ews_score"] = ews_score
393
  metrics_df["repetition_score"] = repetition_score
394
  metrics_df["total_repetitions"] = total_repetitions
395
- metrics_df["nrr"] = nrr
396
  metrics_df["rap"] = metrics_df.apply(
397
- lambda x: x["comet"] * math.exp(x["nrr"] - 1), axis=1
398
  )
399
 
400
  metrics_df["translation_completeness"] = translation_completeness
 
9
  from langchain_openai import ChatOpenAI
10
  from langchain_core.prompts import ChatPromptTemplate
11
  from tqdm import tqdm
12
+ from eval_modules.calc_repetitions_v2e import *
13
  from llm_toolkit.llm_utils import load_tokenizer, print_row_details
14
 
15
  print(f"loading {__file__}")
 
228
 
229
 
230
  def detect_repetition_scores(row, col, debug=False):
 
231
  text = row[col] if isinstance(row[col], str) else ""
232
+
233
+ newline_score, repetition_score, total_repetitions = detect_scores(
234
+ row, debug=debug, answer_col=col, ground_truth_col="english"
235
  )
 
 
 
236
 
237
  return pd.Series(
238
  [
 
291
  ews_score = []
292
  repetition_score = []
293
  total_repetitions = []
294
+ rr = []
295
  num_max_output_tokens = []
296
  translation_completeness = []
297
  columns = df.columns[2:]
298
 
 
 
 
 
 
 
 
 
299
  new_col = f"count_chinese_characters-ground_truth"
300
  df[new_col] = df["chinese"].apply(count_chinese_characters)
301
 
 
347
  repetition_score.append(df["repetition_score"].mean())
348
  total_repetitions.append(df["total_repetitions"].mean())
349
 
350
+ rr.append(df["total_repetitions"].mean() / df["answer_len"].mean())
351
 
352
  model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
353
 
 
381
  metrics_df["ews_score"] = ews_score
382
  metrics_df["repetition_score"] = repetition_score
383
  metrics_df["total_repetitions"] = total_repetitions
384
+ metrics_df["rr"] = rr
385
  metrics_df["rap"] = metrics_df.apply(
386
+ lambda x: calc_adjusted_performance(x["comet"], x["rr"]), axis=1
387
  )
388
 
389
  metrics_df["translation_completeness"] = translation_completeness
notebooks/03a_RAPGeT_v2_Data Analysis_Chat_Template.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fe4375d3e38a2cc42877744e7f376f8c10450cfa66950f23b4263d0e4d65a50
3
- size 1548885
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6273fc3413aa0c507438f5061c2047948cee5516d16244aacbd2f6f72b19dfff
3
+ size 1562071
notebooks/03b_RAPGeT_v2_Data Analysis_Generic_Prompt.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86838f60ba444f15837a4024360d800a8af5ab9843890ae89c3d7ff79c6f38dc
3
- size 14320166
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b948f85d9b7ba464ddadbd63be90d285641d64d366140a55a2999f69b21b2021
3
+ size 14255738
notebooks/03c_RAPGeT_v2_Data Analysis.ipynb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e2bc3dfca4cc2a8adf32d889114798a0eb59a2ebac079e5b2853a80ea4edbd9
3
+ size 11641357
requirements.txt CHANGED
@@ -16,7 +16,8 @@ langchain==0.3.3
16
  langchain-community==0.3.2
17
  openai==1.51.2
18
  wandb==0.17.6
19
- transformers==4.45.1
 
20
  bitsandbytes #==0.43.3
21
  sentencepiece==0.1.98
22
  einops==0.8.0
@@ -25,3 +26,4 @@ peft==0.11.1
25
  sacrebleu==2.4.2
26
  unbabel-comet==2.2.2
27
  gradio==5.0.2
 
 
16
  langchain-community==0.3.2
17
  openai==1.51.2
18
  wandb==0.17.6
19
+ # transformers==4.45.1
20
+ transformers==4.43.4
21
  bitsandbytes #==0.43.3
22
  sentencepiece==0.1.98
23
  einops==0.8.0
 
26
  sacrebleu==2.4.2
27
  unbabel-comet==2.2.2
28
  gradio==5.0.2
29
+ protobuf==4.25.4
results/mac-results_rpp_with_mnt_2048_generic_prompt_metrics.csv CHANGED
@@ -1,26 +1,26 @@
1
- model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,nrr,rap,translation_completeness,num_max_output_tokens
2
- internlm/internlm2_5-7b-chat,1.00,0.7357995069773978,0.4297612514398102,15.060226683930628,0.1506022668393063,0.4097577795330234,0.04942630185348632,9.235657546337158,9.285083848190645,0.9247496423462088,0.6824623187873116,1.0,2
3
- internlm/internlm2_5-7b-chat,1.02,0.7377187550620283,0.4246676977198055,14.728605282752795,0.147286052827528,0.4063246630867048,0.06972639011473963,5.35657546337158,5.4210061782877315,0.953789668507456,0.7044041934116965,1.0,1
4
- internlm/internlm2_5-7b-chat,1.04,0.7371160490183523,0.4173352728374962,13.846403511622256,0.1384640351162226,0.3988121301027288,0.06884377758164166,5.315092674315975,5.383053839364519,0.9549885977018281,0.7046730517422506,1.0,1
5
- internlm/internlm2_5-7b-chat,1.06,0.7338597697698218,0.3997609847704189,12.213374588416173,0.1221337458841617,0.3841365748920261,0.05825242718446602,5.275375110326567,5.332744924977935,0.9561764257893248,0.7023939203825759,1.0,1
6
- internlm/internlm2_5-7b-chat,1.08,0.7318234702626478,0.3881614120395272,11.369735763522288,0.1136973576352228,0.372963223209074,0.06707855251544571,5.283318623124448,5.345101500441306,0.9570359334539392,0.7010472282786626,1.0,1
7
- internlm/internlm2_5-7b-chat,1.10,0.7288648442604431,0.3784182249483568,10.377989030628608,0.103779890306286,0.3618424457502351,0.05207413945278023,5.288614298323036,5.340688437775817,0.957823935317488,0.6987634348896543,1.0,1
8
- microsoft/Phi-3.5-mini-instruct,1.00,0.710605339281136,0.3788926591792472,9.70032874202361,0.097003287420236,0.3556134739443916,5.390997352162401,12.997352162400706,18.368049426301855,0.8624429902835613,0.6192816563968827,1.0,4
9
- microsoft/Phi-3.5-mini-instruct,1.02,0.7150978385770836,0.3741049510326346,9.910633597905436,0.0991063359790543,0.3453160556383774,3.586054721977052,7.001765225066196,10.567519858781994,0.9183516206245184,0.6590312746582365,1.0,2
10
- microsoft/Phi-3.5-mini-instruct,1.04,0.7074641684778791,0.3538698731015666,9.19721270538052,0.0919721270538052,0.3225824135517728,0.05119152691968226,0.05560458958517211,0.10150044130626655,0.9991834532118691,0.7068867266696017,1.0,0
11
- microsoft/Phi-3.5-mini-instruct,1.06,0.6962301708225224,0.3252854575717334,6.967166383106307,0.069671663831063,0.2948764736589108,0.0353045013239188,0.06796116504854369,0.09796999117387467,0.9992538065947363,0.6957108422443861,1.0,0
12
- microsoft/Phi-3.5-mini-instruct,1.08,0.6823413657174107,0.301599095293242,5.452744292893752,0.0545274429289375,0.2726387617958179,0.07678729037952339,0.04766107678729038,0.11297440423654016,0.9991814653050001,0.6817830741574145,1.0,0
13
- microsoft/Phi-3.5-mini-instruct,1.10,0.6717851540206916,0.2885734336603344,4.751039447225815,0.0475103944722581,0.2604284999048123,0.08031774051191527,0.02383053839364519,0.10414827890556046,0.999281171568508,0.6713024292710505,1.0,0
14
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.739080294072365,0.4490104515425626,6.7013404492782405,0.0670134044927823,0.4196181637680596,0.36716681376875554,139.80935569285083,140.15798764342455,0.5164419894213406,0.4557063153750335,0.999117387466902,15
15
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.743018615750854,0.4514907128972251,8.545954556237808,0.085459545562378,0.4214940415288087,1.0035304501323918,67.00353045013239,67.98852603706973,0.7071153729164131,0.5543722941592363,1.0,6
16
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7432195577780335,0.4517500968367987,10.080425294411064,0.1008042529441106,0.4200973007348334,0.01059135039717564,35.19770520741395,35.18358340688438,0.8244802169835412,0.6235766669370041,1.0,6
17
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.7430821573139815,0.4484154407825542,10.37470506193322,0.1037470506193321,0.4160289393328045,1.8005295675198587,26.880847308031775,28.656663724624888,0.8478345432646117,0.6381932626507077,1.0,3
18
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7435937259684909,0.4407733547418294,10.930453247368872,0.1093045324736887,0.4113063412348818,0.09267431597528684,12.007943512797882,12.072374227714034,0.9329421050825354,0.6953650250037441,1.0,3
19
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7427059700687901,0.4358940590119784,11.381344076286156,0.1138134407628615,0.4062980635945339,0.03971756398940865,0.6681376875551632,0.6822594880847308,0.9961814337654126,0.739875306107768,1.0,1
20
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.38886049199135875,0.20558757581682777,0.24345871819597525,0.0024345871819597513,0.18445521880258564,638.2797881729921,3889.9232127096207,4528.131509267431,0.07898833236661085,0.15481161323631304,0.9240953221535746,570
21
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7222260562908512,0.4039898602650971,13.461179673541356,0.1346117967354136,0.3819960428004565,0.05736981465136805,5.87378640776699,5.9179170344218885,0.9486112388485238,0.6860492554476049,1.0,1
22
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.723643534970515,0.4051102919608809,13.18537912294539,0.1318537912294539,0.3824621732976229,0.06266548984995587,5.840247131509267,5.8914386584289495,0.9486127363429205,0.6873967610154514,1.0,1
23
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7238812581796301,0.4039456988919502,13.314773371306682,0.1331477337130668,0.3813737464821349,0.05736981465136805,5.845542806707855,5.889673433362754,0.948840810819099,0.6877794238881113,1.0,1
24
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7252625281686607,0.4012797167602334,13.19924345265053,0.1319924345265053,0.3798291332004637,0.06266548984995587,5.847308031774051,5.884377758164166,0.9494061847846709,0.6894815110844906,1.0,1
25
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.7261167238322592,0.3987395126194482,12.656486100206328,0.1265648610020633,0.376975448872996,0.05648720211827008,5.820829655781112,5.864077669902913,0.9499856972945303,0.6906937144718889,1.0,1
26
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7264630642225547,0.3964859769229444,12.284961706379857,0.1228496170637985,0.3744555065346823,0.04942630185348632,0.09267431597528684,0.12886142983230361,0.9988510902838437,0.7256289030293478,1.0,0
 
1
+ model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rr,rap,translation_completeness,num_max_output_tokens
2
+ internlm/internlm2_5-7b-chat,1.00,0.7357995069773978,0.4297612514398102,15.060226683930628,0.1506022668393063,0.4097577795330234,0.04942630185348632,9.235657546337158,9.285083848190645,0.07525035765379114,0.6824623187873116,1.0,2
3
+ internlm/internlm2_5-7b-chat,1.02,0.7377187550620283,0.4246676977198055,14.728605282752795,0.147286052827528,0.4063246630867048,0.06972639011473963,5.35657546337158,5.426301853486319,0.04625547346404442,0.7043723959353786,1.0,1
4
+ internlm/internlm2_5-7b-chat,1.04,0.7371160490183523,0.4173352728374962,13.846403511622256,0.1384640351162226,0.3988121301027288,0.06884377758164166,5.315092674315975,5.383936451897617,0.04501878242643857,0.704667851183933,1.0,1
5
+ internlm/internlm2_5-7b-chat,1.06,0.7338597697698218,0.3997609847704189,12.213374588416173,0.1221337458841617,0.3841365748920261,0.05825242718446602,5.275375110326567,5.333627537511033,0.043830827367611756,0.7023888258277158,1.0,1
6
+ internlm/internlm2_5-7b-chat,1.08,0.7318234702626478,0.3881614120395272,11.369735763522288,0.1136973576352228,0.372963223209074,0.06707855251544571,5.283318623124448,5.350397175639894,0.04300663332269164,0.7010173875930041,1.0,1
7
+ internlm/internlm2_5-7b-chat,1.10,0.7288648442604431,0.3784182249483568,10.377989030628608,0.103779890306286,0.3618424457502351,0.05207413945278023,5.288614298323036,5.340688437775817,0.042176064682512025,0.6987634348896543,1.0,1
8
+ microsoft/Phi-3.5-mini-instruct,1.00,0.710605339281136,0.3788926591792472,9.70032874202361,0.097003287420236,0.3556134739443916,5.390997352162401,12.997352162400706,18.388349514563107,0.13770903562694164,0.6191875166952294,1.0,4
9
+ microsoft/Phi-3.5-mini-instruct,1.02,0.7150978385770836,0.3741049510326346,9.910633597905436,0.0991063359790543,0.3453160556383774,3.586054721977052,7.001765225066196,10.587819947043249,0.08180522500528503,0.6589279165887452,1.0,2
10
+ microsoft/Phi-3.5-mini-instruct,1.04,0.7074641684778791,0.3538698731015666,9.19721270538052,0.0919721270538052,0.3225824135517728,0.05119152691968226,0.05560458958517211,0.10679611650485436,0.000859149229250836,0.7068566122109297,1.0,0
11
+ microsoft/Phi-3.5-mini-instruct,1.06,0.6962301708225224,0.3252854575717334,6.967166383106307,0.069671663831063,0.2948764736589108,0.0353045013239188,0.06796116504854369,0.10326566637246248,0.0007865281839265906,0.6956827814674672,1.0,0
12
+ microsoft/Phi-3.5-mini-instruct,1.08,0.6823413657174107,0.301599095293242,5.452744292893752,0.0545274429289375,0.2726387617958179,0.07678729037952339,0.04766107678729038,0.12444836716681378,0.0009016671249608319,0.68172639822959,1.0,0
13
+ microsoft/Phi-3.5-mini-instruct,1.10,0.6717851540206916,0.2885734336603344,4.751039447225815,0.0475103944722581,0.2604284999048123,0.08031774051191527,0.02383053839364519,0.10414827890556046,0.0007188284314919954,0.6713024292710504,1.0,0
14
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.739080294072365,0.4490104515425626,6.7013404492782405,0.0670134044927823,0.4196181637680596,0.36716681376875554,139.80935569285083,140.1765225066196,0.48362195756964893,0.45567717525911156,0.999117387466902,15
15
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.743018615750854,0.4514907128972251,8.545954556237808,0.085459545562378,0.4214940415288087,1.0035304501323918,67.00353045013239,68.00706090026479,0.2929644725635723,0.5543280318044165,1.0,6
16
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7432195577780335,0.4517500968367987,10.080425294411064,0.1008042529441106,0.4200973007348334,0.01059135039717564,35.19770520741395,35.208296557811124,0.17564306911947306,0.6234997933386273,1.0,6
17
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.7430821573139815,0.4484154407825542,10.37470506193322,0.1037470506193321,0.4160289393328045,1.8005295675198587,26.880847308031775,28.68137687555163,0.1522966823356282,0.6381095208514015,1.0,3
18
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7435937259684909,0.4407733547418294,10.930453247368872,0.1093045324736887,0.4113063412348818,0.09267431597528684,12.007943512797882,12.100617828773169,0.06721477842655646,0.6952559422553878,1.0,3
19
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7427059700687901,0.4358940590119784,11.381344076286156,0.1138134407628615,0.4062980635945339,0.03971756398940865,0.6681376875551632,0.707855251544572,0.003961824217515018,0.7397693206556093,1.0,1
20
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.3888604919913587,0.2055875758168277,0.2434587181959752,0.0024345871819597,0.1844552188025856,638.2797881729921,3889.9232127096207,4528.203000882612,0.9210262088655917,0.15480936210106447,0.9240953221535746,570
21
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7222260562908512,0.4039898602650971,13.461179673541356,0.1346117967354136,0.3819960428004565,0.05736981465136805,5.87378640776699,5.931156222418358,0.05150372482295595,0.6859703892398439,1.0,1
22
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.723643534970515,0.4051102919608809,13.18537912294539,0.1318537912294539,0.3824621732976229,0.06266548984995587,5.840247131509267,5.902912621359223,0.05148734372113075,0.6873279697459551,1.0,1
23
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7238812581796301,0.4039456988919502,13.314773371306682,0.1331477337130668,0.3813737464821349,0.05736981465136805,5.845542806707855,5.902912621359223,0.05127418810757766,0.6877003345402561,1.0,1
24
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7252625281686607,0.4012797167602334,13.19924345265053,0.1319924345265053,0.3798291332004637,0.06266548984995587,5.847308031774051,5.909973521624007,0.05081388730791121,0.6893297921407147,1.0,1
25
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.7261167238322592,0.3987395126194482,12.656486100206328,0.1265648610020633,0.376975448872996,0.05648720211827008,5.820829655781112,5.877316857899382,0.05012721880128273,0.6906157284372703,1.0,1
26
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7264630642225547,0.3964859769229444,12.284961706379857,0.1228496170637985,0.3744555065346823,0.04942630185348632,0.09267431597528684,0.14210061782877317,0.001266948385624464,0.7255432558140477,1.0,0
results/mac-results_rpp_with_mnt_2048_metrics.csv CHANGED
@@ -1,31 +1,31 @@
1
- model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,nrr,rap,translation_completeness,num_max_output_tokens
2
- internlm/internlm2_5-7b-chat,1.00,0.739699612254078,0.4289996929258777,14.734881589173108,0.1473488158917311,0.4096466800937898,0.05383936451897617,12.606354810238305,12.646954986760813,0.8963919016630513,0.6668973100142501,1.0,2
3
- internlm/internlm2_5-7b-chat,1.02,0.740223803961056,0.4266246904302194,14.583816688798017,0.1458381668879802,0.4071727106228415,0.06266548984995587,9.849073256840247,9.910856134157106,0.916784004505773,0.6811186927169239,1.0,1
4
- internlm/internlm2_5-7b-chat,1.04,0.7398856264610577,0.4154585167056314,13.534659133050225,0.1353465913305021,0.3968657713589718,0.07237422771403354,6.529567519858782,6.596646072374227,0.9439099437148217,0.6995278161884907,1.0,1
5
- internlm/internlm2_5-7b-chat,1.06,0.7379362287241489,0.4039588647855378,12.346740971499404,0.1234674097149939,0.3872447044295494,0.06796116504854369,6.533980582524272,6.596646072374227,0.9449043529541853,0.6983788795529117,0.999117387466902,1
6
- internlm/internlm2_5-7b-chat,1.08,0.7319988705684732,0.3873176839854818,11.075674965706344,0.1107567496570634,0.3724352909668609,0.05207413945278023,9.83495145631068,9.881729920564872,0.920975175928344,0.6763793903551222,0.999117387466902,1
7
- internlm/internlm2_5-7b-chat,1.10,0.7295350462119345,0.3769306874386757,10.305163787094209,0.1030516378709421,0.3634496155759507,0.07855251544571933,6.527802294792586,6.596646072374227,0.9470732363646663,0.6919271286047086,0.999117387466902,1
8
- microsoft/Phi-3.5-mini-instruct,1.00,0.7107840433177544,0.3796831545348129,8.71296896471494,0.0871296896471493,0.3589874395901284,10.670785525154457,17.93821712268314,28.58340688437776,0.7979259092866101,0.5807350079921869,1.0,6
9
- microsoft/Phi-3.5-mini-instruct,1.02,0.7164765837070485,0.3780585837553919,10.291240080163629,0.1029124008016362,0.3546952732427276,3.585172109443954,7.1403353927625774,10.705207413945278,0.914860909301493,0.6580010154348458,1.0,2
10
- microsoft/Phi-3.5-mini-instruct,1.04,0.7111233387336411,0.3547161333845742,8.966881655527896,0.0896688165552789,0.3300979657678754,3.6125330979699912,0.07325684024713151,3.685789938217123,0.9702657286890148,0.6902898733301204,1.0,1
11
- microsoft/Phi-3.5-mini-instruct,1.06,0.7024363270136286,0.3298733737040869,7.076233088011138,0.0707623308801113,0.3019513312669543,0.04589585172109444,0.05207413945278023,0.0970873786407767,0.9992496538175567,0.7019094542904326,1.0,0
12
- microsoft/Phi-3.5-mini-instruct,1.08,0.6882111219210848,0.3054541022592767,5.105510599247868,0.0510551059924786,0.2736030007297014,3.3609885260370698,0.06443071491615181,3.414827890556046,0.9764915329416268,0.6722210212114398,1.0,1
13
- microsoft/Phi-3.5-mini-instruct,1.10,0.6712992989638161,0.2903831801547132,4.091958857999118,0.0409195885799911,0.251653275009876,0.32215357458075905,0.06531332744924978,0.3786407766990291,0.9977125977744483,0.6697655223041806,1.0,0
14
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.7501818982248062,0.4611110508507017,17.87914973742753,0.1787914973742752,0.4340662057009564,0.00706090026478376,0.1262135922330097,0.11650485436893204,0.9990152266843727,0.7494435027453233,1.0,0
15
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.7485114382045625,0.4571517219079576,17.436884594979905,0.174368845949799,0.4311385932640979,0.00706090026478376,0.11562224183583407,0.1059135039717564,0.9991036950172912,0.7478408442461326,1.0,0
16
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7500591586357918,0.4560467960364254,17.440173470996626,0.1744017347099662,0.4302844557731285,0.00706090026478376,0.13062665489849956,0.1209179170344219,0.9989818138577363,0.7492958474569671,1.0,0
17
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.748812871571673,0.4520416361219855,16.89523258317781,0.168952325831778,0.4260026774745837,0.00706090026478376,0.0997352162400706,0.09002647837599294,0.999249541999897,0.7482511297698003,1.0,0
18
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7473851635144647,0.4442106511292453,16.16623784482793,0.1616623784482792,0.4195129470585874,0.01059135039717564,0.13062665489849956,0.12444836716681378,0.9989631285573515,0.7466106228007235,1.0,0
19
- shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7465709781131172,0.4379837926138161,15.60172257624066,0.1560172257624066,0.4132562932940978,0.01059135039717564,0.07855251544571933,0.06531332744924978,0.9994618690596525,0.7461693332490859,1.0,0
20
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.7426396049131678,0.433632501662176,15.209540658023398,0.1520954065802339,0.4089208235151474,0.00353045013239188,3.901147396293027,3.889673433362754,0.9677537371860069,0.7190742424186368,1.0,1
21
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.7436477056353469,0.4329054166518245,15.19102241646024,0.1519102241646024,0.4068967964789407,0.0,3.8905560458958517,3.8693733451015007,0.9679787303975633,0.7202123788620469,1.0,1
22
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.7440943776351209,0.4320478700956207,15.05135166158296,0.1505135166158296,0.4062008380201262,0.00353045013239188,0.1526919682259488,0.13503971756398941,0.9988310348779463,0.7432250654569489,1.0,0
23
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.7426502735395928,0.4275429314912545,14.449130821290163,0.1444913082129016,0.4001409979222783,0.00706090026478376,0.13768755516328332,0.13327449249779347,0.9988583611812559,0.7418029189370554,1.0,0
24
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.7408098006080129,0.4206626658729054,13.933703757385222,0.1393370375738522,0.3964824268676203,0.00353045013239188,0.1297440423654016,0.11738746690203,0.9990003006614552,0.7400695835992323,1.0,0
25
- shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.10,0.7392685912871718,0.4111211240399151,13.303738403756984,0.1330373840375698,0.3870959581563503,0.00353045013239188,0.12180052956751986,0.10944395410414828,0.9990805075005376,0.7385891517800307,1.0,0
26
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7240239171358935,0.4068335357738006,13.565136550617618,0.1356513655061761,0.3866395067055498,0.0529567519858782,0.1209179170344219,0.1676963812886143,0.9984771126055,0.7229221493862132,1.0,0
27
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.7263097057327799,0.4064914781094827,13.42987641622816,0.1342987641622816,0.3863697821025159,0.06001765225066196,6.236540158870256,6.294792586054722,0.9458252309188138,0.6880088812871149,1.0,1
28
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7276128307708258,0.4054859896994975,13.295092218891954,0.1329509221889195,0.3851203729935697,0.05207413945278023,0.1297440423654016,0.16946160635481025,0.9984590566537452,0.726492484037711,1.0,0
29
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7276865132383193,0.4014727027723293,13.10860799057166,0.1310860799057166,0.3804952786306688,0.05207413945278023,0.13415710503089143,0.18446601941747573,0.9983249715485598,0.7264686378975903,1.0,0
30
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.726393195584298,0.3987018836449559,12.850537785783194,0.1285053778578319,0.3788945955746495,0.05648720211827008,0.15357458075904679,0.21006178287731686,0.9981128634521912,0.7250236850699381,1.0,0
31
- shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7244012304511832,0.3932239948456176,12.361161644811926,0.1236116164481192,0.3733413807007665,0.05030891438658429,0.08561341571050309,0.13592233009708737,0.9987782625942087,0.7235167427869905,1.0,0
 
1
+ model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rr,rap,translation_completeness,num_max_output_tokens
2
+ internlm/internlm2_5-7b-chat,1.00,0.739699612254078,0.4289996929258777,14.734881589173108,0.1473488158917311,0.4096466800937898,0.05383936451897617,12.606354810238305,12.660194174757281,0.10371655820679682,0.6668249823411998,1.0,2
3
+ internlm/internlm2_5-7b-chat,1.02,0.740223803961056,0.4266246904302194,14.583816688798017,0.1458381668879802,0.4071727106228415,0.06266548984995587,9.849073256840247,9.911738746690203,0.0832234063051179,0.6811136450938015,1.0,1
4
+ internlm/internlm2_5-7b-chat,1.04,0.7398856264610577,0.4154585167056314,13.534659133050225,0.1353465913305021,0.3968657713589718,0.07237422771403354,6.529567519858782,6.601941747572815,0.05613508442776736,0.6994963184593858,1.0,1
5
+ internlm/internlm2_5-7b-chat,1.06,0.7379362287241489,0.4039588647855378,12.346740971499404,0.1234674097149939,0.3872447044295494,0.06796116504854369,6.533980582524272,6.601941747572815,0.05513987689359035,0.6983479910444814,0.999117387466902,1
6
+ internlm/internlm2_5-7b-chat,1.08,0.7319988705684732,0.3873176839854818,11.075674965706344,0.1107567496570634,0.3724352909668609,0.05207413945278023,9.83495145631068,9.88702559576346,0.07906717392378437,0.676350746394496,0.999117387466902,1
7
+ internlm/internlm2_5-7b-chat,1.10,0.7295350462119345,0.3769306874386757,10.305163787094209,0.1030516378709421,0.3634496155759507,0.07855251544571933,6.527802294792586,6.606354810238305,0.053004659594657756,0.6918732323764201,0.999117387466902,1
8
+ microsoft/Phi-3.5-mini-instruct,1.00,0.7107840433177544,0.3796831545348129,8.71296896471494,0.0871296896471493,0.3589874395901284,10.670785525154457,17.93821712268314,28.6090026478376,0.20225504327262062,0.5806299320134263,1.0,6
9
+ microsoft/Phi-3.5-mini-instruct,1.02,0.7164765837070485,0.3780585837553919,10.291240080163629,0.1029124008016362,0.3546952732427276,3.585172109443954,7.1403353927625774,10.725507502206531,0.08530053839296368,0.6578947912630083,1.0,2
10
+ microsoft/Phi-3.5-mini-instruct,1.04,0.7111233387336411,0.3547161333845742,8.966881655527896,0.0896688165552789,0.3300979657678754,3.6125330979699912,0.07325684024713151,3.685789938217123,0.02973427131098516,0.6902898733301204,1.0,1
11
+ microsoft/Phi-3.5-mini-instruct,1.06,0.7024363270136286,0.3298733737040869,7.076233088011138,0.0707623308801113,0.3019513312669543,0.04589585172109444,0.05207413945278023,0.09796999117387467,0.0007571675113745661,0.701904666351495,1.0,0
12
+ microsoft/Phi-3.5-mini-instruct,1.08,0.6882111219210848,0.3054541022592767,5.105510599247868,0.0510551059924786,0.2736030007297014,3.3609885260370698,0.06443071491615181,3.4254192409532216,0.023581380370521147,0.6721720091371258,1.0,1
13
+ microsoft/Phi-3.5-mini-instruct,1.10,0.6712992989638161,0.2903831801547132,4.091958857999118,0.0409195885799911,0.251653275009876,0.32215357458075905,0.06531332744924978,0.38746690203000883,0.0023407216247487324,0.6697298117609694,1.0,0
14
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.7501818982248062,0.4611110508507017,17.87914973742753,0.1787914973742752,0.4340662057009564,0.00706090026478376,0.1262135922330097,0.13327449249779347,0.0011265209898463904,0.7493372784005554,1.0,0
15
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.7485114382045625,0.4571517219079576,17.436884594979905,0.174368845949799,0.4311385932640979,0.00706090026478376,0.11562224183583407,0.12268314210061783,0.0010382199383043404,0.7477347219762754,1.0,0
16
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7500591586357918,0.4560467960364254,17.440173470996626,0.1744017347099662,0.4302844557731285,0.00706090026478376,0.13062665489849956,0.13768755516328332,0.0011593944393659004,0.7491900481363686,1.0,0
17
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.748812871571673,0.4520416361219855,16.89523258317781,0.168952325831778,0.4260026774745837,0.00706090026478376,0.0997352162400706,0.10679611650485436,0.0008902491962006224,0.7481465381600518,1.0,0
18
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7473851635144647,0.4442106511292453,16.16623784482793,0.1616623784482792,0.4195129470585874,0.01059135039717564,0.13062665489849956,0.1412180052956752,0.001176591707969938,0.7465063134536485,1.0,0
19
+ shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7465709781131172,0.4379837926138161,15.60172257624066,0.1560172257624066,0.4132562932940978,0.01059135039717564,0.07855251544571933,0.08914386584289496,0.000734476013176936,0.7460228409589962,1.0,0
20
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.7426396049131678,0.433632501662176,15.209540658023398,0.1520954065802339,0.4089208235151474,0.00353045013239188,3.901147396293027,3.904677846425419,0.03237065275450547,0.7189848023792343,1.0,1
21
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.7436477056353469,0.4329054166518245,15.19102241646024,0.1519102241646024,0.4068967964789407,0.0,3.8905560458958517,3.8905560458958517,0.03219656852361788,0.7200861374743239,1.0,1
22
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.7440943776351209,0.4320478700956207,15.05135166158296,0.1505135166158296,0.4062008380201262,0.00353045013239188,0.1526919682259488,0.1562224183583407,0.001352332200022921,0.7430887949425674,1.0,0
23
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.7426502735395928,0.4275429314912545,14.449130821290163,0.1444913082129016,0.4001409979222783,0.00706090026478376,0.13768755516328332,0.14474845542806708,0.0012399256044637321,0.7417300130954148,1.0,0
24
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.7408098006080129,0.4206626658729054,13.933703757385222,0.1393370375738522,0.3964824268676203,0.00353045013239188,0.1297440423654016,0.13327449249779347,0.001134996993385448,0.7399694606935023,1.0,0
25
+ shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.10,0.7392685912871718,0.4111211240399151,13.303738403756984,0.1330373840375698,0.3870959581563503,0.00353045013239188,0.12180052956751986,0.12533097969991175,0.0010529672171262895,0.7384905753804022,1.0,0
26
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7240239171358935,0.4068335357738006,13.565136550617618,0.1356513655061761,0.3866395067055498,0.0529567519858782,0.1209179170344219,0.17387466902030008,0.001578993772192076,0.7228815899808891,1.0,0
27
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.7263097057327799,0.4064914781094827,13.42987641622816,0.1342987641622816,0.3863697821025159,0.06001765225066196,6.236540158870256,6.296557811120918,0.0541899611084103,0.6879984291168549,1.0,1
28
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7276128307708258,0.4054859896994975,13.295092218891954,0.1329509221889195,0.3851203729935697,0.05207413945278023,0.1297440423654016,0.18181818181818182,0.0016533037985858635,0.7264108595993547,1.0,0
29
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7276865132383193,0.4014727027723293,13.10860799057166,0.1310860799057166,0.3804952786306688,0.05207413945278023,0.13415710503089143,0.18623124448367168,0.001691057431836761,0.7264569934393594,1.0,0
30
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.726393195584298,0.3987018836449559,12.850537785783194,0.1285053778578319,0.3788945955746495,0.05648720211827008,0.15357458075904679,0.21006178287731686,0.0018871365478087807,0.7250236850699382,1.0,0
31
+ shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7244012304511832,0.3932239948456176,12.361161644811926,0.1236116164481192,0.3733413807007665,0.05030891438658429,0.08561341571050309,0.13592233009708737,0.0012217374057913526,0.7235167427869905,1.0,0