Spaces:
Build error
Build error
Chinese-English Machine Translation - Repetition Metrics for Different Models
Browse files
- .gitattributes +1 -0
- eval_modules/calc_repetitions_v2e.py +1 -0
- llm_toolkit/translation_utils_v2.py +8 -19
- notebooks/03a_RAPGeT_v2_Data Analysis_Chat_Template.ipynb +2 -2
- notebooks/03b_RAPGeT_v2_Data Analysis_Generic_Prompt.ipynb +2 -2
- notebooks/03c_RAPGeT_v2_Data Analysis.ipynb +3 -0
- requirements.txt +3 -1
- results/mac-results_rpp_with_mnt_2048_generic_prompt_metrics.csv +26 -26
- results/mac-results_rpp_with_mnt_2048_metrics.csv +31 -31
.gitattributes
CHANGED
@@ -79,3 +79,4 @@ notebooks/00c_Data[[:space:]]Analysis_Fine_Tuned.ipynb filter=lfs diff=lfs merge
|
|
79 |
notebooks/00d_Data[[:space:]]Analysis_Fine_Tuned_RPP.ipynb filter=lfs diff=lfs merge=lfs -text
|
80 |
notebooks/00f_Data[[:space:]]Analysis_Fine_Tuned_RPP_Generic_Prompt.ipynb filter=lfs diff=lfs merge=lfs -text
|
81 |
notebooks/03a_RAPGeT_v2_Data[[:space:]]Analysis_Chat_Template.ipynb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
79 |
notebooks/00d_Data[[:space:]]Analysis_Fine_Tuned_RPP.ipynb filter=lfs diff=lfs merge=lfs -text
|
80 |
notebooks/00f_Data[[:space:]]Analysis_Fine_Tuned_RPP_Generic_Prompt.ipynb filter=lfs diff=lfs merge=lfs -text
|
81 |
notebooks/03a_RAPGeT_v2_Data[[:space:]]Analysis_Chat_Template.ipynb filter=lfs diff=lfs merge=lfs -text
|
82 |
+
notebooks/03c_RAPGeT_v2_Data[[:space:]]Analysis.ipynb filter=lfs diff=lfs merge=lfs -text
|
eval_modules/calc_repetitions_v2e.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/inflaton/code/engd/papers/rapget-v2/eval_modules/calc_repetitions_v2e.py
|
llm_toolkit/translation_utils_v2.py
CHANGED
@@ -9,7 +9,7 @@ from datasets import load_dataset
|
|
9 |
from langchain_openai import ChatOpenAI
|
10 |
from langchain_core.prompts import ChatPromptTemplate
|
11 |
from tqdm import tqdm
|
12 |
-
from eval_modules.
|
13 |
from llm_toolkit.llm_utils import load_tokenizer, print_row_details
|
14 |
|
15 |
print(f"loading {__file__}")
|
@@ -228,14 +228,11 @@ def count_entries_with_max_tokens(entries, max_tokens):
|
|
228 |
|
229 |
|
230 |
def detect_repetition_scores(row, col, debug=False):
|
231 |
-
# print(f"row: {row}")
|
232 |
text = row[col] if isinstance(row[col], str) else ""
|
233 |
-
|
234 |
-
|
|
|
235 |
)
|
236 |
-
newline_score -= row["ground_truth_ews_score"]
|
237 |
-
repetition_score -= row["ground_truth_repetition_score"]
|
238 |
-
total_repetitions -= row["ground_truth_total_repetitions"]
|
239 |
|
240 |
return pd.Series(
|
241 |
[
|
@@ -294,19 +291,11 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
|
|
294 |
ews_score = []
|
295 |
repetition_score = []
|
296 |
total_repetitions = []
|
297 |
-
|
298 |
num_max_output_tokens = []
|
299 |
translation_completeness = []
|
300 |
columns = df.columns[2:]
|
301 |
|
302 |
-
df[
|
303 |
-
[
|
304 |
-
"ground_truth_ews_score",
|
305 |
-
"ground_truth_repetition_score",
|
306 |
-
"ground_truth_total_repetitions",
|
307 |
-
]
|
308 |
-
] = df["english"].apply(detect_scores)
|
309 |
-
|
310 |
new_col = f"count_chinese_characters-ground_truth"
|
311 |
df[new_col] = df["chinese"].apply(count_chinese_characters)
|
312 |
|
@@ -358,7 +347,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
|
|
358 |
repetition_score.append(df["repetition_score"].mean())
|
359 |
total_repetitions.append(df["total_repetitions"].mean())
|
360 |
|
361 |
-
|
362 |
|
363 |
model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
|
364 |
|
@@ -392,9 +381,9 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp", existing_metrics_df=N
|
|
392 |
metrics_df["ews_score"] = ews_score
|
393 |
metrics_df["repetition_score"] = repetition_score
|
394 |
metrics_df["total_repetitions"] = total_repetitions
|
395 |
-
metrics_df["
|
396 |
metrics_df["rap"] = metrics_df.apply(
|
397 |
-
lambda x: x["comet"]
|
398 |
)
|
399 |
|
400 |
metrics_df["translation_completeness"] = translation_completeness
|
|
|
9 |
from langchain_openai import ChatOpenAI
|
10 |
from langchain_core.prompts import ChatPromptTemplate
|
11 |
from tqdm import tqdm
|
12 |
+
from eval_modules.calc_repetitions_v2e import *
|
13 |
from llm_toolkit.llm_utils import load_tokenizer, print_row_details
|
14 |
|
15 |
print(f"loading {__file__}")
|
|
|
228 |
|
229 |
|
230 |
def detect_repetition_scores(row, col, debug=False):
|
|
|
231 |
text = row[col] if isinstance(row[col], str) else ""
|
232 |
+
|
233 |
+
newline_score, repetition_score, total_repetitions = detect_scores(
|
234 |
+
row, debug=debug, answer_col=col, ground_truth_col="english"
|
235 |
)
|
|
|
|
|
|
|
236 |
|
237 |
return pd.Series(
|
238 |
[
|
|
|
291 |
ews_score = []
|
292 |
repetition_score = []
|
293 |
total_repetitions = []
|
294 |
+
rr = []
|
295 |
num_max_output_tokens = []
|
296 |
translation_completeness = []
|
297 |
columns = df.columns[2:]
|
298 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
299 |
new_col = f"count_chinese_characters-ground_truth"
|
300 |
df[new_col] = df["chinese"].apply(count_chinese_characters)
|
301 |
|
|
|
347 |
repetition_score.append(df["repetition_score"].mean())
|
348 |
total_repetitions.append(df["total_repetitions"].mean())
|
349 |
|
350 |
+
rr.append(df["total_repetitions"].mean() / df["answer_len"].mean())
|
351 |
|
352 |
model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
|
353 |
|
|
|
381 |
metrics_df["ews_score"] = ews_score
|
382 |
metrics_df["repetition_score"] = repetition_score
|
383 |
metrics_df["total_repetitions"] = total_repetitions
|
384 |
+
metrics_df["rr"] = rr
|
385 |
metrics_df["rap"] = metrics_df.apply(
|
386 |
+
lambda x: calc_adjusted_performance(x["comet"], x["rr"]), axis=1
|
387 |
)
|
388 |
|
389 |
metrics_df["translation_completeness"] = translation_completeness
|
notebooks/03a_RAPGeT_v2_Data Analysis_Chat_Template.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6273fc3413aa0c507438f5061c2047948cee5516d16244aacbd2f6f72b19dfff
|
3 |
+
size 1562071
|
notebooks/03b_RAPGeT_v2_Data Analysis_Generic_Prompt.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b948f85d9b7ba464ddadbd63be90d285641d64d366140a55a2999f69b21b2021
|
3 |
+
size 14255738
|
notebooks/03c_RAPGeT_v2_Data Analysis.ipynb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e2bc3dfca4cc2a8adf32d889114798a0eb59a2ebac079e5b2853a80ea4edbd9
|
3 |
+
size 11641357
|
requirements.txt
CHANGED
@@ -16,7 +16,8 @@ langchain==0.3.3
|
|
16 |
langchain-community==0.3.2
|
17 |
openai==1.51.2
|
18 |
wandb==0.17.6
|
19 |
-
transformers==4.45.1
|
|
|
20 |
bitsandbytes #==0.43.3
|
21 |
sentencepiece==0.1.98
|
22 |
einops==0.8.0
|
@@ -25,3 +26,4 @@ peft==0.11.1
|
|
25 |
sacrebleu==2.4.2
|
26 |
unbabel-comet==2.2.2
|
27 |
gradio==5.0.2
|
|
|
|
16 |
langchain-community==0.3.2
|
17 |
openai==1.51.2
|
18 |
wandb==0.17.6
|
19 |
+
# transformers==4.45.1
|
20 |
+
transformers==4.43.4
|
21 |
bitsandbytes #==0.43.3
|
22 |
sentencepiece==0.1.98
|
23 |
einops==0.8.0
|
|
|
26 |
sacrebleu==2.4.2
|
27 |
unbabel-comet==2.2.2
|
28 |
gradio==5.0.2
|
29 |
+
protobuf==4.25.4
|
results/mac-results_rpp_with_mnt_2048_generic_prompt_metrics.csv
CHANGED
@@ -1,26 +1,26 @@
|
|
1 |
-
model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,
|
2 |
-
internlm/internlm2_5-7b-chat,1.00,0.7357995069773978,0.4297612514398102,15.060226683930628,0.1506022668393063,0.4097577795330234,0.04942630185348632,9.235657546337158,9.285083848190645,0.
|
3 |
-
internlm/internlm2_5-7b-chat,1.02,0.7377187550620283,0.4246676977198055,14.728605282752795,0.147286052827528,0.4063246630867048,0.06972639011473963,5.35657546337158,5.
|
4 |
-
internlm/internlm2_5-7b-chat,1.04,0.7371160490183523,0.4173352728374962,13.846403511622256,0.1384640351162226,0.3988121301027288,0.06884377758164166,5.315092674315975,5.
|
5 |
-
internlm/internlm2_5-7b-chat,1.06,0.7338597697698218,0.3997609847704189,12.213374588416173,0.1221337458841617,0.3841365748920261,0.05825242718446602,5.275375110326567,5.
|
6 |
-
internlm/internlm2_5-7b-chat,1.08,0.7318234702626478,0.3881614120395272,11.369735763522288,0.1136973576352228,0.372963223209074,0.06707855251544571,5.283318623124448,5.
|
7 |
-
internlm/internlm2_5-7b-chat,1.10,0.7288648442604431,0.3784182249483568,10.377989030628608,0.103779890306286,0.3618424457502351,0.05207413945278023,5.288614298323036,5.340688437775817,0.
|
8 |
-
microsoft/Phi-3.5-mini-instruct,1.00,0.710605339281136,0.3788926591792472,9.70032874202361,0.097003287420236,0.3556134739443916,5.390997352162401,12.997352162400706,18.
|
9 |
-
microsoft/Phi-3.5-mini-instruct,1.02,0.7150978385770836,0.3741049510326346,9.910633597905436,0.0991063359790543,0.3453160556383774,3.586054721977052,7.001765225066196,10.
|
10 |
-
microsoft/Phi-3.5-mini-instruct,1.04,0.7074641684778791,0.3538698731015666,9.19721270538052,0.0919721270538052,0.3225824135517728,0.05119152691968226,0.05560458958517211,0.
|
11 |
-
microsoft/Phi-3.5-mini-instruct,1.06,0.6962301708225224,0.3252854575717334,6.967166383106307,0.069671663831063,0.2948764736589108,0.0353045013239188,0.06796116504854369,0.
|
12 |
-
microsoft/Phi-3.5-mini-instruct,1.08,0.6823413657174107,0.301599095293242,5.452744292893752,0.0545274429289375,0.2726387617958179,0.07678729037952339,0.04766107678729038,0.
|
13 |
-
microsoft/Phi-3.5-mini-instruct,1.10,0.6717851540206916,0.2885734336603344,4.751039447225815,0.0475103944722581,0.2604284999048123,0.08031774051191527,0.02383053839364519,0.10414827890556046,0.
|
14 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.739080294072365,0.4490104515425626,6.7013404492782405,0.0670134044927823,0.4196181637680596,0.36716681376875554,139.80935569285083,140.
|
15 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.743018615750854,0.4514907128972251,8.545954556237808,0.085459545562378,0.4214940415288087,1.0035304501323918,67.00353045013239,
|
16 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7432195577780335,0.4517500968367987,10.080425294411064,0.1008042529441106,0.4200973007348334,0.01059135039717564,35.19770520741395,35.
|
17 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.7430821573139815,0.4484154407825542,10.37470506193322,0.1037470506193321,0.4160289393328045,1.8005295675198587,26.880847308031775,28.
|
18 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7435937259684909,0.4407733547418294,10.930453247368872,0.1093045324736887,0.4113063412348818,0.09267431597528684,12.007943512797882,12.
|
19 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7427059700687901,0.4358940590119784,11.381344076286156,0.1138134407628615,0.4062980635945339,0.03971756398940865,0.6681376875551632,0.
|
20 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.
|
21 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7222260562908512,0.4039898602650971,13.461179673541356,0.1346117967354136,0.3819960428004565,0.05736981465136805,5.87378640776699,5.
|
22 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.723643534970515,0.4051102919608809,13.18537912294539,0.1318537912294539,0.3824621732976229,0.06266548984995587,5.840247131509267,5.
|
23 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7238812581796301,0.4039456988919502,13.314773371306682,0.1331477337130668,0.3813737464821349,0.05736981465136805,5.845542806707855,5.
|
24 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7252625281686607,0.4012797167602334,13.19924345265053,0.1319924345265053,0.3798291332004637,0.06266548984995587,5.847308031774051,5.
|
25 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.7261167238322592,0.3987395126194482,12.656486100206328,0.1265648610020633,0.376975448872996,0.05648720211827008,5.820829655781112,5.
|
26 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7264630642225547,0.3964859769229444,12.284961706379857,0.1228496170637985,0.3744555065346823,0.04942630185348632,0.09267431597528684,0.
|
|
|
1 |
+
model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rr,rap,translation_completeness,num_max_output_tokens
|
2 |
+
internlm/internlm2_5-7b-chat,1.00,0.7357995069773978,0.4297612514398102,15.060226683930628,0.1506022668393063,0.4097577795330234,0.04942630185348632,9.235657546337158,9.285083848190645,0.07525035765379114,0.6824623187873116,1.0,2
|
3 |
+
internlm/internlm2_5-7b-chat,1.02,0.7377187550620283,0.4246676977198055,14.728605282752795,0.147286052827528,0.4063246630867048,0.06972639011473963,5.35657546337158,5.426301853486319,0.04625547346404442,0.7043723959353786,1.0,1
|
4 |
+
internlm/internlm2_5-7b-chat,1.04,0.7371160490183523,0.4173352728374962,13.846403511622256,0.1384640351162226,0.3988121301027288,0.06884377758164166,5.315092674315975,5.383936451897617,0.04501878242643857,0.704667851183933,1.0,1
|
5 |
+
internlm/internlm2_5-7b-chat,1.06,0.7338597697698218,0.3997609847704189,12.213374588416173,0.1221337458841617,0.3841365748920261,0.05825242718446602,5.275375110326567,5.333627537511033,0.043830827367611756,0.7023888258277158,1.0,1
|
6 |
+
internlm/internlm2_5-7b-chat,1.08,0.7318234702626478,0.3881614120395272,11.369735763522288,0.1136973576352228,0.372963223209074,0.06707855251544571,5.283318623124448,5.350397175639894,0.04300663332269164,0.7010173875930041,1.0,1
|
7 |
+
internlm/internlm2_5-7b-chat,1.10,0.7288648442604431,0.3784182249483568,10.377989030628608,0.103779890306286,0.3618424457502351,0.05207413945278023,5.288614298323036,5.340688437775817,0.042176064682512025,0.6987634348896543,1.0,1
|
8 |
+
microsoft/Phi-3.5-mini-instruct,1.00,0.710605339281136,0.3788926591792472,9.70032874202361,0.097003287420236,0.3556134739443916,5.390997352162401,12.997352162400706,18.388349514563107,0.13770903562694164,0.6191875166952294,1.0,4
|
9 |
+
microsoft/Phi-3.5-mini-instruct,1.02,0.7150978385770836,0.3741049510326346,9.910633597905436,0.0991063359790543,0.3453160556383774,3.586054721977052,7.001765225066196,10.587819947043249,0.08180522500528503,0.6589279165887452,1.0,2
|
10 |
+
microsoft/Phi-3.5-mini-instruct,1.04,0.7074641684778791,0.3538698731015666,9.19721270538052,0.0919721270538052,0.3225824135517728,0.05119152691968226,0.05560458958517211,0.10679611650485436,0.000859149229250836,0.7068566122109297,1.0,0
|
11 |
+
microsoft/Phi-3.5-mini-instruct,1.06,0.6962301708225224,0.3252854575717334,6.967166383106307,0.069671663831063,0.2948764736589108,0.0353045013239188,0.06796116504854369,0.10326566637246248,0.0007865281839265906,0.6956827814674672,1.0,0
|
12 |
+
microsoft/Phi-3.5-mini-instruct,1.08,0.6823413657174107,0.301599095293242,5.452744292893752,0.0545274429289375,0.2726387617958179,0.07678729037952339,0.04766107678729038,0.12444836716681378,0.0009016671249608319,0.68172639822959,1.0,0
|
13 |
+
microsoft/Phi-3.5-mini-instruct,1.10,0.6717851540206916,0.2885734336603344,4.751039447225815,0.0475103944722581,0.2604284999048123,0.08031774051191527,0.02383053839364519,0.10414827890556046,0.0007188284314919954,0.6713024292710504,1.0,0
|
14 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.739080294072365,0.4490104515425626,6.7013404492782405,0.0670134044927823,0.4196181637680596,0.36716681376875554,139.80935569285083,140.1765225066196,0.48362195756964893,0.45567717525911156,0.999117387466902,15
|
15 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.743018615750854,0.4514907128972251,8.545954556237808,0.085459545562378,0.4214940415288087,1.0035304501323918,67.00353045013239,68.00706090026479,0.2929644725635723,0.5543280318044165,1.0,6
|
16 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7432195577780335,0.4517500968367987,10.080425294411064,0.1008042529441106,0.4200973007348334,0.01059135039717564,35.19770520741395,35.208296557811124,0.17564306911947306,0.6234997933386273,1.0,6
|
17 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.7430821573139815,0.4484154407825542,10.37470506193322,0.1037470506193321,0.4160289393328045,1.8005295675198587,26.880847308031775,28.68137687555163,0.1522966823356282,0.6381095208514015,1.0,3
|
18 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7435937259684909,0.4407733547418294,10.930453247368872,0.1093045324736887,0.4113063412348818,0.09267431597528684,12.007943512797882,12.100617828773169,0.06721477842655646,0.6952559422553878,1.0,3
|
19 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7427059700687901,0.4358940590119784,11.381344076286156,0.1138134407628615,0.4062980635945339,0.03971756398940865,0.6681376875551632,0.707855251544572,0.003961824217515018,0.7397693206556093,1.0,1
|
20 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.3888604919913587,0.2055875758168277,0.2434587181959752,0.0024345871819597,0.1844552188025856,638.2797881729921,3889.9232127096207,4528.203000882612,0.9210262088655917,0.15480936210106447,0.9240953221535746,570
|
21 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7222260562908512,0.4039898602650971,13.461179673541356,0.1346117967354136,0.3819960428004565,0.05736981465136805,5.87378640776699,5.931156222418358,0.05150372482295595,0.6859703892398439,1.0,1
|
22 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.723643534970515,0.4051102919608809,13.18537912294539,0.1318537912294539,0.3824621732976229,0.06266548984995587,5.840247131509267,5.902912621359223,0.05148734372113075,0.6873279697459551,1.0,1
|
23 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7238812581796301,0.4039456988919502,13.314773371306682,0.1331477337130668,0.3813737464821349,0.05736981465136805,5.845542806707855,5.902912621359223,0.05127418810757766,0.6877003345402561,1.0,1
|
24 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7252625281686607,0.4012797167602334,13.19924345265053,0.1319924345265053,0.3798291332004637,0.06266548984995587,5.847308031774051,5.909973521624007,0.05081388730791121,0.6893297921407147,1.0,1
|
25 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.7261167238322592,0.3987395126194482,12.656486100206328,0.1265648610020633,0.376975448872996,0.05648720211827008,5.820829655781112,5.877316857899382,0.05012721880128273,0.6906157284372703,1.0,1
|
26 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7264630642225547,0.3964859769229444,12.284961706379857,0.1228496170637985,0.3744555065346823,0.04942630185348632,0.09267431597528684,0.14210061782877317,0.001266948385624464,0.7255432558140477,1.0,0
|
results/mac-results_rpp_with_mnt_2048_metrics.csv
CHANGED
@@ -1,31 +1,31 @@
|
|
1 |
-
model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,
|
2 |
-
internlm/internlm2_5-7b-chat,1.00,0.739699612254078,0.4289996929258777,14.734881589173108,0.1473488158917311,0.4096466800937898,0.05383936451897617,12.606354810238305,12.
|
3 |
-
internlm/internlm2_5-7b-chat,1.02,0.740223803961056,0.4266246904302194,14.583816688798017,0.1458381668879802,0.4071727106228415,0.06266548984995587,9.849073256840247,9.
|
4 |
-
internlm/internlm2_5-7b-chat,1.04,0.7398856264610577,0.4154585167056314,13.534659133050225,0.1353465913305021,0.3968657713589718,0.07237422771403354,6.529567519858782,6.
|
5 |
-
internlm/internlm2_5-7b-chat,1.06,0.7379362287241489,0.4039588647855378,12.346740971499404,0.1234674097149939,0.3872447044295494,0.06796116504854369,6.533980582524272,6.
|
6 |
-
internlm/internlm2_5-7b-chat,1.08,0.7319988705684732,0.3873176839854818,11.075674965706344,0.1107567496570634,0.3724352909668609,0.05207413945278023,9.83495145631068,9.
|
7 |
-
internlm/internlm2_5-7b-chat,1.10,0.7295350462119345,0.3769306874386757,10.305163787094209,0.1030516378709421,0.3634496155759507,0.07855251544571933,6.527802294792586,6.
|
8 |
-
microsoft/Phi-3.5-mini-instruct,1.00,0.7107840433177544,0.3796831545348129,8.71296896471494,0.0871296896471493,0.3589874395901284,10.670785525154457,17.93821712268314,28.
|
9 |
-
microsoft/Phi-3.5-mini-instruct,1.02,0.7164765837070485,0.3780585837553919,10.291240080163629,0.1029124008016362,0.3546952732427276,3.585172109443954,7.1403353927625774,10.
|
10 |
-
microsoft/Phi-3.5-mini-instruct,1.04,0.7111233387336411,0.3547161333845742,8.966881655527896,0.0896688165552789,0.3300979657678754,3.6125330979699912,0.07325684024713151,3.685789938217123,0.
|
11 |
-
microsoft/Phi-3.5-mini-instruct,1.06,0.7024363270136286,0.3298733737040869,7.076233088011138,0.0707623308801113,0.3019513312669543,0.04589585172109444,0.05207413945278023,0.
|
12 |
-
microsoft/Phi-3.5-mini-instruct,1.08,0.6882111219210848,0.3054541022592767,5.105510599247868,0.0510551059924786,0.2736030007297014,3.3609885260370698,0.06443071491615181,3.
|
13 |
-
microsoft/Phi-3.5-mini-instruct,1.10,0.6712992989638161,0.2903831801547132,4.091958857999118,0.0409195885799911,0.251653275009876,0.32215357458075905,0.06531332744924978,0.
|
14 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.7501818982248062,0.4611110508507017,17.87914973742753,0.1787914973742752,0.4340662057009564,0.00706090026478376,0.1262135922330097,0.
|
15 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.7485114382045625,0.4571517219079576,17.436884594979905,0.174368845949799,0.4311385932640979,0.00706090026478376,0.11562224183583407,0.
|
16 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7500591586357918,0.4560467960364254,17.440173470996626,0.1744017347099662,0.4302844557731285,0.00706090026478376,0.13062665489849956,0.
|
17 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.748812871571673,0.4520416361219855,16.89523258317781,0.168952325831778,0.4260026774745837,0.00706090026478376,0.0997352162400706,0.
|
18 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7473851635144647,0.4442106511292453,16.16623784482793,0.1616623784482792,0.4195129470585874,0.01059135039717564,0.13062665489849956,0.
|
19 |
-
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7465709781131172,0.4379837926138161,15.60172257624066,0.1560172257624066,0.4132562932940978,0.01059135039717564,0.07855251544571933,0.
|
20 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.7426396049131678,0.433632501662176,15.209540658023398,0.1520954065802339,0.4089208235151474,0.00353045013239188,3.901147396293027,3.
|
21 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.7436477056353469,0.4329054166518245,15.19102241646024,0.1519102241646024,0.4068967964789407,0.0,3.8905560458958517,3.
|
22 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.7440943776351209,0.4320478700956207,15.05135166158296,0.1505135166158296,0.4062008380201262,0.00353045013239188,0.1526919682259488,0.
|
23 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.7426502735395928,0.4275429314912545,14.449130821290163,0.1444913082129016,0.4001409979222783,0.00706090026478376,0.13768755516328332,0.
|
24 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.7408098006080129,0.4206626658729054,13.933703757385222,0.1393370375738522,0.3964824268676203,0.00353045013239188,0.1297440423654016,0.
|
25 |
-
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.10,0.7392685912871718,0.4111211240399151,13.303738403756984,0.1330373840375698,0.3870959581563503,0.00353045013239188,0.12180052956751986,0.
|
26 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7240239171358935,0.4068335357738006,13.565136550617618,0.1356513655061761,0.3866395067055498,0.0529567519858782,0.1209179170344219,0.
|
27 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.7263097057327799,0.4064914781094827,13.42987641622816,0.1342987641622816,0.3863697821025159,0.06001765225066196,6.236540158870256,6.
|
28 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7276128307708258,0.4054859896994975,13.295092218891954,0.1329509221889195,0.3851203729935697,0.05207413945278023,0.1297440423654016,0.
|
29 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7276865132383193,0.4014727027723293,13.10860799057166,0.1310860799057166,0.3804952786306688,0.05207413945278023,0.13415710503089143,0.
|
30 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.726393195584298,0.3987018836449559,12.850537785783194,0.1285053778578319,0.3788945955746495,0.05648720211827008,0.15357458075904679,0.21006178287731686,0.
|
31 |
-
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7244012304511832,0.3932239948456176,12.361161644811926,0.1236116164481192,0.3733413807007665,0.05030891438658429,0.08561341571050309,0.13592233009708737,0.
|
|
|
1 |
+
model,rpp,comet,meteor,spbleu,bleu_1,rouge_l,ews_score,repetition_score,total_repetitions,rr,rap,translation_completeness,num_max_output_tokens
|
2 |
+
internlm/internlm2_5-7b-chat,1.00,0.739699612254078,0.4289996929258777,14.734881589173108,0.1473488158917311,0.4096466800937898,0.05383936451897617,12.606354810238305,12.660194174757281,0.10371655820679682,0.6668249823411998,1.0,2
|
3 |
+
internlm/internlm2_5-7b-chat,1.02,0.740223803961056,0.4266246904302194,14.583816688798017,0.1458381668879802,0.4071727106228415,0.06266548984995587,9.849073256840247,9.911738746690203,0.0832234063051179,0.6811136450938015,1.0,1
|
4 |
+
internlm/internlm2_5-7b-chat,1.04,0.7398856264610577,0.4154585167056314,13.534659133050225,0.1353465913305021,0.3968657713589718,0.07237422771403354,6.529567519858782,6.601941747572815,0.05613508442776736,0.6994963184593858,1.0,1
|
5 |
+
internlm/internlm2_5-7b-chat,1.06,0.7379362287241489,0.4039588647855378,12.346740971499404,0.1234674097149939,0.3872447044295494,0.06796116504854369,6.533980582524272,6.601941747572815,0.05513987689359035,0.6983479910444814,0.999117387466902,1
|
6 |
+
internlm/internlm2_5-7b-chat,1.08,0.7319988705684732,0.3873176839854818,11.075674965706344,0.1107567496570634,0.3724352909668609,0.05207413945278023,9.83495145631068,9.88702559576346,0.07906717392378437,0.676350746394496,0.999117387466902,1
|
7 |
+
internlm/internlm2_5-7b-chat,1.10,0.7295350462119345,0.3769306874386757,10.305163787094209,0.1030516378709421,0.3634496155759507,0.07855251544571933,6.527802294792586,6.606354810238305,0.053004659594657756,0.6918732323764201,0.999117387466902,1
|
8 |
+
microsoft/Phi-3.5-mini-instruct,1.00,0.7107840433177544,0.3796831545348129,8.71296896471494,0.0871296896471493,0.3589874395901284,10.670785525154457,17.93821712268314,28.6090026478376,0.20225504327262062,0.5806299320134263,1.0,6
|
9 |
+
microsoft/Phi-3.5-mini-instruct,1.02,0.7164765837070485,0.3780585837553919,10.291240080163629,0.1029124008016362,0.3546952732427276,3.585172109443954,7.1403353927625774,10.725507502206531,0.08530053839296368,0.6578947912630083,1.0,2
|
10 |
+
microsoft/Phi-3.5-mini-instruct,1.04,0.7111233387336411,0.3547161333845742,8.966881655527896,0.0896688165552789,0.3300979657678754,3.6125330979699912,0.07325684024713151,3.685789938217123,0.02973427131098516,0.6902898733301204,1.0,1
|
11 |
+
microsoft/Phi-3.5-mini-instruct,1.06,0.7024363270136286,0.3298733737040869,7.076233088011138,0.0707623308801113,0.3019513312669543,0.04589585172109444,0.05207413945278023,0.09796999117387467,0.0007571675113745661,0.701904666351495,1.0,0
|
12 |
+
microsoft/Phi-3.5-mini-instruct,1.08,0.6882111219210848,0.3054541022592767,5.105510599247868,0.0510551059924786,0.2736030007297014,3.3609885260370698,0.06443071491615181,3.4254192409532216,0.023581380370521147,0.6721720091371258,1.0,1
|
13 |
+
microsoft/Phi-3.5-mini-instruct,1.10,0.6712992989638161,0.2903831801547132,4.091958857999118,0.0409195885799911,0.251653275009876,0.32215357458075905,0.06531332744924978,0.38746690203000883,0.0023407216247487324,0.6697298117609694,1.0,0
|
14 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.00,0.7501818982248062,0.4611110508507017,17.87914973742753,0.1787914973742752,0.4340662057009564,0.00706090026478376,0.1262135922330097,0.13327449249779347,0.0011265209898463904,0.7493372784005554,1.0,0
|
15 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.02,0.7485114382045625,0.4571517219079576,17.436884594979905,0.174368845949799,0.4311385932640979,0.00706090026478376,0.11562224183583407,0.12268314210061783,0.0010382199383043404,0.7477347219762754,1.0,0
|
16 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.04,0.7500591586357918,0.4560467960364254,17.440173470996626,0.1744017347099662,0.4302844557731285,0.00706090026478376,0.13062665489849956,0.13768755516328332,0.0011593944393659004,0.7491900481363686,1.0,0
|
17 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.06,0.748812871571673,0.4520416361219855,16.89523258317781,0.168952325831778,0.4260026774745837,0.00706090026478376,0.0997352162400706,0.10679611650485436,0.0008902491962006224,0.7481465381600518,1.0,0
|
18 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.08,0.7473851635144647,0.4442106511292453,16.16623784482793,0.1616623784482792,0.4195129470585874,0.01059135039717564,0.13062665489849956,0.1412180052956752,0.001176591707969938,0.7465063134536485,1.0,0
|
19 |
+
shenzhi-wang/Llama3.1-70B-Chinese-Chat,1.10,0.7465709781131172,0.4379837926138161,15.60172257624066,0.1560172257624066,0.4132562932940978,0.01059135039717564,0.07855251544571933,0.08914386584289496,0.000734476013176936,0.7460228409589962,1.0,0
|
20 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.00,0.7426396049131678,0.433632501662176,15.209540658023398,0.1520954065802339,0.4089208235151474,0.00353045013239188,3.901147396293027,3.904677846425419,0.03237065275450547,0.7189848023792343,1.0,1
|
21 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.02,0.7436477056353469,0.4329054166518245,15.19102241646024,0.1519102241646024,0.4068967964789407,0.0,3.8905560458958517,3.8905560458958517,0.03219656852361788,0.7200861374743239,1.0,1
|
22 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.04,0.7440943776351209,0.4320478700956207,15.05135166158296,0.1505135166158296,0.4062008380201262,0.00353045013239188,0.1526919682259488,0.1562224183583407,0.001352332200022921,0.7430887949425674,1.0,0
|
23 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.06,0.7426502735395928,0.4275429314912545,14.449130821290163,0.1444913082129016,0.4001409979222783,0.00706090026478376,0.13768755516328332,0.14474845542806708,0.0012399256044637321,0.7417300130954148,1.0,0
|
24 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.08,0.7408098006080129,0.4206626658729054,13.933703757385222,0.1393370375738522,0.3964824268676203,0.00353045013239188,0.1297440423654016,0.13327449249779347,0.001134996993385448,0.7399694606935023,1.0,0
|
25 |
+
shenzhi-wang/Llama3.1-8B-Chinese-Chat,1.10,0.7392685912871718,0.4111211240399151,13.303738403756984,0.1330373840375698,0.3870959581563503,0.00353045013239188,0.12180052956751986,0.12533097969991175,0.0010529672171262895,0.7384905753804022,1.0,0
|
26 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.00,0.7240239171358935,0.4068335357738006,13.565136550617618,0.1356513655061761,0.3866395067055498,0.0529567519858782,0.1209179170344219,0.17387466902030008,0.001578993772192076,0.7228815899808891,1.0,0
|
27 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.02,0.7263097057327799,0.4064914781094827,13.42987641622816,0.1342987641622816,0.3863697821025159,0.06001765225066196,6.236540158870256,6.296557811120918,0.0541899611084103,0.6879984291168549,1.0,1
|
28 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.04,0.7276128307708258,0.4054859896994975,13.295092218891954,0.1329509221889195,0.3851203729935697,0.05207413945278023,0.1297440423654016,0.18181818181818182,0.0016533037985858635,0.7264108595993547,1.0,0
|
29 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.06,0.7276865132383193,0.4014727027723293,13.10860799057166,0.1310860799057166,0.3804952786306688,0.05207413945278023,0.13415710503089143,0.18623124448367168,0.001691057431836761,0.7264569934393594,1.0,0
|
30 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.08,0.726393195584298,0.3987018836449559,12.850537785783194,0.1285053778578319,0.3788945955746495,0.05648720211827008,0.15357458075904679,0.21006178287731686,0.0018871365478087807,0.7250236850699382,1.0,0
|
31 |
+
shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat,1.10,0.7244012304511832,0.3932239948456176,12.361161644811926,0.1236116164481192,0.3733413807007665,0.05030891438658429,0.08561341571050309,0.13592233009708737,0.0012217374057913526,0.7235167427869905,1.0,0
|