dh-mc committed on
Commit
a69b127
·
1 Parent(s): cf5ba3f

enable do_sample

Browse files
.env.example CHANGED
@@ -1,4 +1,5 @@
1
  MODEL_NAME=internlm/internlm2_5-7b-chat-1m
 
2
 
3
  HF_TOKEN=
4
 
 
1
  MODEL_NAME=internlm/internlm2_5-7b-chat-1m
2
+ BATCH_SIZE=2
3
 
4
  HF_TOKEN=
5
 
.gitattributes CHANGED
@@ -51,3 +51,4 @@ llama-factory/data/dataset_info.json filter=lfs diff=lfs merge=lfs -text
51
  datasets/mac/mac-test.tsv filter=lfs diff=lfs merge=lfs -text
52
  datasets/mac/mac-train.tsv filter=lfs diff=lfs merge=lfs -text
53
  datasets/mac/mac.tsv filter=lfs diff=lfs merge=lfs -text
 
 
51
  datasets/mac/mac-test.tsv filter=lfs diff=lfs merge=lfs -text
52
  datasets/mac/mac-train.tsv filter=lfs diff=lfs merge=lfs -text
53
  datasets/mac/mac.tsv filter=lfs diff=lfs merge=lfs -text
54
+ results/mac-results_greedy_decoding.csv filter=lfs diff=lfs merge=lfs -text
llm_toolkit/eval.py CHANGED
@@ -25,8 +25,11 @@ adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
25
  load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
26
  data_path = os.getenv("DATA_PATH")
27
  results_path = os.getenv("RESULTS_PATH")
 
28
 
29
- print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
 
 
30
 
31
  if is_cuda:
32
  torch.cuda.empty_cache()
@@ -84,6 +87,7 @@ evaluate_model_with_repetition_penalty(
84
  start_repetition_penalty=1.0,
85
  end_repetition_penalty=1.3,
86
  step_repetition_penalty=0.02,
 
87
  device=device,
88
  )
89
 
 
25
  load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
26
  data_path = os.getenv("DATA_PATH")
27
  results_path = os.getenv("RESULTS_PATH")
28
+ batch_size = int(os.getenv("BATCH_SIZE", 1))
29
 
30
+ print(
31
+ model_name, adapter_name_or_path, load_in_4bit, data_path, results_path, batch_size
32
+ )
33
 
34
  if is_cuda:
35
  torch.cuda.empty_cache()
 
87
  start_repetition_penalty=1.0,
88
  end_repetition_penalty=1.3,
89
  step_repetition_penalty=0.02,
90
+ batch_size=batch_size,
91
  device=device,
92
  )
93
 
llm_toolkit/llm_utils.py CHANGED
@@ -22,6 +22,12 @@ def get_template(model_name):
22
  return "chatml"
23
 
24
 
 
 
 
 
 
 
25
  def load_model(
26
  model_name,
27
  dtype=torch.bfloat16,
@@ -50,7 +56,7 @@ def load_model(
50
  )
51
  return chat_model.engine.model, chat_model.engine.tokenizer
52
 
53
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
54
  bnb_config = BitsAndBytesConfig(
55
  load_in_4bit=load_in_4bit,
56
  bnb_4bit_quant_type="nf4",
@@ -164,6 +170,10 @@ def eval_model(
164
  device="cuda",
165
  max_new_tokens=2048,
166
  repetition_penalty=1.0,
 
 
 
 
167
  batch_size=1,
168
  ):
169
  total = len(eval_dataset)
@@ -184,6 +194,10 @@ def eval_model(
184
  outputs = model.generate(
185
  **inputs,
186
  max_new_tokens=max_new_tokens,
 
 
 
 
187
  repetition_penalty=repetition_penalty,
188
  use_cache=False,
189
  )
@@ -207,6 +221,7 @@ def evaluate_model_with_repetition_penalty(
207
  start_repetition_penalty=1.0,
208
  end_repetition_penalty=1.3,
209
  step_repetition_penalty=0.02,
 
210
  device="cuda",
211
  ):
212
  print(f"Evaluating model: {model_name} on {device}")
@@ -225,6 +240,7 @@ def evaluate_model_with_repetition_penalty(
225
  dataset,
226
  device=device,
227
  repetition_penalty=repetition_penalty,
 
228
  )
229
 
230
  model_name_with_rp = f"{model_name}/rpp-{repetition_penalty:.2f}"
 
22
  return "chatml"
23
 
24
 
25
+ def load_tokenizer(model_name):
26
+ return AutoTokenizer.from_pretrained(
27
+ model_name, trust_remote_code=True, padding_side="left"
28
+ )
29
+
30
+
31
  def load_model(
32
  model_name,
33
  dtype=torch.bfloat16,
 
56
  )
57
  return chat_model.engine.model, chat_model.engine.tokenizer
58
 
59
+ tokenizer = load_tokenizer(model_name)
60
  bnb_config = BitsAndBytesConfig(
61
  load_in_4bit=load_in_4bit,
62
  bnb_4bit_quant_type="nf4",
 
170
  device="cuda",
171
  max_new_tokens=2048,
172
  repetition_penalty=1.0,
173
+ do_sample=True,
174
+ top_p=0.95,
175
+ top_k=0, # select from top 0 tokens (because zero, relies on top_p)
176
+ temperature=0.01,
177
  batch_size=1,
178
  ):
179
  total = len(eval_dataset)
 
194
  outputs = model.generate(
195
  **inputs,
196
  max_new_tokens=max_new_tokens,
197
+ do_sample=do_sample,
198
+ temperature=temperature,
199
+ top_p=top_p,
200
+ top_k=top_k,
201
  repetition_penalty=repetition_penalty,
202
  use_cache=False,
203
  )
 
221
  start_repetition_penalty=1.0,
222
  end_repetition_penalty=1.3,
223
  step_repetition_penalty=0.02,
224
+ batch_size=1,
225
  device="cuda",
226
  ):
227
  print(f"Evaluating model: {model_name} on {device}")
 
240
  dataset,
241
  device=device,
242
  repetition_penalty=repetition_penalty,
243
+ batch_size=batch_size,
244
  )
245
 
246
  model_name_with_rp = f"{model_name}/rpp-{repetition_penalty:.2f}"
llm_toolkit/translation_utils.py CHANGED
@@ -9,6 +9,7 @@ from langchain_openai import ChatOpenAI
9
  from langchain_core.prompts import ChatPromptTemplate
10
  from tqdm import tqdm
11
  from eval_modules.calc_repetitions import *
 
12
 
13
  print(f"loading {__file__}")
14
 
@@ -162,20 +163,25 @@ def load_translation_dataset(data_path, tokenizer=None):
162
  return datasets
163
 
164
 
165
- def get_metrics(df):
166
  metrics_df = pd.DataFrame(df.columns.T)[2:]
167
  metrics_df.rename(columns={0: "model"}, inplace=True)
168
  metrics_df["rpp"] = metrics_df["model"].apply(lambda x: x.split("rpp-")[-1])
169
- metrics_df["model"] = metrics_df["model"].apply(lambda x: x.split("/")[1])
170
  metrics_df.reset_index(inplace=True)
171
  metrics_df = metrics_df.drop(columns=["index"])
172
 
 
 
 
 
173
  meteor = []
174
  bleu_1 = []
175
  rouge_l = []
176
  ews_score = []
177
  repetition_score = []
178
  total_repetitions = []
 
179
 
180
  for col in df.columns[2:]:
181
  metrics = calc_metrics(df["english"], df[col], debug=True)
@@ -192,12 +198,23 @@ def get_metrics(df):
192
  repetition_score.append(df["repetition_score"].mean())
193
  total_repetitions.append(df["total_repetitions"].mean())
194
 
 
 
 
 
 
 
 
 
195
  metrics_df["meteor"] = meteor
196
  metrics_df["bleu_1"] = bleu_1
197
  metrics_df["rouge_l"] = rouge_l
198
  metrics_df["ews_score"] = ews_score
199
  metrics_df["repetition_score"] = ews_score
200
  metrics_df["total_repetitions"] = ews_score
 
 
 
201
 
202
  return metrics_df
203
 
 
9
  from langchain_core.prompts import ChatPromptTemplate
10
  from tqdm import tqdm
11
  from eval_modules.calc_repetitions import *
12
+ from llm_toolkit.llm_utils import load_tokenizer
13
 
14
  print(f"loading {__file__}")
15
 
 
163
  return datasets
164
 
165
 
166
+ def get_metrics(df, max_output_tokens=2048):
167
  metrics_df = pd.DataFrame(df.columns.T)[2:]
168
  metrics_df.rename(columns={0: "model"}, inplace=True)
169
  metrics_df["rpp"] = metrics_df["model"].apply(lambda x: x.split("rpp-")[-1])
170
+ metrics_df["model"] = metrics_df["model"].apply(lambda x: x.split("/rpp-")[0])
171
  metrics_df.reset_index(inplace=True)
172
  metrics_df = metrics_df.drop(columns=["index"])
173
 
174
+ tokenizers = {
175
+ model: load_tokenizer(model) for model in metrics_df["model"].unique()
176
+ }
177
+
178
  meteor = []
179
  bleu_1 = []
180
  rouge_l = []
181
  ews_score = []
182
  repetition_score = []
183
  total_repetitions = []
184
+ num_entries_with_max_output_tokens = []
185
 
186
  for col in df.columns[2:]:
187
  metrics = calc_metrics(df["english"], df[col], debug=True)
 
198
  repetition_score.append(df["repetition_score"].mean())
199
  total_repetitions.append(df["total_repetitions"].mean())
200
 
201
+ df["output_tokens"] = df[col].apply(
202
+ lambda x: len(tokenizers[col.split("/rpp")[0]](x)["input_ids"])
203
+ )
204
+
205
+ num_entries_with_max_output_tokens.append(
206
+ df["output_tokens"].value_counts().get(max_output_tokens, 0)
207
+ )
208
+
209
  metrics_df["meteor"] = meteor
210
  metrics_df["bleu_1"] = bleu_1
211
  metrics_df["rouge_l"] = rouge_l
212
  metrics_df["ews_score"] = ews_score
213
  metrics_df["repetition_score"] = ews_score
214
  metrics_df["total_repetitions"] = ews_score
215
+ metrics_df["num_entries_with_max_output_tokens"] = (
216
+ num_entries_with_max_output_tokens
217
+ )
218
 
219
  return metrics_df
220
 
notebooks/00_Data Analysis.ipynb CHANGED
@@ -1 +1 @@
1
- {"cells":[{"cell_type":"code","execution_count":72,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[{"name":"stdout","output_type":"stream","text":["The autoreload extension is already loaded. To reload it, use:\n"," %reload_ext autoreload\n"]}],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":73,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/papers/rapget-translation\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","# check if workding_dir is in local variables\n","if 'workding_dir' not in locals():\n"," workding_dir = str(Path.cwd().parent)\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":74,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading 
env vars from: /Users/inflaton/code/engd/papers/rapget-translation/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":74,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":75,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["Qwen/Qwen2-7B-Instruct None False datasets/mac/mac.tsv results/mac-results.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"DATA_PATH\")\n","results_path = os.getenv(\"RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)"]},{"cell_type":"code","execution_count":76,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","Name: torch\n","Version: 2.4.0\n","Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration\n","Home-page: https://pytorch.org/\n","Author: PyTorch Team\n","Author-email: packages@pytorch.org\n","License: BSD-3\n","Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages\n","Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions\n","Required-by: accelerate, peft, torchaudio, torchvision\n","---\n","Name: transformers\n","Version: 4.43.3\n","Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow\n","Home-page: https://github.com/huggingface/transformers\n","Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)\n","Author-email: transformers@huggingface.co\n","License: Apache 2.0 License\n","Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages\n","Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm\n","Required-by: peft\n","CPU times: user 7.77 ms, sys: 12.1 ms, total: 19.9 ms\n","Wall time: 1.86 s\n"]}],"source":["%%time\n","os.environ[\"TOKENIZERS_PARALLELISM\"] = \"true\"\n","\n","!python --version\n","!pip show torch transformers"]},{"cell_type":"code","execution_count":77,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from 
llm_toolkit.translation_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":78,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 1133 entries, 0 to 1132\n","Data columns (total 20 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 chinese 1133 non-null object\n"," 1 english 1133 non-null object\n"," 2 Qwen/Qwen2-7B-Instruct/rpp-1.00 1133 non-null object\n"," 3 Qwen/Qwen2-7B-Instruct/rpp-1.02 1133 non-null object\n"," 4 Qwen/Qwen2-7B-Instruct/rpp-1.04 1133 non-null object\n"," 5 Qwen/Qwen2-7B-Instruct/rpp-1.06 1133 non-null object\n"," 6 Qwen/Qwen2-7B-Instruct/rpp-1.08 1133 non-null object\n"," 7 Qwen/Qwen2-7B-Instruct/rpp-1.10 1133 non-null object\n"," 8 Qwen/Qwen2-7B-Instruct/rpp-1.12 1133 non-null object\n"," 9 Qwen/Qwen2-7B-Instruct/rpp-1.14 1133 non-null object\n"," 10 Qwen/Qwen2-7B-Instruct/rpp-1.16 1133 non-null object\n"," 11 Qwen/Qwen2-7B-Instruct/rpp-1.18 1133 non-null object\n"," 12 Qwen/Qwen2-7B-Instruct/rpp-1.20 1133 non-null object\n"," 13 Qwen/Qwen2-7B-Instruct/rpp-1.22 1133 non-null object\n"," 14 Qwen/Qwen2-7B-Instruct/rpp-1.24 1133 non-null object\n"," 15 Qwen/Qwen2-7B-Instruct/rpp-1.26 1133 non-null object\n"," 16 Qwen/Qwen2-7B-Instruct/rpp-1.28 1133 non-null object\n"," 17 Qwen/Qwen2-7B-Instruct/rpp-1.30 1133 non-null object\n"," 18 internlm/internlm2_5-7b-chat-1m/rpp-1.00 1133 non-null object\n"," 19 internlm/internlm2_5-7b-chat-1m/rpp-1.02 1133 non-null object\n","dtypes: object(20)\n","memory usage: 177.2+ KB\n"]}],"source":["import pandas as pd\n","\n","df = pd.read_csv(results_path)\n","df.info()"]},{"cell_type":"code","execution_count":79,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Qwen/Qwen2-7B-Instruct/rpp-1.00: {'meteor': 0.37372392521659187, 'bleu_scores': {'bleu': 0.11236357019695803, 'precisions': [0.42194734753274243, 0.15166821289901386, 
0.07014658562745799, 0.035509352410760864], 'brevity_penalty': 1.0, 'length_ratio': 1.0015236833388539, 'translation_length': 30236, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42297796931661225, 'rouge2': 0.17403983204578213, 'rougeL': 0.3658856686382874, 'rougeLsum': 0.3659204687398736}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.02: {'meteor': 0.37190869873532223, 'bleu_scores': {'bleu': 0.1100390286367209, 'precisions': [0.4184028352820377, 0.1497005988023952, 0.06846911369740376, 0.03418803418803419], 'brevity_penalty': 1.0, 'length_ratio': 1.0000331235508446, 'translation_length': 30191, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4195845948970141, 'rouge2': 0.17186391930180184, 'rougeL': 0.361164889670589, 'rougeLsum': 0.36115367651131036}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.04: {'meteor': 0.37406550703189817, 'bleu_scores': {'bleu': 0.10956931974949688, 'precisions': [0.4130972529618539, 0.14825453685242135, 0.06855717197273174, 0.03432753888380604], 'brevity_penalty': 1.0, 'length_ratio': 1.0176879761510433, 'translation_length': 30724, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41943813931860635, 'rouge2': 0.1731766218403924, 'rougeL': 0.36189598520170224, 'rougeLsum': 0.3622987164716138}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.06: {'meteor': 0.3743075569159023, 'bleu_scores': {'bleu': 0.10936803300009316, 'precisions': [0.4205087175384205, 0.15056818181818182, 0.06837113699469907, 0.03391232423490488], 'brevity_penalty': 0.99358667697838, 'length_ratio': 0.9936071546869825, 'translation_length': 29997, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4188563626995033, 'rouge2': 0.17237953917851812, 'rougeL': 0.3614238400989537, 'rougeLsum': 0.36157826104604907}, 'accuracy': 0.00088261253309797, 'correct_ids': 
[364]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.08: {'meteor': 0.37389232127803795, 'bleu_scores': {'bleu': 0.11252598445639024, 'precisions': [0.41829608938547486, 0.14934862987663705, 0.0709559087966626, 0.03674127394743748], 'brevity_penalty': 0.9960837725634155, 'length_ratio': 0.9960914210003312, 'translation_length': 30072, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42031703148419475, 'rouge2': 0.17243974225774378, 'rougeL': 0.36154500391739963, 'rougeLsum': 0.36189135440678893}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 533, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.10: {'meteor': 0.3702585216722875, 'bleu_scores': {'bleu': 0.10710301916085879, 'precisions': [0.41596456758466815, 0.1461794019933555, 0.06721175665454021, 0.032895230942546004], 'brevity_penalty': 0.9946528632038626, 'length_ratio': 0.9946671083140113, 'translation_length': 30029, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41782139222094783, 'rouge2': 0.17158596223024214, 'rougeL': 0.3589904425478737, 'rougeLsum': 0.359006959234046}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 533]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.12: {'meteor': 0.3705755989089089, 'bleu_scores': {'bleu': 0.10258222363948062, 'precisions': [0.40718875371016666, 0.14041861410282463, 0.06307892790476526, 0.03070320237702212], 'brevity_penalty': 1.0, 'length_ratio': 1.015534945346141, 'translation_length': 30659, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4166572027441734, 'rouge2': 0.16822111899564623, 'rougeL': 0.357008902161872, 'rougeLsum': 0.35745653956369094}, 'accuracy': 0.00441306266548985, 'correct_ids': [240, 364, 533, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.14: {'meteor': 0.3641110975372605, 'bleu_scores': {'bleu': 0.0990318198813296, 'precisions': [0.4006066338345129, 0.13617583310755893, 0.060538827258320126, 0.02912372079374977], 'brevity_penalty': 1.0, 'length_ratio': 1.0156011924478303, 'translation_length': 30661, 'reference_length': 30190}, 
'rouge_scores': {'rouge1': 0.41237689483692086, 'rouge2': 0.1647530979631625, 'rougeL': 0.3524579320803469, 'rougeLsum': 0.3525094464318763}, 'accuracy': 0.00353045013239188, 'correct_ids': [240, 364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.16: {'meteor': 0.36313903134249, 'bleu_scores': {'bleu': 0.0952870490417418, 'precisions': [0.3922494060232454, 0.13080998234098556, 0.05827562326869806, 0.027570548167369445], 'brevity_penalty': 1.0, 'length_ratio': 1.031666114607486, 'translation_length': 31146, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40833803439030314, 'rouge2': 0.16062251994620244, 'rougeL': 0.3497483615160329, 'rougeLsum': 0.34978679608530117}, 'accuracy': 0.00353045013239188, 'correct_ids': [240, 364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.18: {'meteor': 0.3621942846022667, 'bleu_scores': {'bleu': 0.09476128635888595, 'precisions': [0.39506776127182697, 0.13002739911375708, 0.05743932465705241, 0.027328009377976407], 'brevity_penalty': 1.0, 'length_ratio': 1.0167605167273932, 'translation_length': 30696, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4085466936844559, 'rouge2': 0.16039337465336806, 'rougeL': 0.34699495728564167, 'rougeLsum': 0.3476358801921955}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 533]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.20: {'meteor': 0.3538173238431576, 'bleu_scores': {'bleu': 0.08968963366420402, 'precisions': [0.3857477091050887, 0.12459093822745522, 0.053809456994527854, 0.025021917007597896], 'brevity_penalty': 1.0, 'length_ratio': 1.019344153693276, 'translation_length': 30774, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4011401317225115, 'rouge2': 0.1560858750463962, 'rougeL': 0.34218460807223944, 'rougeLsum': 0.3425934037679277}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.22: {'meteor': 0.35181344675948434, 'bleu_scores': {'bleu': 0.08468174500037418, 'precisions': [0.3809662304087898, 0.11948208774989937, 
0.04979253112033195, 0.022688496024975496], 'brevity_penalty': 1.0, 'length_ratio': 1.0250082808877112, 'translation_length': 30945, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39827961553333474, 'rouge2': 0.15193285343874843, 'rougeL': 0.3372750185470212, 'rougeLsum': 0.33759743837835204}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 658]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.24: {'meteor': 0.3478194405208726, 'bleu_scores': {'bleu': 0.0805927787556035, 'precisions': [0.37261687333781923, 0.11477590105067163, 0.04729986525239263, 0.020855057351407715], 'brevity_penalty': 1.0, 'length_ratio': 1.033752898310699, 'translation_length': 31209, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39685525200407556, 'rouge2': 0.15120218239789796, 'rougeL': 0.3358927244287901, 'rougeLsum': 0.3360244508591187}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.26: {'meteor': 0.34377313642615803, 'bleu_scores': {'bleu': 0.07378753043886055, 'precisions': [0.3538180711895345, 0.10627973658505845, 0.04270683570775062, 0.018458823928607784], 'brevity_penalty': 1.0, 'length_ratio': 1.0887711162636635, 'translation_length': 32870, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3900349430194469, 'rouge2': 0.14626610466291398, 'rougeL': 0.3286622597539425, 'rougeLsum': 0.32885867843106675}, 'accuracy': 0.00353045013239188, 'correct_ids': [240, 364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.28: {'meteor': 0.34017553840969, 'bleu_scores': {'bleu': 0.07269823806123552, 'precisions': [0.350266309534805, 0.10201607758070691, 0.041866622538474264, 0.01867070109686071], 'brevity_penalty': 1.0, 'length_ratio': 1.0758860549850944, 'translation_length': 32481, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3878054004863958, 'rouge2': 0.14229440977177754, 'rougeL': 0.32657427170563236, 'rougeLsum': 0.3267111227947044}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 
659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.30: {'meteor': 0.3353695673104313, 'bleu_scores': {'bleu': 0.05830111152417765, 'precisions': [0.2870419881343417, 0.08188253801782905, 0.03320993325587051, 0.014801393728222997], 'brevity_penalty': 1.0, 'length_ratio': 1.300861212321961, 'translation_length': 39273, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.380210905537834, 'rouge2': 0.14103422879273908, 'rougeL': 0.3209978795799936, 'rougeLsum': 0.321330521060505}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 658, 659]}\n","internlm/internlm2_5-7b-chat-1m/rpp-1.00: {'meteor': 0.37152961221312103, 'bleu_scores': {'bleu': 0.09920863765540926, 'precisions': [0.3728006623887394, 0.13422469709949822, 0.06224842318785458, 0.031100006575054243], 'brevity_penalty': 1.0, 'length_ratio': 1.1201391189135474, 'translation_length': 33817, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4203311940833312, 'rouge2': 0.17021837671248571, 'rougeL': 0.3632707656424509, 'rougeLsum': 0.3630393657869949}, 'accuracy': 0.0, 'correct_ids': []}\n","internlm/internlm2_5-7b-chat-1m/rpp-1.02: {'meteor': 0.352901317633597, 'bleu_scores': {'bleu': 0.08697903417673139, 'precisions': [0.3666595931730682, 0.11979657185910718, 0.05260074213918365, 0.024771882392700235], 'brevity_penalty': 1.0, 'length_ratio': 1.0926465717124876, 'translation_length': 32987, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3998861989122014, 'rouge2': 0.15164946531097323, 'rougeL': 0.34028230510270174, 'rougeLsum': 0.3410649041040759}, 'accuracy': 0.00088261253309797, 'correct_ids': [511]}\n"]},{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," 
<th>model</th>\n"," <th>rpp</th>\n"," <th>meteor</th>\n"," <th>bleu_1</th>\n"," <th>rouge_l</th>\n"," <th>ews_score</th>\n"," <th>repetition_score</th>\n"," <th>total_repetitions</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.00</td>\n"," <td>0.373724</td>\n"," <td>0.112364</td>\n"," <td>0.365886</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.02</td>\n"," <td>0.371909</td>\n"," <td>0.110039</td>\n"," <td>0.361165</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.04</td>\n"," <td>0.374066</td>\n"," <td>0.109569</td>\n"," <td>0.361896</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.06</td>\n"," <td>0.374308</td>\n"," <td>0.109368</td>\n"," <td>0.361424</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.08</td>\n"," <td>0.373892</td>\n"," <td>0.112526</td>\n"," <td>0.361545</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.10</td>\n"," <td>0.370259</td>\n"," <td>0.107103</td>\n"," <td>0.358990</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.12</td>\n"," <td>0.370576</td>\n"," <td>0.102582</td>\n"," <td>0.357009</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.14</td>\n"," <td>0.364111</td>\n"," <td>0.099032</td>\n"," <td>0.352458</td>\n"," <td>0.000000</td>\n"," 
<td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.16</td>\n"," <td>0.363139</td>\n"," <td>0.095287</td>\n"," <td>0.349748</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>9</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.18</td>\n"," <td>0.362194</td>\n"," <td>0.094761</td>\n"," <td>0.346995</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>10</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.20</td>\n"," <td>0.353817</td>\n"," <td>0.089690</td>\n"," <td>0.342185</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>11</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.22</td>\n"," <td>0.351813</td>\n"," <td>0.084682</td>\n"," <td>0.337275</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>12</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.24</td>\n"," <td>0.347819</td>\n"," <td>0.080593</td>\n"," <td>0.335893</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>13</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.26</td>\n"," <td>0.343773</td>\n"," <td>0.073788</td>\n"," <td>0.328662</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>14</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.28</td>\n"," <td>0.340176</td>\n"," <td>0.072698</td>\n"," <td>0.326574</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>15</th>\n"," <td>Qwen2-7B-Instruct</td>\n"," <td>1.30</td>\n"," <td>0.335370</td>\n"," <td>0.058301</td>\n"," <td>0.320998</td>\n"," <td>0.004413</td>\n"," <td>0.004413</td>\n"," <td>0.004413</td>\n"," </tr>\n"," <tr>\n"," <th>16</th>\n"," <td>internlm2_5-7b-chat-1m</td>\n"," <td>1.00</td>\n"," <td>0.371530</td>\n"," <td>0.099209</td>\n"," 
<td>0.363271</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>17</th>\n"," <td>internlm2_5-7b-chat-1m</td>\n"," <td>1.02</td>\n"," <td>0.352901</td>\n"," <td>0.086979</td>\n"," <td>0.340282</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" model rpp meteor bleu_1 rouge_l ews_score \\\n","0 Qwen2-7B-Instruct 1.00 0.373724 0.112364 0.365886 0.000000 \n","1 Qwen2-7B-Instruct 1.02 0.371909 0.110039 0.361165 0.000000 \n","2 Qwen2-7B-Instruct 1.04 0.374066 0.109569 0.361896 0.000000 \n","3 Qwen2-7B-Instruct 1.06 0.374308 0.109368 0.361424 0.000000 \n","4 Qwen2-7B-Instruct 1.08 0.373892 0.112526 0.361545 0.000000 \n","5 Qwen2-7B-Instruct 1.10 0.370259 0.107103 0.358990 0.000000 \n","6 Qwen2-7B-Instruct 1.12 0.370576 0.102582 0.357009 0.000000 \n","7 Qwen2-7B-Instruct 1.14 0.364111 0.099032 0.352458 0.000000 \n","8 Qwen2-7B-Instruct 1.16 0.363139 0.095287 0.349748 0.000000 \n","9 Qwen2-7B-Instruct 1.18 0.362194 0.094761 0.346995 0.000000 \n","10 Qwen2-7B-Instruct 1.20 0.353817 0.089690 0.342185 0.000000 \n","11 Qwen2-7B-Instruct 1.22 0.351813 0.084682 0.337275 0.000000 \n","12 Qwen2-7B-Instruct 1.24 0.347819 0.080593 0.335893 0.000000 \n","13 Qwen2-7B-Instruct 1.26 0.343773 0.073788 0.328662 0.000000 \n","14 Qwen2-7B-Instruct 1.28 0.340176 0.072698 0.326574 0.000000 \n","15 Qwen2-7B-Instruct 1.30 0.335370 0.058301 0.320998 0.004413 \n","16 internlm2_5-7b-chat-1m 1.00 0.371530 0.099209 0.363271 0.000000 \n","17 internlm2_5-7b-chat-1m 1.02 0.352901 0.086979 0.340282 0.000000 \n","\n"," repetition_score total_repetitions \n","0 0.000000 0.000000 \n","1 0.000000 0.000000 \n","2 0.000000 0.000000 \n","3 0.000000 0.000000 \n","4 0.000000 0.000000 \n","5 0.000000 0.000000 \n","6 0.000000 0.000000 \n","7 0.000000 0.000000 \n","8 0.000000 0.000000 \n","9 0.000000 0.000000 \n","10 0.000000 0.000000 \n","11 0.000000 0.000000 \n","12 
0.000000 0.000000 \n","13 0.000000 0.000000 \n","14 0.000000 0.000000 \n","15 0.004413 0.004413 \n","16 0.000000 0.000000 \n","17 0.000000 0.000000 "]},"execution_count":79,"metadata":{},"output_type":"execute_result"}],"source":["metrics_df = get_metrics(df)\n","metrics_df"]},{"cell_type":"code","execution_count":82,"metadata":{},"outputs":[],"source":["col = \"Qwen/Qwen2-7B-Instruct/rpp-1.30\"\n","df[[\"ews_score\", \"repetition_score\", \"total_repetitions\"]] = df[col].apply(\n"," detect_scores\n",")"]},{"cell_type":"code","execution_count":88,"metadata":{},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>chinese</th>\n"," <th>english</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.00</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.02</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.04</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.06</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.08</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.10</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.12</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.14</th>\n"," <th>...</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.22</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.24</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.26</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.28</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.30</th>\n"," <th>internlm/internlm2_5-7b-chat-1m/rpp-1.00</th>\n"," <th>internlm/internlm2_5-7b-chat-1m/rpp-1.02</th>\n"," <th>ews_score</th>\n"," <th>repetition_score</th>\n"," <th>total_repetitions</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>905</th>\n"," <td>顾炎武举起酒杯,高声吟道:</td>\n"," <td>Gu Yanwu raised his wine-cup and, in ringing t...</td>\n"," <td>Ku Yanyu 
lifted his wine cup and recited aloud:</td>\n"," <td>Ku Yanyu lifted his wine cup and recited loudly:</td>\n"," <td>Ku Yanyu raised his cup and recited aloud:</td>\n"," <td>Ku Yanyu raised his wine cup and recited aloud:</td>\n"," <td>Ku Yanyu lifted his wine cup and recited loudly:</td>\n"," <td>Ku Yanyu raised his wine cup and recited aloud:</td>\n"," <td>Ku Yanyu raised his cup and recited loudly:\\n\\...</td>\n"," <td>Ku Yanyu lifted his wine cup and recited loudl...</td>\n"," <td>...</td>\n"," <td>Ku Yanyun raised his cup and recited loudly:\\n...</td>\n"," <td>Ku Yanyuan raised his cup and recited loudly:\\...</td>\n"," <td>Ku Yanyun raised his cup and recited aloud:</td>\n"," <td>Ku Yanyun raised his cup and recited aloud:</td>\n"," <td>Ku Yanyuan raised his cup and recited loudly:\\...</td>\n"," <td>Gu Yanwu raised his wine cup and recited loudly:</td>\n"," <td>Gu Yanwu raised his wine cup and recited loudly:</td>\n"," <td>5</td>\n"," <td>0</td>\n"," <td>5</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1 rows × 23 columns</p>\n","</div>"],"text/plain":[" chinese english \\\n","905 顾炎武举起酒杯,高声吟道: Gu Yanwu raised his wine-cup and, in ringing t... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.00 \\\n","905 Ku Yanyu lifted his wine cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.02 \\\n","905 Ku Yanyu lifted his wine cup and recited loudly: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.04 \\\n","905 Ku Yanyu raised his cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.06 \\\n","905 Ku Yanyu raised his wine cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.08 \\\n","905 Ku Yanyu lifted his wine cup and recited loudly: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.10 \\\n","905 Ku Yanyu raised his wine cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.12 \\\n","905 Ku Yanyu raised his cup and recited loudly:\\n\\... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.14 ... \\\n","905 Ku Yanyu lifted his wine cup and recited loudl... ... 
\n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.22 \\\n","905 Ku Yanyun raised his cup and recited loudly:\\n... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.24 \\\n","905 Ku Yanyuan raised his cup and recited loudly:\\... \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.26 \\\n","905 Ku Yanyun raised his cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.28 \\\n","905 Ku Yanyun raised his cup and recited aloud: \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.30 \\\n","905 Ku Yanyuan raised his cup and recited loudly:\\... \n","\n"," internlm/internlm2_5-7b-chat-1m/rpp-1.00 \\\n","905 Gu Yanwu raised his wine cup and recited loudly: \n","\n"," internlm/internlm2_5-7b-chat-1m/rpp-1.02 ews_score \\\n","905 Gu Yanwu raised his wine cup and recited loudly: 5 \n","\n"," repetition_score total_repetitions \n","905 0 5 \n","\n","[1 rows x 23 columns]"]},"execution_count":88,"metadata":{},"output_type":"execute_result"}],"source":["rows = df.query(\"ews_score > 0\")\n","rows"]},{"cell_type":"code","execution_count":92,"metadata":{},"outputs":[],"source":["row = rows.iloc[0]"]},{"cell_type":"code","execution_count":93,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["顾炎武举起酒杯,高声吟道:\n"]}],"source":["print(row[\"chinese\"])"]},{"cell_type":"code","execution_count":94,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Gu Yanwu raised his wine-cup and, in ringing tones, recited the following couplet:\n"]}],"source":["print(row[\"english\"])"]},{"cell_type":"code","execution_count":97,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Ku Yanyuan raised his cup and recited loudly:\n","\n","\"Under Heaven's vast expanse,\n","The world is full of myriad paths;\n","Yet every step I take leads me back home.\n","\n","I've sought solace amidst mountains high,\n","And found tranquility by rivers wide; \n","But my heart yearns for that familiar place.\"\n","\n","He then took a deep sip from his wine goblet before continuing with another 
verse:\n"," \n","\"In this mortal coil we wander on,\n","Seeking truth where'er our feet have gone;\n","Our souls forever roam like birds free,\n","\n","Yearning always after what was lost or missed,\n","In dreams as well as waking hours past.\"\n"," \n","With each word echoing through the hallways filled with ancient wisdom, Ku’s verses seemed to carry echoes beyond their literal meaning—touching upon themes deeply rooted within human experience: longing, memory, identity, and perhaps most importantly—the universal quest for belongingness amid life’s transient nature. \n","\n","His poetic outpourings resonated not just because they captured moments specific to one man but due to how universally relatable these sentiments truly were—to anyone who has ever felt displaced yet longed for connection, searching for roots amongst shifting sands of time. In essence, he articulated something profound about existence itself—a poignant reminder of humanity's common struggle against loneliness and dislocation even when surrounded by countless others seeking similar answers along parallel journeys across distant lands.\n"]}],"source":["print(row[\"Qwen/Qwen2-7B-Instruct/rpp-1.30\"])"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
 
1
+ {"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/papers/rapget-translation\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","# check if workding_dir is in local variables\n","if \"workding_dir\" not in locals():\n"," workding_dir = str(Path.cwd().parent)\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: 
/Users/inflaton/code/engd/papers/rapget-translation/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["Qwen/Qwen2-7B-Instruct None False datasets/mac/mac.tsv results/mac-results.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"DATA_PATH\")\n","results_path = os.getenv(\"RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","Name: torch\n","Version: 2.4.0\n","Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration\n","Home-page: https://pytorch.org/\n","Author: PyTorch Team\n","Author-email: packages@pytorch.org\n","License: BSD-3\n","Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages\n","Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions\n","Required-by: accelerate, peft, torchaudio, torchvision\n","---\n","Name: transformers\n","Version: 4.43.3\n","Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow\n","Home-page: https://github.com/huggingface/transformers\n","Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors)\n","Author-email: transformers@huggingface.co\n","License: Apache 2.0 License\n","Location: /Users/inflaton/anaconda3/envs/rapget/lib/python3.11/site-packages\n","Requires: filelock, huggingface-hub, numpy, packaging, pyyaml, regex, requests, safetensors, tokenizers, tqdm\n","Required-by: peft\n","CPU times: user 8.49 ms, sys: 8.92 ms, total: 17.4 ms\n","Wall time: 1.71 s\n"]}],"source":["%%time\n","os.environ[\"TOKENIZERS_PARALLELISM\"] = \"true\"\n","\n","!python --version\n","!pip show torch transformers"]},{"cell_type":"code","execution_count":6,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stderr","output_type":"stream","text":["[nltk_data] Downloading package wordnet to\n","[nltk_data] 
/Users/inflaton/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package punkt to /Users/inflaton/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n","[nltk_data] Downloading package omw-1.4 to\n","[nltk_data] /Users/inflaton/nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n"]},{"name":"stdout","output_type":"stream","text":["loading: /Users/inflaton/code/engd/papers/rapget-translation/eval_modules/calc_repetitions.py\n","loading /Users/inflaton/code/engd/papers/rapget-translation/llm_toolkit/translation_utils.py\n"]},{"name":"stderr","output_type":"stream","text":["[nltk_data] Downloading package wordnet to\n","[nltk_data] /Users/inflaton/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package punkt to /Users/inflaton/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n","[nltk_data] Downloading package omw-1.4 to\n","[nltk_data] /Users/inflaton/nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n"]},{"name":"stdout","output_type":"stream","text":["MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.translation_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 1133 entries, 0 to 1132\n","Data columns (total 25 columns):\n"," # Column Non-Null Count Dtype \n","--- ------ -------------- ----- \n"," 0 chinese 1133 non-null object\n"," 1 english 1133 non-null object\n"," 2 Qwen/Qwen2-7B-Instruct/rpp-1.00 1133 non-null object\n"," 3 Qwen/Qwen2-7B-Instruct/rpp-1.02 1133 non-null object\n"," 4 Qwen/Qwen2-7B-Instruct/rpp-1.04 1133 non-null object\n"," 5 Qwen/Qwen2-7B-Instruct/rpp-1.06 1133 non-null object\n"," 6 Qwen/Qwen2-7B-Instruct/rpp-1.08 1133 non-null object\n"," 7 Qwen/Qwen2-7B-Instruct/rpp-1.10 1133 
non-null object\n"," 8 Qwen/Qwen2-7B-Instruct/rpp-1.12 1133 non-null object\n"," 9 Qwen/Qwen2-7B-Instruct/rpp-1.14 1133 non-null object\n"," 10 Qwen/Qwen2-7B-Instruct/rpp-1.16 1133 non-null object\n"," 11 Qwen/Qwen2-7B-Instruct/rpp-1.18 1133 non-null object\n"," 12 Qwen/Qwen2-7B-Instruct/rpp-1.20 1133 non-null object\n"," 13 Qwen/Qwen2-7B-Instruct/rpp-1.22 1133 non-null object\n"," 14 Qwen/Qwen2-7B-Instruct/rpp-1.24 1133 non-null object\n"," 15 Qwen/Qwen2-7B-Instruct/rpp-1.26 1133 non-null object\n"," 16 Qwen/Qwen2-7B-Instruct/rpp-1.28 1133 non-null object\n"," 17 Qwen/Qwen2-7B-Instruct/rpp-1.30 1133 non-null object\n"," 18 internlm/internlm2_5-7b-chat-1m/rpp-1.00 1133 non-null object\n"," 19 internlm/internlm2_5-7b-chat-1m/rpp-1.02 1133 non-null object\n"," 20 shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.00 1133 non-null object\n"," 21 shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.02 1133 non-null object\n"," 22 shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.04 1133 non-null object\n"," 23 Qwen/Qwen2-72B-Instruct/rpp-1.00 1133 non-null object\n"," 24 shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.06 1133 non-null object\n","dtypes: object(25)\n","memory usage: 221.4+ KB\n"]}],"source":["import pandas as pd\n","\n","df = pd.read_csv(results_path)\n","df.info()"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"980ef1609a5c4c33af7af35b73acff51","version_major":2,"version_minor":0},"text/plain":["tokenizer_config.json: 0%| | 0.00/1.29k [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"619283932ad349fb95572b78d3f7dcbc","version_major":2,"version_minor":0},"text/plain":["vocab.json: 0%| | 0.00/2.78M [00:00<?, 
?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c16727e4a5e244e497903cf2aeb650b9","version_major":2,"version_minor":0},"text/plain":["merges.txt: 0%| | 0.00/1.67M [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"33eff2ede931445e8f555d3ac026a05d","version_major":2,"version_minor":0},"text/plain":["tokenizer.json: 0%| | 0.00/7.03M [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["Qwen/Qwen2-7B-Instruct/rpp-1.00: {'meteor': 0.3757937058055942, 'bleu_scores': {'bleu': 0.11257687997946404, 'precisions': [0.4221057489451477, 0.15152552819915763, 0.07046669041681511, 0.03563738956121464], 'brevity_penalty': 1.0, 'length_ratio': 1.004836038423319, 'translation_length': 30336, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42401446445760516, 'rouge2': 0.1745373576657272, 'rougeL': 0.3644119580672682, 'rougeLsum': 0.36453929652705913}, 'accuracy': 0.00088261253309797, 'correct_ids': [364]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.02: {'meteor': 0.3768162203335968, 'bleu_scores': {'bleu': 0.11553860771639841, 'precisions': [0.421923611570795, 0.15446511467968776, 0.07288535852297123, 0.03751491646778043], 'brevity_penalty': 1.0, 'length_ratio': 1.0007949652202717, 'translation_length': 30214, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4277727343811012, 'rouge2': 0.17995005165108852, 'rougeL': 0.3697073813890631, 'rougeLsum': 0.3698470236105008}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.04: {'meteor': 0.3715147429622351, 'bleu_scores': {'bleu': 0.11311605625702598, 'precisions': [0.41758205508824014, 0.15180590775135358, 0.07144639737602053, 0.036148159155923766], 'brevity_penalty': 1.0, 'length_ratio': 1.0041404438555812, 'translation_length': 30315, 'reference_length': 30190}, 'rouge_scores': 
{'rouge1': 0.41900500358366954, 'rouge2': 0.17395330783197704, 'rougeL': 0.363516824567636, 'rougeLsum': 0.36370506922511386}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 533]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.06: {'meteor': 0.3721614566005243, 'bleu_scores': {'bleu': 0.10986034422062402, 'precisions': [0.41770767752410615, 0.14848860428286167, 0.06846272346218608, 0.03435399551904406], 'brevity_penalty': 0.9996355745538857, 'length_ratio': 0.9996356409407089, 'translation_length': 30179, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4187778540003171, 'rouge2': 0.17074289602424497, 'rougeL': 0.36110959197290915, 'rougeLsum': 0.3610861422620024}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.08: {'meteor': 0.3712966405354824, 'bleu_scores': {'bleu': 0.10809530671609749, 'precisions': [0.41541684679591634, 0.14717672264842077, 0.06768566804531559, 0.033518296340731855], 'brevity_penalty': 0.9960505187431468, 'length_ratio': 0.9960582974494866, 'translation_length': 30071, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41849370939695263, 'rouge2': 0.17273361236453028, 'rougeL': 0.3597975248974353, 'rougeLsum': 0.3597112590854938}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.10: {'meteor': 0.3713527017404089, 'bleu_scores': {'bleu': 0.10809698094017595, 'precisions': [0.4147023571713943, 0.145728817077812, 0.06795102628736047, 0.03393775575327552], 'brevity_penalty': 0.9948859408394681, 'length_ratio': 0.9948989731699238, 'translation_length': 30036, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41787894991500074, 'rouge2': 0.17248593100252707, 'rougeL': 0.35939500988600664, 'rougeLsum': 0.35964912174733266}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 533, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.12: {'meteor': 0.36820419885143935, 'bleu_scores': {'bleu': 0.10505573355971856, 'precisions': [0.4098240955857949, 
0.14277339035072595, 0.06492248062015504, 0.03232202311922487], 'brevity_penalty': 0.9980106107363413, 'length_ratio': 0.9980125869493209, 'translation_length': 30130, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41566325374438334, 'rouge2': 0.16882935534885601, 'rougeL': 0.35849783468706165, 'rougeLsum': 0.3585369905973183}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.14: {'meteor': 0.36307746488229864, 'bleu_scores': {'bleu': 0.10051614663163566, 'precisions': [0.4013952416992991, 0.13692917692097348, 0.06165771788216051, 0.030122267506483884], 'brevity_penalty': 1.0, 'length_ratio': 1.0065915866180855, 'translation_length': 30389, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.40852399119372523, 'rouge2': 0.1618558642437174, 'rougeL': 0.34972097203903363, 'rougeLsum': 0.3499725519247824}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 658]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.16: {'meteor': 0.36060381551154586, 'bleu_scores': {'bleu': 0.09572351387840275, 'precisions': [0.3943648240226187, 0.13195897159052566, 0.05795474478161726, 0.027838667251205613], 'brevity_penalty': 1.0, 'length_ratio': 1.019244783040742, 'translation_length': 30771, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4072114863784019, 'rouge2': 0.16095850760106328, 'rougeL': 0.3505813903872668, 'rougeLsum': 0.350857750406721}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 533]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.18: {'meteor': 0.36078545841521914, 'bleu_scores': {'bleu': 0.09571300097111912, 'precisions': [0.3949360480292352, 0.13088260206674573, 0.05813543795363258, 0.027927630371756763], 'brevity_penalty': 1.0, 'length_ratio': 1.0151705862868499, 'translation_length': 30648, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4049921584635483, 'rouge2': 0.15818085671380633, 'rougeL': 0.34452645012240113, 'rougeLsum': 0.3447993602053052}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 
364, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.20: {'meteor': 0.3567548354175595, 'bleu_scores': {'bleu': 0.0912485469982839, 'precisions': [0.3872236189002772, 0.12631719800622218, 0.05570236439499304, 0.025445200521210368], 'brevity_penalty': 1.0, 'length_ratio': 1.0276912885061278, 'translation_length': 31026, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4037433641842071, 'rouge2': 0.15734088954173536, 'rougeL': 0.3443621566475431, 'rougeLsum': 0.34458326374651166}, 'accuracy': 0.00176522506619594, 'correct_ids': [364, 658]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.22: {'meteor': 0.3510044718361491, 'bleu_scores': {'bleu': 0.08350689777294566, 'precisions': [0.3702997530709843, 0.11766040181464679, 0.050021865644027316, 0.02231237322515213], 'brevity_penalty': 1.0, 'length_ratio': 1.059721762172905, 'translation_length': 31993, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3961234054193029, 'rouge2': 0.15316633599974355, 'rougeL': 0.33707038471786877, 'rougeLsum': 0.3377136994023924}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.24: {'meteor': 0.3465600044661264, 'bleu_scores': {'bleu': 0.07954262823239741, 'precisions': [0.3656348982343902, 0.11231059374390323, 0.04652104925559569, 0.020954720954720955], 'brevity_penalty': 1.0, 'length_ratio': 1.056210665783372, 'translation_length': 31887, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3939302447273615, 'rouge2': 0.14898746802611904, 'rougeL': 0.33336984326977515, 'rougeLsum': 0.33359883382678}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.26: {'meteor': 0.3435165661403993, 'bleu_scores': {'bleu': 0.07858780987337025, 'precisions': [0.35780525502318394, 0.1090751833936637, 0.04563887780880202, 0.02141475545730865], 'brevity_penalty': 1.0, 'length_ratio': 1.0715468698244452, 'translation_length': 32350, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39047036646637606, 
'rouge2': 0.14822657459211233, 'rougeL': 0.3305532882619594, 'rougeLsum': 0.33076055862139775}, 'accuracy': 0.00353045013239188, 'correct_ids': [240, 364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.28: {'meteor': 0.34053363547339577, 'bleu_scores': {'bleu': 0.07203840378380885, 'precisions': [0.3451020592757862, 0.10142348754448399, 0.0418756674541277, 0.018374202216996975], 'brevity_penalty': 1.0, 'length_ratio': 1.0986088108645247, 'translation_length': 33167, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.3868635813119549, 'rouge2': 0.1433239651355812, 'rougeL': 0.32655966155619864, 'rougeLsum': 0.3274799622299468}, 'accuracy': 0.00264783759929391, 'correct_ids': [364, 658, 659]}\n","Qwen/Qwen2-7B-Instruct/rpp-1.30: {'meteor': 0.33446931317267503, 'bleu_scores': {'bleu': 0.062148408497464926, 'precisions': [0.3152004454342984, 0.08905625664759824, 0.035419266654781005, 0.015004765858008178], 'brevity_penalty': 1.0, 'length_ratio': 1.1897979463398476, 'translation_length': 35920, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.38254665657618103, 'rouge2': 0.14281275063088927, 'rougeL': 0.3221374694466569, 'rougeLsum': 0.32302246499181286}, 'accuracy': 0.00264783759929391, 'correct_ids': [240, 364, 659]}\n","internlm/internlm2_5-7b-chat-1m/rpp-1.00: {'meteor': 0.3715346402699926, 'bleu_scores': {'bleu': 0.1059772684959813, 'precisions': [0.39683339104158144, 0.1431975453714584, 0.06656950140663662, 0.03334508283397956], 'brevity_penalty': 1.0, 'length_ratio': 1.0523020867837032, 'translation_length': 31769, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.41979227117592, 'rouge2': 0.17040991088366783, 'rougeL': 0.3634941339105341, 'rougeLsum': 0.3635283363388828}, 'accuracy': 0.0, 'correct_ids': []}\n","internlm/internlm2_5-7b-chat-1m/rpp-1.02: {'meteor': 0.352901317633597, 'bleu_scores': {'bleu': 0.08697903417673139, 'precisions': [0.3666595931730682, 0.11979657185910718, 0.05260074213918365, 0.024771882392700235], 'brevity_penalty': 
1.0, 'length_ratio': 1.0926465717124876, 'translation_length': 32987, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.39992923111026313, 'rouge2': 0.15207331353194808, 'rougeL': 0.34035978429245106, 'rougeLsum': 0.341290568947192}, 'accuracy': 0.00088261253309797, 'correct_ids': [511]}\n","shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.00: {'meteor': 0.38168584246814397, 'bleu_scores': {'bleu': 0.11518296996672078, 'precisions': [0.42672642762284196, 0.15593196950357058, 0.07280560043080236, 0.036672529281892005], 'brevity_penalty': 0.9976786612989592, 'length_ratio': 0.9976813514408744, 'translation_length': 30120, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4255719662506292, 'rouge2': 0.17616774969423554, 'rougeL': 0.37055206565267934, 'rougeLsum': 0.3705658811911612}, 'accuracy': 0.00176522506619594, 'correct_ids': [77, 531]}\n","shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.02: {'meteor': 0.381084663579427, 'bleu_scores': {'bleu': 0.11434064727385712, 'precisions': [0.42645298576938423, 0.15516705248246554, 0.07212973283952392, 0.03635818433974287], 'brevity_penalty': 0.996216776830359, 'length_ratio': 0.9962239152037098, 'translation_length': 30076, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42485659188412694, 'rouge2': 0.17530658655542805, 'rougeL': 0.3698083401740613, 'rougeLsum': 0.36990591561274305}, 'accuracy': 0.00176522506619594, 'correct_ids': [77, 531]}\n","shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.04: {'meteor': 0.38019108433175514, 'bleu_scores': {'bleu': 0.11353152954579881, 'precisions': [0.42572246637368494, 0.15441303670899215, 0.0716574844262, 0.03599984984421337], 'brevity_penalty': 0.9948859408394681, 'length_ratio': 0.9948989731699238, 'translation_length': 30036, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4241330238907117, 'rouge2': 0.17497479896456675, 'rougeL': 0.36923859400701475, 'rougeLsum': 0.3694255854056052}, 'accuracy': 0.00176522506619594, 'correct_ids': [77, 
531]}\n","Qwen/Qwen2-72B-Instruct/rpp-1.00: {'meteor': 0.39496912014495184, 'bleu_scores': {'bleu': 0.12294894050451377, 'precisions': [0.42391407360606537, 0.1626695498329074, 0.079349416448331, 0.041761041902604754], 'brevity_penalty': 1.0, 'length_ratio': 1.048526001987413, 'translation_length': 31655, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.4424744401623645, 'rouge2': 0.19175162690143255, 'rougeL': 0.3838394374509765, 'rougeLsum': 0.3843717711772543}, 'accuracy': 0.0, 'correct_ids': []}\n","shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.06: {'meteor': 0.37862157681270814, 'bleu_scores': {'bleu': 0.11220469680226439, 'precisions': [0.42524207011686144, 0.15293056182114723, 0.07094094274878093, 0.03547621737656762], 'brevity_penalty': 0.9920186657513808, 'length_ratio': 0.9920503477972838, 'translation_length': 29950, 'reference_length': 30190}, 'rouge_scores': {'rouge1': 0.42376777516405656, 'rouge2': 0.17397376864723724, 'rougeL': 0.36869400213895775, 'rougeLsum': 0.36887041308935314}, 'accuracy': 0.00176522506619594, 'correct_ids': [77, 531]}\n"]},{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>model</th>\n"," <th>rpp</th>\n"," <th>meteor</th>\n"," <th>bleu_1</th>\n"," <th>rouge_l</th>\n"," <th>ews_score</th>\n"," <th>repetition_score</th>\n"," <th>total_repetitions</th>\n"," <th>num_entries_with_max_output_tokens</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.00</td>\n"," <td>0.375794</td>\n"," <td>0.112577</td>\n"," <td>0.364412</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," 
<tr>\n"," <th>1</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.02</td>\n"," <td>0.376816</td>\n"," <td>0.115539</td>\n"," <td>0.369707</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.04</td>\n"," <td>0.371515</td>\n"," <td>0.113116</td>\n"," <td>0.363517</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.06</td>\n"," <td>0.372161</td>\n"," <td>0.109860</td>\n"," <td>0.361110</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.08</td>\n"," <td>0.371297</td>\n"," <td>0.108095</td>\n"," <td>0.359798</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>5</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.10</td>\n"," <td>0.371353</td>\n"," <td>0.108097</td>\n"," <td>0.359395</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.12</td>\n"," <td>0.368204</td>\n"," <td>0.105056</td>\n"," <td>0.358498</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.14</td>\n"," <td>0.363077</td>\n"," <td>0.100516</td>\n"," <td>0.349721</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.16</td>\n"," <td>0.360604</td>\n"," <td>0.095724</td>\n"," <td>0.350581</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," 
<th>9</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.18</td>\n"," <td>0.360785</td>\n"," <td>0.095713</td>\n"," <td>0.344526</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>10</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.20</td>\n"," <td>0.356755</td>\n"," <td>0.091249</td>\n"," <td>0.344362</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>11</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.22</td>\n"," <td>0.351004</td>\n"," <td>0.083507</td>\n"," <td>0.337070</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>12</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.24</td>\n"," <td>0.346560</td>\n"," <td>0.079543</td>\n"," <td>0.333370</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>13</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.26</td>\n"," <td>0.343517</td>\n"," <td>0.078588</td>\n"," <td>0.330553</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>14</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.28</td>\n"," <td>0.340534</td>\n"," <td>0.072038</td>\n"," <td>0.326560</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>15</th>\n"," <td>Qwen/Qwen2-7B-Instruct</td>\n"," <td>1.30</td>\n"," <td>0.334469</td>\n"," <td>0.062148</td>\n"," <td>0.322137</td>\n"," <td>0.005296</td>\n"," <td>0.005296</td>\n"," <td>0.005296</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>16</th>\n"," <td>internlm/internlm2_5-7b-chat-1m</td>\n"," <td>1.00</td>\n"," <td>0.371535</td>\n"," <td>0.105977</td>\n"," <td>0.363494</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," 
<th>17</th>\n"," <td>internlm/internlm2_5-7b-chat-1m</td>\n"," <td>1.02</td>\n"," <td>0.352901</td>\n"," <td>0.086979</td>\n"," <td>0.340360</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>18</th>\n"," <td>shenzhi-wang/Llama3.1-70B-Chinese-Chat</td>\n"," <td>1.00</td>\n"," <td>0.381686</td>\n"," <td>0.115183</td>\n"," <td>0.370552</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>19</th>\n"," <td>shenzhi-wang/Llama3.1-70B-Chinese-Chat</td>\n"," <td>1.02</td>\n"," <td>0.381085</td>\n"," <td>0.114341</td>\n"," <td>0.369808</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>20</th>\n"," <td>shenzhi-wang/Llama3.1-70B-Chinese-Chat</td>\n"," <td>1.04</td>\n"," <td>0.380191</td>\n"," <td>0.113532</td>\n"," <td>0.369239</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>21</th>\n"," <td>Qwen/Qwen2-72B-Instruct</td>\n"," <td>1.00</td>\n"," <td>0.394969</td>\n"," <td>0.122949</td>\n"," <td>0.383839</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>22</th>\n"," <td>shenzhi-wang/Llama3.1-70B-Chinese-Chat</td>\n"," <td>1.06</td>\n"," <td>0.378622</td>\n"," <td>0.112205</td>\n"," <td>0.368694</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0.000000</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" model rpp meteor bleu_1 \\\n","0 Qwen/Qwen2-7B-Instruct 1.00 0.375794 0.112577 \n","1 Qwen/Qwen2-7B-Instruct 1.02 0.376816 0.115539 \n","2 Qwen/Qwen2-7B-Instruct 1.04 0.371515 0.113116 \n","3 Qwen/Qwen2-7B-Instruct 1.06 0.372161 0.109860 \n","4 Qwen/Qwen2-7B-Instruct 1.08 0.371297 0.108095 \n","5 Qwen/Qwen2-7B-Instruct 1.10 0.371353 0.108097 \n","6 Qwen/Qwen2-7B-Instruct 1.12 0.368204 0.105056 
\n","7 Qwen/Qwen2-7B-Instruct 1.14 0.363077 0.100516 \n","8 Qwen/Qwen2-7B-Instruct 1.16 0.360604 0.095724 \n","9 Qwen/Qwen2-7B-Instruct 1.18 0.360785 0.095713 \n","10 Qwen/Qwen2-7B-Instruct 1.20 0.356755 0.091249 \n","11 Qwen/Qwen2-7B-Instruct 1.22 0.351004 0.083507 \n","12 Qwen/Qwen2-7B-Instruct 1.24 0.346560 0.079543 \n","13 Qwen/Qwen2-7B-Instruct 1.26 0.343517 0.078588 \n","14 Qwen/Qwen2-7B-Instruct 1.28 0.340534 0.072038 \n","15 Qwen/Qwen2-7B-Instruct 1.30 0.334469 0.062148 \n","16 internlm/internlm2_5-7b-chat-1m 1.00 0.371535 0.105977 \n","17 internlm/internlm2_5-7b-chat-1m 1.02 0.352901 0.086979 \n","18 shenzhi-wang/Llama3.1-70B-Chinese-Chat 1.00 0.381686 0.115183 \n","19 shenzhi-wang/Llama3.1-70B-Chinese-Chat 1.02 0.381085 0.114341 \n","20 shenzhi-wang/Llama3.1-70B-Chinese-Chat 1.04 0.380191 0.113532 \n","21 Qwen/Qwen2-72B-Instruct 1.00 0.394969 0.122949 \n","22 shenzhi-wang/Llama3.1-70B-Chinese-Chat 1.06 0.378622 0.112205 \n","\n"," rouge_l ews_score repetition_score total_repetitions \\\n","0 0.364412 0.000000 0.000000 0.000000 \n","1 0.369707 0.000000 0.000000 0.000000 \n","2 0.363517 0.000000 0.000000 0.000000 \n","3 0.361110 0.000000 0.000000 0.000000 \n","4 0.359798 0.000000 0.000000 0.000000 \n","5 0.359395 0.000000 0.000000 0.000000 \n","6 0.358498 0.000000 0.000000 0.000000 \n","7 0.349721 0.000000 0.000000 0.000000 \n","8 0.350581 0.000000 0.000000 0.000000 \n","9 0.344526 0.000000 0.000000 0.000000 \n","10 0.344362 0.000000 0.000000 0.000000 \n","11 0.337070 0.000000 0.000000 0.000000 \n","12 0.333370 0.000000 0.000000 0.000000 \n","13 0.330553 0.000000 0.000000 0.000000 \n","14 0.326560 0.000000 0.000000 0.000000 \n","15 0.322137 0.005296 0.005296 0.005296 \n","16 0.363494 0.000000 0.000000 0.000000 \n","17 0.340360 0.000000 0.000000 0.000000 \n","18 0.370552 0.000000 0.000000 0.000000 \n","19 0.369808 0.000000 0.000000 0.000000 \n","20 0.369239 0.000000 0.000000 0.000000 \n","21 0.383839 0.000000 0.000000 0.000000 \n","22 0.368694 0.000000 
0.000000 0.000000 \n","\n"," num_entries_with_max_output_tokens \n","0 0 \n","1 0 \n","2 0 \n","3 0 \n","4 0 \n","5 0 \n","6 0 \n","7 0 \n","8 0 \n","9 0 \n","10 0 \n","11 0 \n","12 0 \n","13 0 \n","14 0 \n","15 1 \n","16 0 \n","17 0 \n","18 0 \n","19 0 \n","20 0 \n","21 0 \n","22 0 "]},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":["metrics_df = get_metrics(df)\n","metrics_df"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["tokenizers = {\n"," model: load_tokenizer(model) for model in metrics_df[\"model\"].unique()\n","}"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[],"source":["col = \"Qwen/Qwen2-7B-Instruct/rpp-1.30\"\n","df[[\"ews_score\", \"repetition_score\", \"total_repetitions\"]] = df[col].apply(\n"," detect_scores\n",")\n","df[\"output_tokens\"] = df[col].apply(\n"," lambda x: len(tokenizers[col.split(\"/rpp\")[0]](x)[\"input_ids\"])\n",")\n"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>chinese</th>\n"," <th>english</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.00</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.02</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.04</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.06</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.08</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.10</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.12</th>\n"," <th>Qwen/Qwen2-7B-Instruct/rpp-1.14</th>\n"," <th>...</th>\n"," <th>internlm/internlm2_5-7b-chat-1m/rpp-1.02</th>\n"," <th>shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.00</th>\n"," 
<th>shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.02</th>\n"," <th>shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.04</th>\n"," <th>Qwen/Qwen2-72B-Instruct/rpp-1.00</th>\n"," <th>shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.06</th>\n"," <th>ews_score</th>\n"," <th>repetition_score</th>\n"," <th>total_repetitions</th>\n"," <th>output_tokens</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>531</th>\n"," <td>14.分娩</td>\n"," <td>Childbirth</td>\n"," <td>14. Labor</td>\n"," <td>14. Childbirth</td>\n"," <td>14. Childbirth</td>\n"," <td>14. Labor</td>\n"," <td>14. Labor</td>\n"," <td>14. Labor</td>\n"," <td>14. Childbirth</td>\n"," <td>14. Childbirth</td>\n"," <td>...</td>\n"," <td>Translation: Delivery\\n\\nThe word \"分娩\" is a co...</td>\n"," <td>Childbirth</td>\n"," <td>Childbirth</td>\n"," <td>Childbirth</td>\n"," <td>14. Childbirth</td>\n"," <td>Childbirth</td>\n"," <td>6</td>\n"," <td>42</td>\n"," <td>48</td>\n"," <td>2048</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1 rows × 29 columns</p>\n","</div>"],"text/plain":[" chinese english Qwen/Qwen2-7B-Instruct/rpp-1.00 \\\n","531 14.分娩 Childbirth 14. Labor \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.02 Qwen/Qwen2-7B-Instruct/rpp-1.04 \\\n","531 14. Childbirth 14. Childbirth \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.06 Qwen/Qwen2-7B-Instruct/rpp-1.08 \\\n","531 14. Labor 14. Labor \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.10 Qwen/Qwen2-7B-Instruct/rpp-1.12 \\\n","531 14. Labor 14. Childbirth \n","\n"," Qwen/Qwen2-7B-Instruct/rpp-1.14 ... \\\n","531 14. Childbirth ... \n","\n"," internlm/internlm2_5-7b-chat-1m/rpp-1.02 \\\n","531 Translation: Delivery\\n\\nThe word \"分娩\" is a co... \n","\n"," shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.00 \\\n","531 Childbirth \n","\n"," shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.02 \\\n","531 Childbirth \n","\n"," shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.04 \\\n","531 Childbirth \n","\n"," Qwen/Qwen2-72B-Instruct/rpp-1.00 \\\n","531 14. 
Childbirth \n","\n"," shenzhi-wang/Llama3.1-70B-Chinese-Chat/rpp-1.06 ews_score \\\n","531 Childbirth 6 \n","\n"," repetition_score total_repetitions output_tokens \n","531 42 48 2048 \n","\n","[1 rows x 29 columns]"]},"execution_count":14,"metadata":{},"output_type":"execute_result"}],"source":["rows = df.query(\"ews_score > 0\")\n","rows"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[],"source":["row = rows.iloc[0]"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["14.分娩\n"]}],"source":["print(row[\"chinese\"])"]},{"cell_type":"code","execution_count":17,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Childbirth\n"]}],"source":["print(row[\"english\"])"]},{"cell_type":"code","execution_count":18,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Title: Birth\n","\n","Translation:\n","\n","Birth is a fundamental life event that marks the transition from dependency on another organism (usually through parental care) for survival to independence as part of human society's reproductive cycle.\n","\n","The process typically involves several key stages:\n","- **Fertilization**: The fusion of sperm with egg cells inside or outside the female body leads to conception.\n"," \n"," - This can occur naturally during sexual intercourse when both partners contribute their gametes towards fertilizing each other’s eggs/ sperms respectively leading to pregnancy formation within about two weeks after ovulation if conditions allow successful implantation onto endometrial lining post-fertilisation phase where it then begins growing until birth occurs usually around nine months later due primarily biological reasons related to gestational development needed by fetus before delivery possible without significant harm inflicted upon either mother nor baby involved thereby ensuring healthy offspring capable surviving independently thereafter once born 
alive at full term under ideal circumstances provided no complications arise which may necessitate medical intervention depending severity & nature thereof thus prioritising maternal health alongside child welfare throughout prenatal period especially considering unique individual differences amongst pregnant women including age physical condition emotional state lifestyle choices dietary habits etcetera influencing outcomes significantly hence requiring tailored approaches encompassing comprehensive healthcare support systems designed specifically targeting specific needs based off empirical evidence clinical guidelines research studies patient feedback among others fostering optimal growth environment nurturing potential while mitigating risks associated therein aiming ultimately toward achieving best outcome feasible given current available resources technologies practices expertise knowledge base pertaining subject matter area accordingly taking necessary precautions preventing adverse effects whenever practical enhancing overall quality experience maximizing benefits minimizing harms benefiting all stakeholders concerned equally across spectrum promoting equity fairness justice accessibility affordability sustainability long-term well-being inclusive participation sustainable progress societal harmony environmental stewardship economic prosperity social cohesion strengthening community bonds enriching lives improving global public health status reducing disparities increasing access opportunities empowering individuals families communities nations regions worldwide striving together collectively towards shared goals common good collective advancement humanity improvement quality enhancement longevity extension happiness maximization satisfaction achievement equitable distribution resource utilization optimization efficient allocation leveraging advancements harness technological innovations optimizing workflows streamlining processes automating tasks enhancing 
productivity efficiency effectiveness safety security privacy protection personal autonomy dignity rights freedom enabling people thrive flourish reach one’s fullest potential overcome challenges obstacles adversity live meaningful purposeful fulfilling satisfying lives contributing positively making valuable contributions shaping world future generations building resilient adaptable societies thriving amidst rapid changes uncertainties embracing innovation adaptability flexibility resilience cultivating creativity critical thinking skills problem-solving abilities leadership qualities teamwork collaboration spirit empathy compassion kindness gratitude mindfulness self-awareness self-regulation lifelong learning capacity adapting evolving continuously transforming oneself society whole expanding horizons opening doors possibilities exploring uncharted territories discovering new frontiers uncovering hidden gems unlocking mysteries solving puzzles unraveling complex issues finding solutions overcoming barriers facilitating breakthroughs advancing fields pioneering discoveries setting precedents establishing norms creating value adding meaning generating ideas insights sparking imagination inspiration driving change disruption revolution transformation elevating standards excellence performance metrics benchmarks reaching milestones achievements recognizing accomplishments celebrating successes rewarding efforts acknowledging hard work dedication commitment passion pursuing dreams aspirations ambitions ideals values beliefs principles ethics morals spirituality faith hope courage perseverance patience optimism resilience determination diligence discipline foresight strategic planning execution monitoring evaluating adjusting refining iterating perfecting implementing scaling replicating disseminating sharing spreading awareness educating informing inspiring mobilizing action engaging citizens participating democracy governance decision-making policy formulation 
implementation evaluation accountability transparency openness inclusivity diversity equality empowerment civic engagement volunteering philanthropy activism advocacy lobbying campaigning fundraising investing supporting causes initiatives programs projects campaigns movements platforms networks ecosystems partnerships collaborations alliances synergies coalitions coordination cooperation coalition-building alliance-forming networking connecting bridging gaps closing divides uniting fragmented groups aligning interests objectives visions missions rallying behind common cause championing progressive agendas advocating policies reforms legislation regulations standards frameworks protocols procedures rules laws charters constitutions treaties agreements conventions accords memorandums understandings commitments pledges resolutions declarations endorsements certifications acknowledgments appreciations honors awards recognitions commendations congratulatory messages accolades tributes salutations greetings expressions thanks appreciation respect admiration recognition celebration acknowledgement praise congratulations felicitations gratitudes blessings wishes prayers goodwill peace love joy generosity warmth comfort relief encouragement strength wisdom guidance healing restoration balance equilibrium stability unity solidarity harmony coexistence mutual understanding acceptance tolerance forgiveness reconciliation peaceful resolution conflict prevention management deescalation diplomacy negotiation mediation arbitration conciliation settlement dispute resolution compromise concession surrender yielding acquiescence compliance agreement accommodation adjustment adaptation assimilation integration harmonious relations friendly ties cooperative partnership collaborative effort joint venture shared responsibility communitarianism collectivism altruistic behavior moral conduct ethical integrity honesty trustworthiness benevolence kindness helpfulness courtesy politeness 
graciousness humility modesty grace elegance sophistication refinement eloquence articulateness clarity precision brevity succinctness fluency idiomatic expression metaphors analogies symbolism imagery figures speech poetic language literary devices rhetorical strategies persuasive arguments compelling narratives powerful storytelling vivid descriptions evocative phrasing emotive tone impactful messaging influential communication effective persuasion argumentation logical reasoning sound judgment discernment insight perception intuition cognition cognitive processing analytical synthesis creative ideation divergent convergent thought reflection contemplation introspection metacognition goal-setting intentionality focus concentration motivation ambition aspiration drive desire enthusiasm zeal fervor ardor excitement anticipation eagerness willingness capability competence proficiency skillfulness mastery accomplishment success attainment realization fulfillment embodiment manifestation creation invention discovery exploration expansion enlightenment awakening liberation emancipation elevation transcendental spiritual journey soul-searching existential quest inner voyage transformative experience profound impact lasting legacy positive influence constructive contribution innovative solution addressing pressing concerns tackling major problems resolving crucial matters bringing about beneficial transformations impacting countless lives reshaping futures laying foundations paving paths forging ahead steering destinies navigating treacherous waters conquering formidable odds triumphantly emerging victorious persevering against all odds succeeding beyond expectations surpassing limitations boundaries constraints attaining heights previously unimaginable experiencing moments wonder awe amazement delight pleasure bliss ecstasy serenity calm tranquility peace joy satisfaction pride ownership control agency authority power influence effect efficacy potency versatility 
robustness endurance fortitude tenacity persistence vigor dynamism energy momentum surge propulsion impetus forward movement progression evolution emergence ascension culmination climax apex pinnacle zenith horizon edge boundary threshold frontier limit ceiling floor ground level ascending descending orbit trajectory path direction route destination endpoint origin return loop sequence series continuum flow rhythm pattern melody music dance choreography synchronization timing pacing tempo beat pulse resonance vibration oscillation fluctuation variation mutation divergence convergence symmetry asymmetry chaos order organization structure system architecture blueprint design layout framework outline map diagram illustration graphic representation visualization conceptual model theoretical construct mathematical equation formula algorithm procedure protocol guideline standard practice routine habit ritual custom tradition folklore myth legend story narrative poem song ballad ode epic allegory parable fable proverb riddle joke anecdote tale account record documentation report analysis assessment critique commentary review summary overview conclusion recommendation proposal plan strategy tactic approach method technique tool instrument mechanism device apparatus appliance software hardware infrastructure logistics supply chain inventory procurement finance accounting budget expenditure revenue profit loss financial statement fiscal year annual forecast projection scenario modeling simulation forecasting predicting estimating measuring calculating quantifying analyzing interpreting visualizing presenting communicating explaining clarifying answering questions providing information facts data statistics details context background explanation justification reason motive objective subjective perspective viewpoint stance attitude belief conviction certainty uncertainty risk reward trade-off opportunity cost marginal benefit incremental gain leverage advantage superiority 
over competition differentiation distinctiveness uniqueness novelty originality authenticity credibility reliability dependability predictability consistency coherence plausibility truth falsity validity accuracy completeness relevance timeliness availability scalability interoperability modularity extensibility portability maintainability usability learnability user-friendly intuitive simplicity complexity comprehensibility readability navigatability discoverability findabilty ease-of-use multifunctional multi-purpose versatile applicability wide-ranging scope broadening capabilities widening application domains industry sectors vertical markets horizontal markets cross-industry applications interdisciplinary field inter-disciplinary study transdisciplinary inquiry integrated solutions holistic view multidisciplinary perspectives systemic thinking interconnected networked ecosystem dynamic adaptive responsive flexible scalable modular distributed parallel computing cloud-based services platform-as-a-service infrastructure-as-a-service software-defined everything service-oriented architectures microservices containers orchestration automation code-driven deployment continuous integration testing automated QA machine learning AI deep neural nets natural language processing robotics IoT blockchain cryptography cybersecurity digital identity authentication authorization encryption decryption secure coding practices vulnerability mitigation threat detection response proactive defense cyber hygiene zero-day exploits ransomware malware phishing attacks denial-of-service DOS DDoS man-in-the-middle MITM replay attack SQL injection XSS CSRF buffer overflow rootkit trojan horse spyware adware botnet zombie node honeypot trapdoor backdoors vulnerabilities exploitation patching firmware updates patches hotfixes bug fixes upgrades maintenance operations ITIL DevOps agile methodology project management time-to-market competitive speed agility innovation experimentation iteration 
prototyping A/B testing split testing multivariate tests conversion rate optimization CRO landing pages bounce rates click-through-rate CTAs form submissions sales conversions customer acquisition churn retention upselling downselling product pricing marketing ROI KPIs GA tracking analytics dashboards reports business intelligence BI big-data predictive models statistical analyses econometrics simulations forecasting algorithms machine-learning models clustering classification regression anomaly-detection reinforcement-learning Q-Learning SARSA temporal difference TD active-learning semi-supervised supervised unsupervised ensemble methods random forests gradient boosting XGBoost Catboost AdaBoost naive-Bayesian SVM kernel-SVM LDA PCA ICA EM DBSCAN k-means hierarchical-clustering Gaussian-mixture-models t-Distributed Stochastic Neighbor Embedding UMAP dimension-reduction techniques feature-extraction vector-space representations word embeddings semantic similarity cosine-similarity Jaccard-index TF-IDF bag-of-words document-topic matrix factorization recommender-systems association-rules item-collaborative-filtering latent-factor-analysis sparse matrices linear-algebra numerical-methods calculus probability theory graph theory combinatorics logic formal languages programming paradigms object-oriented functional declarative imperative low-level high-level interpreted compiled just-in-time JIT virtual machines runtime environments operating systems APIs libraries frameworks modules dependencies version-control Git repositories remote-work tools team-management collaboration soft-skills technical-wrangling debugging profiling tuning benchmarking validation verification test cases smoke-tests sanity-checks load-testing stress-testing concurrency threading race-condition deadlocks livelocks starvation atomic transactions database normalization ACID properties replication sharding partitioning indexing caching NoSQL relational databases RDBMS MySQL PostgreSQL MongoDB 
Redis Elasticsearch Kubernetes Docker AWS Azure Google Cloud Platform GCP OpenShift Jenkins CircleCI Travis CI CodePipeline GitHub Bitbucket GitLab Atlassian Bamboo Trello Asana Jira Confluence Slack Microsoft Teams Zoom WebEx BlueJeans Hangouts Meet GoToMeeting TeamViewer Skype Outlook calendar scheduling meetings web-conferencing online-platforms videoconferencing file-sharing storage backup disaster recovery DR cloud-storage NAS SAN deduplicated replicated encrypted compressed optimized streaming real-time collaboration workflow automation task-tracking issue-tracker pull-request reviews commit-history merge\n","----detect excessive whitespaces----\n","removed excessive whitespaces: 6\n","----detect text repetitions----\n","<re.Match object; span=(7590, 7610), match='ition cognition cogn'>\n","Group 1 found at 7590-7600: `ition cogn`\n","Group 2 found at 7600-7610: `ition cogn`\n","Group 3 found at 7600-7610: `ition cogn`\n","<re.Match object; span=(13637, 13659), match='supervised supervised '>\n","Group 1 found at 13637-13648: `supervised `\n","Group 2 found at 13648-13659: `supervised `\n","Group 3 found at 13648-13659: `supervised `\n","(6, 42, 48)\n"]},{"data":{"text/plain":["(6, 42, 48)"]},"execution_count":18,"metadata":{},"output_type":"execute_result"}],"source":["output = row[\"Qwen/Qwen2-7B-Instruct/rpp-1.30\"]\n","print(row[\"Qwen/Qwen2-7B-Instruct/rpp-1.30\"])\n","detect_repetitions(output, debug=True)"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 
3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
results/mac-results_greedy_decoding.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:863cc947747d50db0011637b38b3cb6ca73c491bf2eb51fdd8391d23191afbe7
3
+ size 3814560
scripts/eval-4gpu.sh CHANGED
@@ -17,6 +17,6 @@ grep MemTotal /proc/meminfo
17
 
18
  ./scripts/eval-model.sh Qwen/Qwen2-72B-Instruct
19
 
20
- #./scripts/eval-model.sh shenzhi-wang/Llama3.1-70B-Chinese-Chat
21
 
22
- #./scripts/eval-model.sh 01-ai/Yi-1.5-34B-Chat
 
17
 
18
  ./scripts/eval-model.sh Qwen/Qwen2-72B-Instruct
19
 
20
+ ./scripts/eval-model.sh shenzhi-wang/Llama3.1-70B-Chinese-Chat
21
 
22
+ ./scripts/eval-model.sh 01-ai/Yi-1.5-34B-Chat
scripts/eval-mac.sh CHANGED
@@ -11,18 +11,13 @@ cat /etc/os-release
11
  lscpu
12
  grep MemTotal /proc/meminfo
13
 
14
- pip install torch torchvision torchaudio
 
15
 
16
- pip install -r requirements.txt
17
 
18
  ./scripts/eval-model.sh Qwen/Qwen2-7B-Instruct
19
 
20
- ./scripts/eval-model.sh internlm/internlm2_5-7b-chat-1m
21
-
22
- ./scripts/eval-model.sh THUDM/glm-4-9b-chat-1m
23
-
24
  ./scripts/eval-model.sh shenzhi-wang/Llama3.1-8B-Chinese-Chat
25
 
26
- ./scripts/eval-model.sh shenzhi-wang/Gemma-2-9B-Chinese-Chat
27
-
28
- ./scripts/eval-model.sh shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat
 
11
  lscpu
12
  grep MemTotal /proc/meminfo
13
 
14
+ # pip install torch torchvision torchaudio
15
+ # pip install -r requirements.txt
16
 
17
+ ./scripts/eval-model.sh shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat
18
 
19
  ./scripts/eval-model.sh Qwen/Qwen2-7B-Instruct
20
 
 
 
 
 
21
  ./scripts/eval-model.sh shenzhi-wang/Llama3.1-8B-Chinese-Chat
22
 
23
+ ./scripts/eval-model.sh 01-ai/Yi-1.5-9B-Chat