dh-mc committed on
Commit
ee71b10
·
1 Parent(s): 38f452c

add env var END_REPETITION_PENALTY

Browse files
.env.example CHANGED
@@ -1,4 +1,5 @@
1
- MODEL_NAME=internlm/internlm2_5-7b-chat-1m
 
2
  BATCH_SIZE=2
3
  MAX_NEW_TOKENS=300
4
 
 
1
+ MODEL_NAME=Qwen/Qwen2-7B-Instruct
2
+
3
  BATCH_SIZE=2
4
  MAX_NEW_TOKENS=300
5
 
llm_toolkit/eval_rpp.py CHANGED
@@ -29,6 +29,7 @@ batch_size = int(os.getenv("BATCH_SIZE", 1))
29
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
30
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
31
  start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
 
32
 
33
  print(
34
  model_name,
@@ -95,7 +96,7 @@ evaluate_model_with_repetition_penalty(
95
  datasets["test"],
96
  on_repetition_penalty_step_completed,
97
  start_repetition_penalty=start_repetition_penalty,
98
- end_repetition_penalty=1.3,
99
  step_repetition_penalty=0.02,
100
  batch_size=batch_size,
101
  max_new_tokens=max_new_tokens,
 
29
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
30
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
31
  start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
32
+ end_repetition_penalty = float(os.getenv("END_REPETITION_PENALTY", 1.3))
33
 
34
  print(
35
  model_name,
 
96
  datasets["test"],
97
  on_repetition_penalty_step_completed,
98
  start_repetition_penalty=start_repetition_penalty,
99
+ end_repetition_penalty=end_repetition_penalty,
100
  step_repetition_penalty=0.02,
101
  batch_size=batch_size,
102
  max_new_tokens=max_new_tokens,
llm_toolkit/translation_utils.py CHANGED
@@ -223,14 +223,16 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
223
  lambda x: x.split(f"{variant}-")[-1]
224
  )
225
  metrics_df["model"] = metrics_df["model"].apply(
226
- lambda x: x.split(f"/{variant}-")[0]
227
  )
 
228
  metrics_df.reset_index(inplace=True)
229
  metrics_df = metrics_df.drop(columns=["index"])
230
 
231
- tokenizers = {
232
- model: load_tokenizer(model) for model in metrics_df["model"].unique()
233
- }
 
234
 
235
  meteor = []
236
  bleu_1 = []
@@ -264,7 +266,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
264
  repetition_score.append(df["repetition_score"].mean())
265
  total_repetitions.append(df["total_repetitions"].mean())
266
 
267
- model = col.split(f"/{variant}")[0]
268
 
269
  new_col = f"ground_truth_tokens-{model}"
270
  df[new_col] = df["english"].apply(
@@ -533,7 +535,7 @@ def process_log_file(log_file, total_entries, variant):
533
  metrics_pattern_matches = matches
534
  groups = metrics_pattern_matches.groups()
535
 
536
- model.append(groups[0])
537
  shots.append(groups[1])
538
 
539
  groups = time_pattern_matches.groups()
@@ -567,7 +569,9 @@ def load_eval_times(logs_folder, total_entries=1133, variant="shots"):
567
  df = process_log_file(log_file, total_entries, variant)
568
  time_df = pd.concat([time_df, df], ignore_index=True)
569
 
570
- time_df[variant] = time_df[variant].apply(lambda x: int(x))
 
 
571
  # Keep the last occurrence of each duplicate
572
  return time_df.drop_duplicates(subset=["model", variant], keep="last")
573
 
 
223
  lambda x: x.split(f"{variant}-")[-1]
224
  )
225
  metrics_df["model"] = metrics_df["model"].apply(
226
+ lambda x: x.split(f"/{variant}-")[0].split("/checkpoint")[0]
227
  )
228
+
229
  metrics_df.reset_index(inplace=True)
230
  metrics_df = metrics_df.drop(columns=["index"])
231
 
232
+ models = metrics_df["model"].unique()
233
+ print(models)
234
+
235
+ tokenizers = {model: load_tokenizer(model) for model in models}
236
 
237
  meteor = []
238
  bleu_1 = []
 
266
  repetition_score.append(df["repetition_score"].mean())
267
  total_repetitions.append(df["total_repetitions"].mean())
268
 
269
+ model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
270
 
271
  new_col = f"ground_truth_tokens-{model}"
272
  df[new_col] = df["english"].apply(
 
535
  metrics_pattern_matches = matches
536
  groups = metrics_pattern_matches.groups()
537
 
538
+ model.append(groups[0].split("/checkpoint")[0])
539
  shots.append(groups[1])
540
 
541
  groups = time_pattern_matches.groups()
 
569
  df = process_log_file(log_file, total_entries, variant)
570
  time_df = pd.concat([time_df, df], ignore_index=True)
571
 
572
+ time_df[variant] = time_df[variant].apply(
573
+ lambda x: x if variant == "rpp" else int(x)
574
+ )
575
  # Keep the last occurrence of each duplicate
576
  return time_df.drop_duplicates(subset=["model", variant], keep="last")
577
 
notebooks/00c_Data Analysis_Fine_Tuned.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/00d_Data Analysis_Fine_Tuned_RPP.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
results/mac-results_fine_tuned_metrics.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:506cb6b5562efe12e50f6cd3c20ee50c979d5c6ef344ce933ca52f8ef26159fa
3
- size 3217
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9443cd0d5ed360cccbbf9f58b0f26e41320c962bca19fe34ae4dbeb9334de610
3
+ size 1158
scripts/eval-mac.sh CHANGED
@@ -38,4 +38,4 @@ grep MemTotal /proc/meminfo
38
 
39
  ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
40
 
41
- # ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
 
38
 
39
  ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
40
 
41
+ ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105