dh-mc committed on
Commit
ee71b10
·
1 Parent(s): 38f452c

add env var END_REPETITION_PENALTY

Browse files
.env.example CHANGED
@@ -1,4 +1,5 @@
1
- MODEL_NAME=internlm/internlm2_5-7b-chat-1m
 
2
  BATCH_SIZE=2
3
  MAX_NEW_TOKENS=300
4
 
 
1
+ MODEL_NAME=Qwen/Qwen2-7B-Instruct
2
+
3
  BATCH_SIZE=2
4
  MAX_NEW_TOKENS=300
5
 
llm_toolkit/eval_rpp.py CHANGED
@@ -29,6 +29,7 @@ batch_size = int(os.getenv("BATCH_SIZE", 1))
29
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
30
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
31
  start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
 
32
 
33
  print(
34
  model_name,
@@ -95,7 +96,7 @@ evaluate_model_with_repetition_penalty(
95
  datasets["test"],
96
  on_repetition_penalty_step_completed,
97
  start_repetition_penalty=start_repetition_penalty,
98
- end_repetition_penalty=1.3,
99
  step_repetition_penalty=0.02,
100
  batch_size=batch_size,
101
  max_new_tokens=max_new_tokens,
 
29
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
30
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
31
  start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
32
+ end_repetition_penalty = float(os.getenv("END_REPETITION_PENALTY", 1.3))
33
 
34
  print(
35
  model_name,
 
96
  datasets["test"],
97
  on_repetition_penalty_step_completed,
98
  start_repetition_penalty=start_repetition_penalty,
99
+ end_repetition_penalty=end_repetition_penalty,
100
  step_repetition_penalty=0.02,
101
  batch_size=batch_size,
102
  max_new_tokens=max_new_tokens,
llm_toolkit/translation_utils.py CHANGED
@@ -223,14 +223,16 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
223
  lambda x: x.split(f"{variant}-")[-1]
224
  )
225
  metrics_df["model"] = metrics_df["model"].apply(
226
- lambda x: x.split(f"/{variant}-")[0]
227
  )
 
228
  metrics_df.reset_index(inplace=True)
229
  metrics_df = metrics_df.drop(columns=["index"])
230
 
231
- tokenizers = {
232
- model: load_tokenizer(model) for model in metrics_df["model"].unique()
233
- }
 
234
 
235
  meteor = []
236
  bleu_1 = []
@@ -264,7 +266,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
264
  repetition_score.append(df["repetition_score"].mean())
265
  total_repetitions.append(df["total_repetitions"].mean())
266
 
267
- model = col.split(f"/{variant}")[0]
268
 
269
  new_col = f"ground_truth_tokens-{model}"
270
  df[new_col] = df["english"].apply(
@@ -533,7 +535,7 @@ def process_log_file(log_file, total_entries, variant):
533
  metrics_pattern_matches = matches
534
  groups = metrics_pattern_matches.groups()
535
 
536
- model.append(groups[0])
537
  shots.append(groups[1])
538
 
539
  groups = time_pattern_matches.groups()
@@ -567,7 +569,9 @@ def load_eval_times(logs_folder, total_entries=1133, variant="shots"):
567
  df = process_log_file(log_file, total_entries, variant)
568
  time_df = pd.concat([time_df, df], ignore_index=True)
569
 
570
- time_df[variant] = time_df[variant].apply(lambda x: int(x))
 
 
571
  # Keep the last occurrence of each duplicate
572
  return time_df.drop_duplicates(subset=["model", variant], keep="last")
573
 
 
223
  lambda x: x.split(f"{variant}-")[-1]
224
  )
225
  metrics_df["model"] = metrics_df["model"].apply(
226
+ lambda x: x.split(f"/{variant}-")[0].split("/checkpoint")[0]
227
  )
228
+
229
  metrics_df.reset_index(inplace=True)
230
  metrics_df = metrics_df.drop(columns=["index"])
231
 
232
+ models = metrics_df["model"].unique()
233
+ print(models)
234
+
235
+ tokenizers = {model: load_tokenizer(model) for model in models}
236
 
237
  meteor = []
238
  bleu_1 = []
 
266
  repetition_score.append(df["repetition_score"].mean())
267
  total_repetitions.append(df["total_repetitions"].mean())
268
 
269
+ model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
270
 
271
  new_col = f"ground_truth_tokens-{model}"
272
  df[new_col] = df["english"].apply(
 
535
  metrics_pattern_matches = matches
536
  groups = metrics_pattern_matches.groups()
537
 
538
+ model.append(groups[0].split("/checkpoint")[0])
539
  shots.append(groups[1])
540
 
541
  groups = time_pattern_matches.groups()
 
569
  df = process_log_file(log_file, total_entries, variant)
570
  time_df = pd.concat([time_df, df], ignore_index=True)
571
 
572
+ time_df[variant] = time_df[variant].apply(
573
+ lambda x: x if variant == "rpp" else int(x)
574
+ )
575
  # Keep the last occurrence of each duplicate
576
  return time_df.drop_duplicates(subset=["model", variant], keep="last")
577
 
notebooks/00c_Data Analysis_Fine_Tuned.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/00d_Data Analysis_Fine_Tuned_RPP.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
results/mac-results_fine_tuned_metrics.csv CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:506cb6b5562efe12e50f6cd3c20ee50c979d5c6ef344ce933ca52f8ef26159fa
3
- size 3217
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9443cd0d5ed360cccbbf9f58b0f26e41320c962bca19fe34ae4dbeb9334de610
3
+ size 1158
scripts/eval-mac.sh CHANGED
@@ -38,4 +38,4 @@ grep MemTotal /proc/meminfo
38
 
39
  ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
40
 
41
- # ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
 
38
 
39
  ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
40
 
41
+ ./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105