add env var END_REPETITION_PENALTY
.env.example
CHANGED
@@ -1,4 +1,5 @@
-MODEL_NAME=
+MODEL_NAME=Qwen/Qwen2-7B-Instruct
+
 BATCH_SIZE=2
 MAX_NEW_TOKENS=300
 
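In llm_toolkit/eval_rpp.py (next diff) these variables are read with os.getenv. For readers unfamiliar with .env files, a minimal sketch of how such a file is typically loaded, assuming the python-dotenv package; the loading code is illustrative, not part of this repo:

# Illustrative sketch only -- assumes python-dotenv is installed.
# The toolkit itself reads the same variables via os.getenv in eval_rpp.py.
import os
from dotenv import load_dotenv

load_dotenv()  # populate os.environ from .env in the working directory
model_name = os.getenv("MODEL_NAME")  # e.g. "Qwen/Qwen2-7B-Instruct"
batch_size = int(os.getenv("BATCH_SIZE", 1))
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))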
llm_toolkit/eval_rpp.py
CHANGED
@@ -29,6 +29,7 @@ batch_size = int(os.getenv("BATCH_SIZE", 1))
 use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
 max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 2048))
 start_repetition_penalty = float(os.getenv("START_REPETITION_PENALTY", 1.0))
+end_repetition_penalty = float(os.getenv("END_REPETITION_PENALTY", 1.3))
 
 print(
     model_name,
@@ -95,7 +96,7 @@ evaluate_model_with_repetition_penalty(
     datasets["test"],
     on_repetition_penalty_step_completed,
     start_repetition_penalty=start_repetition_penalty,
-    end_repetition_penalty=
+    end_repetition_penalty=end_repetition_penalty,
     step_repetition_penalty=0.02,
     batch_size=batch_size,
     max_new_tokens=max_new_tokens,
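evaluate_model_with_repetition_penalty is defined elsewhere in the toolkit and is not shown in this commit. As a rough sketch of the sweep its keyword arguments imply (start, end, and the 0.02 step come from the call above; the function body and callback signature are assumptions):

# Hypothetical sketch of the sweep implied by the arguments above -- not the
# toolkit's own code.
def sweep_repetition_penalty(eval_fn, on_step_completed, start=1.0, end=1.3, step=0.02):
    rp = start
    while rp <= end + 1e-9:  # small tolerance for float accumulation
        results = eval_fn(repetition_penalty=rp)  # evaluate at this penalty
        on_step_completed(rp, results)            # e.g. save intermediate results
        rp = round(rp + step, 2)

# Toy usage:
sweep_repetition_penalty(
    eval_fn=lambda repetition_penalty: {"rp": repetition_penalty},
    on_step_completed=print,
)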
llm_toolkit/translation_utils.py
CHANGED
@@ -223,14 +223,16 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
         lambda x: x.split(f"{variant}-")[-1]
     )
     metrics_df["model"] = metrics_df["model"].apply(
-        lambda x: x.split(f"/{variant}-")[0]
+        lambda x: x.split(f"/{variant}-")[0].split("/checkpoint")[0]
     )
+
     metrics_df.reset_index(inplace=True)
     metrics_df = metrics_df.drop(columns=["index"])
 
-
-
-
+    models = metrics_df["model"].unique()
+    print(models)
+
+    tokenizers = {model: load_tokenizer(model) for model in models}
 
     meteor = []
     bleu_1 = []
@@ -264,7 +266,7 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
         repetition_score.append(df["repetition_score"].mean())
         total_repetitions.append(df["total_repetitions"].mean())
 
-        model = col.split(f"/{variant}")[0]
+        model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
 
         new_col = f"ground_truth_tokens-{model}"
         df[new_col] = df["english"].apply(
@@ -533,7 +535,7 @@ def process_log_file(log_file, total_entries, variant):
         metrics_pattern_matches = matches
         groups = metrics_pattern_matches.groups()
 
-        model.append(groups[0])
+        model.append(groups[0].split("/checkpoint")[0])
         shots.append(groups[1])
 
         groups = time_pattern_matches.groups()
@@ -567,7 +569,9 @@ def load_eval_times(logs_folder, total_entries=1133, variant="shots"):
         df = process_log_file(log_file, total_entries, variant)
         time_df = pd.concat([time_df, df], ignore_index=True)
 
-    time_df[variant] = time_df[variant].apply(
+    time_df[variant] = time_df[variant].apply(
+        lambda x: x if variant == "rpp" else int(x)
+    )
     # Keep the last occurrence of each duplicate
     return time_df.drop_duplicates(subset=["model", variant], keep="last")
 
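The repeated .split("/checkpoint")[0] folds fine-tuned checkpoints into their base model name, so metrics and timing rows group by model rather than by individual checkpoint; the new .apply in load_eval_times keeps "rpp" values as-is while casting "shots" counts to int. A standalone illustration (the column value below is hypothetical):

# Standalone illustration of the normalization above; the column value is made up.
variant = "rpp"
col = "shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat/checkpoint-70/rpp-1.10"
model = col.split(f"/{variant}")[0].split("/checkpoint")[0]
print(model)  # shenzhi-wang/Mistral-7B-v0.3-Chinese-Chat

# The variant cast: "rpp" values (floats like "1.10") pass through unchanged,
# while "shots" values become ints.
for variant, x in [("rpp", "1.10"), ("shots", "10")]:
    print(x if variant == "rpp" else int(x))  # prints 1.10, then 10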
notebooks/00c_Data Analysis_Fine_Tuned.ipynb
CHANGED
The diff for this file is too large to render.

notebooks/00d_Data Analysis_Fine_Tuned_RPP.ipynb
ADDED
The diff for this file is too large to render.
results/mac-results_fine_tuned_metrics.csv
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9443cd0d5ed360cccbbf9f58b0f26e41320c962bca19fe34ae4dbeb9334de610
+size 1158
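This CSV is tracked with Git LFS, so the diff shows only the pointer file (spec version, sha256 oid, byte size), not the CSV rows themselves. For illustration, a minimal parser for the pointer format, which is one "key value" pair per line:

# Parse a Git LFS pointer file (format per https://git-lfs.github.com/spec/v1).
def parse_lfs_pointer(text: str) -> dict:
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")  # split on the first space
        fields[key] = value
    return fields

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:9443cd0d5ed360cccbbf9f58b0f26e41320c962bca19fe34ae4dbeb9334de610
size 1158"""
print(parse_lfs_pointer(pointer)["size"])  # "1158" (bytes, as a string)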
scripts/eval-mac.sh
CHANGED
@@ -38,4 +38,4 @@ grep MemTotal /proc/meminfo
 
 ./scripts/eval-rpp.sh shenzhi-wang Mistral-7B-v0.3-Chinese-Chat checkpoint-70
 
-
+./scripts/eval-rpp.sh shenzhi-wang Llama3.1-8B-Chinese-Chat checkpoint-105
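eval-rpp.sh itself is not part of this commit; from the call sites it takes an org, a model name, and a checkpoint as positional arguments. A guess at how those might map onto the environment eval_rpp.py reads (purely an assumption about the wrapper's behavior):

# Assumption only -- how the wrapper might translate its three positional
# arguments before running llm_toolkit/eval_rpp.py. Names are illustrative.
import os
import subprocess

org, model, checkpoint = "shenzhi-wang", "Llama3.1-8B-Chinese-Chat", "checkpoint-105"
env = dict(os.environ, MODEL_NAME=f"{org}/{model}")  # matches MODEL_NAME in .env.example
# The checkpoint argument presumably selects which fine-tuned weights to load.
subprocess.run(["python", "llm_toolkit/eval_rpp.py"], env=env, check=True)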