Spaces:
Build error
Build error
fine-tuned metrics WIP
Browse files
.gitattributes
CHANGED
@@ -67,3 +67,4 @@ results/mac-results_few_shots_openai.csv filter=lfs diff=lfs merge=lfs -text
|
|
67 |
results/mac-results_fine_tuned.csv filter=lfs diff=lfs merge=lfs -text
|
68 |
results/mac-results_greedy_decoding_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
69 |
results/mac-results_few_shots_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
67 |
results/mac-results_fine_tuned.csv filter=lfs diff=lfs merge=lfs -text
|
68 |
results/mac-results_greedy_decoding_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
69 |
results/mac-results_few_shots_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
70 |
+
results/mac-results_fine_tuned_metrics.csv filter=lfs diff=lfs merge=lfs -text
|
llm_toolkit/translation_utils.py
CHANGED
@@ -290,6 +290,9 @@ def get_metrics(df, max_output_tokens=2048, variant="rpp"):
|
|
290 |
|
291 |
metrics_df["num_max_output_tokens"] = num_max_output_tokens
|
292 |
|
|
|
|
|
|
|
293 |
return metrics_df
|
294 |
|
295 |
|
@@ -510,11 +513,10 @@ def convert_time_to_seconds(time_str):
|
|
510 |
return total_seconds
|
511 |
|
512 |
|
513 |
-
|
514 |
-
|
515 |
-
|
516 |
|
517 |
-
def process_log_file(log_file, total_entries):
|
518 |
model = []
|
519 |
shots = []
|
520 |
eval_time = []
|
@@ -546,27 +548,28 @@ def process_log_file(log_file, total_entries):
|
|
546 |
df = pd.DataFrame(
|
547 |
{
|
548 |
"model": model,
|
549 |
-
|
550 |
"eval_time": eval_time,
|
551 |
}
|
552 |
)
|
553 |
return df
|
554 |
|
555 |
|
556 |
-
def load_eval_times(logs_folder, total_entries=1133):
|
557 |
# Get a list of all files in the logs folder
|
558 |
log_files = glob.glob(os.path.join(logs_folder, "*"))
|
559 |
log_files.sort()
|
560 |
|
561 |
-
time_df = pd.DataFrame({"model": [],
|
562 |
|
563 |
for log_file in log_files:
|
564 |
print(f"Loading content of {log_file}")
|
565 |
-
df = process_log_file(log_file, total_entries
|
566 |
time_df = pd.concat([time_df, df], ignore_index=True)
|
567 |
|
568 |
-
time_df[
|
569 |
-
|
|
|
570 |
|
571 |
|
572 |
def load_alpaca_data(data_path):
|
|
|
290 |
|
291 |
metrics_df["num_max_output_tokens"] = num_max_output_tokens
|
292 |
|
293 |
+
if variant != "rpp":
|
294 |
+
metrics_df[variant] = metrics_df[variant].astype(int)
|
295 |
+
|
296 |
return metrics_df
|
297 |
|
298 |
|
|
|
513 |
return total_seconds
|
514 |
|
515 |
|
516 |
+
def process_log_file(log_file, total_entries, variant):
|
517 |
+
time_pattern = re.compile(r"\[(.{5,10})<00:00")
|
518 |
+
metrics_pattern = re.compile(rf"(.*)/{variant}-(.*) metrics:")
|
519 |
|
|
|
520 |
model = []
|
521 |
shots = []
|
522 |
eval_time = []
|
|
|
548 |
df = pd.DataFrame(
|
549 |
{
|
550 |
"model": model,
|
551 |
+
variant: shots,
|
552 |
"eval_time": eval_time,
|
553 |
}
|
554 |
)
|
555 |
return df
|
556 |
|
557 |
|
558 |
+
def load_eval_times(logs_folder, total_entries=1133, variant="shots"):
|
559 |
# Get a list of all files in the logs folder
|
560 |
log_files = glob.glob(os.path.join(logs_folder, "*"))
|
561 |
log_files.sort()
|
562 |
|
563 |
+
time_df = pd.DataFrame({"model": [], variant: [], "eval_time": []})
|
564 |
|
565 |
for log_file in log_files:
|
566 |
print(f"Loading content of {log_file}")
|
567 |
+
df = process_log_file(log_file, total_entries, variant)
|
568 |
time_df = pd.concat([time_df, df], ignore_index=True)
|
569 |
|
570 |
+
time_df[variant] = time_df[variant].apply(lambda x: int(x))
|
571 |
+
# Keep the last occurrence of each duplicate
|
572 |
+
return time_df.drop_duplicates(subset=["model", variant], keep="last")
|
573 |
|
574 |
|
575 |
def load_alpaca_data(data_path):
|
notebooks/00b_Data Analysis_Few_Shots.ipynb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79fc6f45be27f13fe14be2e41598b8cc605cfcb71565a136603608711a5338fa
|
3 |
+
size 1606476
|
notebooks/00c_Data Analysis_Fine_Tuned.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
results/mac-results_few_shots_metrics.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40bd68a7831cf37a0bd5b4e290435873c574a15bca5a79400127a46ff2717672
|
3 |
+
size 8156
|
results/mac-results_fine_tuned_metrics.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:506cb6b5562efe12e50f6cd3c20ee50c979d5c6ef344ce933ca52f8ef26159fa
|
3 |
+
size 3217
|