Spaces:
Sleeping
Sleeping
perf analysis of phi-3 results
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +66 -0
- app_modules/utils.py +146 -1
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.000.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.020.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.040.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.060.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.080.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.100.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.120.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.140.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.160.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.180.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.200.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.220.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.240.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.260.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.280.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.300.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.000.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.020.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.040.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.060.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.080.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.100.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.120.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.140.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.160.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.180.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.200.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.220.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.240.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.260.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.280.txt +3 -0
- data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.300.txt +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.000.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.020.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.040.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.060.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.080.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.100.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.120.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.140.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.160.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.180.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.200.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.220.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.240.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.260.csv +3 -0
- data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.280.csv +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,69 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.040.txt filter=lfs diff=lfs merge=lfs -text
|
37 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.240.txt filter=lfs diff=lfs merge=lfs -text
|
38 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.180.txt filter=lfs diff=lfs merge=lfs -text
|
39 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.280.txt filter=lfs diff=lfs merge=lfs -text
|
40 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.120.txt filter=lfs diff=lfs merge=lfs -text
|
41 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.180.txt filter=lfs diff=lfs merge=lfs -text
|
42 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.160.txt filter=lfs diff=lfs merge=lfs -text
|
43 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.260.txt filter=lfs diff=lfs merge=lfs -text
|
44 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.100.txt filter=lfs diff=lfs merge=lfs -text
|
45 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.140.txt filter=lfs diff=lfs merge=lfs -text
|
46 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.000.txt filter=lfs diff=lfs merge=lfs -text
|
47 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.100.txt filter=lfs diff=lfs merge=lfs -text
|
48 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.080.txt filter=lfs diff=lfs merge=lfs -text
|
49 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.220.txt filter=lfs diff=lfs merge=lfs -text
|
50 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.080.txt filter=lfs diff=lfs merge=lfs -text
|
51 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.200.txt filter=lfs diff=lfs merge=lfs -text
|
52 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.260.txt filter=lfs diff=lfs merge=lfs -text
|
53 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.300.txt filter=lfs diff=lfs merge=lfs -text
|
54 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.280.txt filter=lfs diff=lfs merge=lfs -text
|
55 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.300.txt filter=lfs diff=lfs merge=lfs -text
|
56 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.120.txt filter=lfs diff=lfs merge=lfs -text
|
57 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.240.txt filter=lfs diff=lfs merge=lfs -text
|
58 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.220.txt filter=lfs diff=lfs merge=lfs -text
|
59 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.060.txt filter=lfs diff=lfs merge=lfs -text
|
60 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.000.txt filter=lfs diff=lfs merge=lfs -text
|
61 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.020.txt filter=lfs diff=lfs merge=lfs -text
|
62 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.040.txt filter=lfs diff=lfs merge=lfs -text
|
63 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.160.txt filter=lfs diff=lfs merge=lfs -text
|
64 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.060.txt filter=lfs diff=lfs merge=lfs -text
|
65 |
+
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.200.txt filter=lfs diff=lfs merge=lfs -text
|
66 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.020.txt filter=lfs diff=lfs merge=lfs -text
|
67 |
+
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.140.txt filter=lfs diff=lfs merge=lfs -text
|
68 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.280.csv filter=lfs diff=lfs merge=lfs -text
|
69 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.080.csv filter=lfs diff=lfs merge=lfs -text
|
70 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.040.csv filter=lfs diff=lfs merge=lfs -text
|
71 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.060.csv filter=lfs diff=lfs merge=lfs -text
|
72 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.280.csv filter=lfs diff=lfs merge=lfs -text
|
73 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.020.csv filter=lfs diff=lfs merge=lfs -text
|
74 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.200.csv filter=lfs diff=lfs merge=lfs -text
|
75 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.080.csv filter=lfs diff=lfs merge=lfs -text
|
76 |
+
data/results/Phi-3-mini-128k-instruct_mm_false.csv filter=lfs diff=lfs merge=lfs -text
|
77 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.060.csv filter=lfs diff=lfs merge=lfs -text
|
78 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.260.csv filter=lfs diff=lfs merge=lfs -text
|
79 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.160.csv filter=lfs diff=lfs merge=lfs -text
|
80 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.300.csv filter=lfs diff=lfs merge=lfs -text
|
81 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.160.csv filter=lfs diff=lfs merge=lfs -text
|
82 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.220.csv filter=lfs diff=lfs merge=lfs -text
|
83 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.300.csv filter=lfs diff=lfs merge=lfs -text
|
84 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.020.csv filter=lfs diff=lfs merge=lfs -text
|
85 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.040.csv filter=lfs diff=lfs merge=lfs -text
|
86 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.140.csv filter=lfs diff=lfs merge=lfs -text
|
87 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.120.csv filter=lfs diff=lfs merge=lfs -text
|
88 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.180.csv filter=lfs diff=lfs merge=lfs -text
|
89 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.140.csv filter=lfs diff=lfs merge=lfs -text
|
90 |
+
data/results/Phi-3-mini-128k-instruct_mm_true.csv filter=lfs diff=lfs merge=lfs -text
|
91 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.120.csv filter=lfs diff=lfs merge=lfs -text
|
92 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.260.csv filter=lfs diff=lfs merge=lfs -text
|
93 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.100.csv filter=lfs diff=lfs merge=lfs -text
|
94 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.220.csv filter=lfs diff=lfs merge=lfs -text
|
95 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.100.csv filter=lfs diff=lfs merge=lfs -text
|
96 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.000.csv filter=lfs diff=lfs merge=lfs -text
|
97 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.180.csv filter=lfs diff=lfs merge=lfs -text
|
98 |
+
data/results/Phi-3-mini-128k-instruct_mm_true_RP_1.240.csv filter=lfs diff=lfs merge=lfs -text
|
99 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.000.csv filter=lfs diff=lfs merge=lfs -text
|
100 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.200.csv filter=lfs diff=lfs merge=lfs -text
|
101 |
+
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.240.csv filter=lfs diff=lfs merge=lfs -text
|
app_modules/utils.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
# -*- coding:utf-8 -*-
|
2 |
from __future__ import annotations
|
3 |
|
4 |
-
import json
|
5 |
import logging
|
6 |
import os
|
|
|
7 |
import platform
|
8 |
import re
|
9 |
from pathlib import Path
|
@@ -13,6 +13,8 @@ import requests
|
|
13 |
import torch
|
14 |
from tqdm import tqdm
|
15 |
from langchain.memory import ConversationSummaryBufferMemory
|
|
|
|
|
16 |
|
17 |
|
18 |
class LogRecord(logging.LogRecord):
|
@@ -276,3 +278,146 @@ def detect_repetition_scores(text, debug=False):
|
|
276 |
text, debug=debug
|
277 |
)
|
278 |
return pd.Series([newline_score, repetition_score, total_repetitions])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# -*- coding:utf-8 -*-
|
2 |
from __future__ import annotations
|
3 |
|
|
|
4 |
import logging
|
5 |
import os
|
6 |
+
import math
|
7 |
import platform
|
8 |
import re
|
9 |
from pathlib import Path
|
|
|
13 |
import torch
|
14 |
from tqdm import tqdm
|
15 |
from langchain.memory import ConversationSummaryBufferMemory
|
16 |
+
import matplotlib.pyplot as plt
|
17 |
+
import matplotlib.ticker as mtick
|
18 |
|
19 |
|
20 |
class LogRecord(logging.LogRecord):
|
|
|
278 |
text, debug=debug
|
279 |
)
|
280 |
return pd.Series([newline_score, repetition_score, total_repetitions])
|
281 |
+
|
282 |
+
|
283 |
+
def load_with_newline_and_repetition_scores(result_file, force_recalculate=False):
    """Load a result CSV, adding the repetition-score columns when needed.

    The CSV is read with ``#``-prefixed lines treated as comments and
    malformed lines reported as warnings.  If any of the columns
    ``newline_score``, ``repetition_score`` or ``total_repetitions`` is
    missing, or ``force_recalculate`` is true, the three columns are
    computed from the ``answer`` column and the frame is written back to
    ``result_file`` (without the index).

    Args:
        result_file: Path of the CSV file to load (and possibly overwrite).
        force_recalculate: Recompute the score columns even when present.

    Returns:
        The loaded DataFrame, including the three score columns.
    """
    print(f"loading result file: {result_file}")
    df = pd.read_csv(result_file, comment="#", on_bad_lines="warn")

    score_columns = ["newline_score", "repetition_score", "total_repetitions"]
    scores_missing = any(column not in df.columns for column in score_columns)

    if force_recalculate or scores_missing:
        # detect_repetition_scores returns a 3-element Series per answer,
        # which pandas expands into the three score columns.
        df[score_columns] = df["answer"].apply(detect_repetition_scores)
        # NOTE: lines skipped as comments on read are not written back.
        df.to_csv(result_file, index=False)

    return df
|
299 |
+
|
300 |
+
|
301 |
+
def replace_last(source_string, old_string, new_string):
    """Replace the last occurrence of ``old_string`` in ``source_string``.

    Args:
        source_string: The text to search.
        old_string: The substring whose final occurrence is replaced.
            Must be non-empty (``str.rpartition`` raises ``ValueError``
            for an empty separator).
        new_string: The replacement text.

    Returns:
        ``source_string`` with its last ``old_string`` swapped for
        ``new_string``, or ``source_string`` unchanged when ``old_string``
        does not occur.
    """
    head, sep, tail = source_string.rpartition(old_string)
    if not sep:
        # No match: rpartition yields ('', '', source_string); without this
        # guard the replacement would be wrongly prepended to the input.
        return source_string
    return head + new_string + tail
|
304 |
+
|
305 |
+
|
306 |
+
# Reference dataset, loaded once at import time; its "wellFormedAnswers"
# column is used below as the ground truth for result files.
df_ms_macro = pd.read_json("./data/datasets/ms_macro.json")


def load_for_repetition_penalty_ms_macro(
    csv_result_file, repetition_penalty, force_recalculate=False
):
    """Load the per-penalty result CSV derived from ``csv_result_file``.

    The file name is built by replacing the last ``.csv`` in
    ``csv_result_file`` with ``_RP_<penalty>.csv`` (penalty formatted with
    three decimals).  After loading, the first ``ground_truth`` value is
    compared with the first ``wellFormedAnswers`` value of ``df_ms_macro``;
    when they differ, the whole ``ground_truth`` column is replaced from the
    dataset and the CSV is rewritten.

    Args:
        csv_result_file: Path of the base result CSV.
        repetition_penalty: Repetition penalty used to pick the file.
        force_recalculate: Passed through to
            ``load_with_newline_and_repetition_scores``.

    Returns:
        The loaded (and possibly updated) DataFrame.
    """
    penalty_suffix = f"_RP_{repetition_penalty:.3f}.csv"
    result_file = replace_last(csv_result_file, ".csv", penalty_suffix)

    df = load_with_newline_and_repetition_scores(
        result_file, force_recalculate=force_recalculate
    )

    reference_answers = df_ms_macro["wellFormedAnswers"]
    # Only the first row is compared to decide whether the column is stale.
    ground_truth_is_stale = df["ground_truth"][0] != reference_answers[0]
    if ground_truth_is_stale:
        df["ground_truth"] = reference_answers
        print("ground_truth updated for:", result_file)
        df.to_csv(result_file, index=False)

    return df
|
324 |
+
|
325 |
+
|
326 |
+
def adjust_perf_scores_with_repetition_penalty(result, precision, recall):
    """Scale precision/recall down according to measured repetition.

    For each DataFrame in ``result["df_list_repetition_penalty"]`` the mean
    ``newline_score`` (n) and mean ``repetition_score`` (r) are computed and
    printed.  The matching precision and recall entries are then divided by
    ``log10(10 + n + r)``, which equals 1 when ``n + r`` is 0 and grows as
    the scores grow.

    Args:
        result: Mapping with a ``"df_list_repetition_penalty"`` entry: an
            iterable of DataFrames with ``newline_score`` and
            ``repetition_score`` columns.
        precision: Sequence of precision-like scores, one per DataFrame.
        recall: Sequence of recall-like scores, one per DataFrame.

    Returns:
        A ``(precision, recall)`` tuple of new lists with adjusted scores.
        The inputs are paired positionally with the DataFrames via ``zip``,
        so extra entries on either side are ignored.
    """
    frames = result["df_list_repetition_penalty"]

    newline_score = [df["newline_score"].mean() for df in frames]
    print(f"newline_score: {newline_score}")

    repetition_score = [df["repetition_score"].mean() for df in frames]
    print(f"repetition_score: {repetition_score}")

    # The divisor is identical for precision and recall, so compute it once.
    divisors = [
        math.log10(10 + n + r) for n, r in zip(newline_score, repetition_score)
    ]

    adjusted_precision = [score / d for score, d in zip(precision, divisors)]
    adjusted_recall = [score / d for score, d in zip(recall, divisors)]

    return adjusted_precision, adjusted_recall
|
347 |
+
|
348 |
+
|
349 |
+
# MS MARCO dataset (spelled "ms_macro" in this module's identifiers and file names)
|
350 |
+
def _harmonic_mean(precision, recall):
    """Return the harmonic mean of two scores, or 0.0 when their sum is zero."""
    total = precision + recall
    if total == 0:
        # Both scores are zero; avoid ZeroDivisionError and report no score.
        return 0.0
    return 2 * (precision * recall) / total


def plot_performance_scores_ms_macro(
    result,
    models=None,
    title="Performance",
):
    """Plot overall and repetition-adjusted scores per repetition penalty.

    For every model, one figure is shown with two lines over the
    ``repetition_penalty`` column of ``result[model]["df_overall"]``:

    * the harmonic mean of the ``bleu1`` and ``rougeL`` columns, and
    * the same harmonic mean after both columns were adjusted by
      ``adjust_perf_scores_with_repetition_penalty``.

    The penalty giving the best value of each line is highlighted with a
    vertical band in the line's colour.

    Args:
        result: Mapping of model name to a dict holding ``"df_overall"``
            (a DataFrame with ``bleu1``, ``rougeL`` and
            ``repetition_penalty`` columns) and the data required by
            ``adjust_perf_scores_with_repetition_penalty``.
        models: Iterable of model names to plot; defaults to every key of
            ``result``.
        title: Text appended to the model name in the figure title.
    """
    if models is None:
        models = result.keys()

    for model in models:
        print(f"model: {model}")
        df = result[model]["df_overall"]

        # Unadjusted combined score and the position of its best value.
        bleu1 = list(df["bleu1"])
        rougeL = list(df["rougeL"])
        f1 = [_harmonic_mean(p, r) for p, r in zip(bleu1, rougeL)]
        best_f1 = max(f1)
        best_f1_index = f1.index(best_f1)

        # Same combined score after penalising newline/repetition artefacts.
        bleu1, rougeL = adjust_perf_scores_with_repetition_penalty(
            result[model], bleu1, rougeL
        )
        afrp = [_harmonic_mean(p, r) for p, r in zip(bleu1, rougeL)]
        best_afrp = max(afrp)
        best_afrp_index = afrp.index(best_afrp)

        repetition_penalties = list(df["repetition_penalty"])

        # Line plot of the overall and the repetition-adjusted scores.
        plt.figure(figsize=(10, 6))

        # Highlight the penalty with the best unadjusted score.
        plt.axvspan(
            repetition_penalties[best_f1_index] - 0.01,
            repetition_penalties[best_f1_index] + 0.01,
            alpha=0.5,
            edgecolor="none",
            facecolor="blue",
        )

        # Highlight the penalty with the best adjusted score.
        plt.axvspan(
            repetition_penalties[best_afrp_index] - 0.01,
            repetition_penalties[best_afrp_index] + 0.01,
            alpha=0.5,
            edgecolor="none",
            facecolor="orange",
        )

        plt.plot(
            repetition_penalties,
            f1,
            label="Overall Perf Score",
            marker="D",
            color="blue",
        )
        plt.plot(
            repetition_penalties,
            afrp,
            label="RF Adjusted Perf Score",
            marker="o",
            color="orange",
        )

        plt.xlabel("Repetition Penalties")
        plt.ylabel("Score")
        plt.xlim(0.99, 1.31)
        # Show the y axis as percentages (a score of 1.0 is 100%).
        plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
        plt.title(f"{model} {title}")
        plt.legend(bbox_to_anchor=(1.0, 0.5), loc="center left")

        plt.show()
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.000.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34860965667ba1a520aa539d7893315c0769b8fcccc7eb9a2b83d3165629d434
|
3 |
+
size 1412667
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.020.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df32ca49fd45a9569ce66d52e169cb67618bdcff622f2744bf8cde81c64157e7
|
3 |
+
size 1303428
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.040.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19a78d3d0dfe854b55d80af3ccc3eb90671f37ed15c8819e339a24939f4c900f
|
3 |
+
size 1014113
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.060.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f834d6a3c9429ba3f883a923e12c94c8b9a62acfe0cfeb3a1218403e31c7958
|
3 |
+
size 952195
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.080.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0255c2addfd08ff484c62f1005ed6ef599275ee14c69bd315308801319719c27
|
3 |
+
size 845439
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.100.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d634e0fc7c323187f62a384caebbfb79f7b867a5dcb4e3e176f81c9d4e1199e6
|
3 |
+
size 741186
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.120.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9b32298de75ee46fc56fe8434f4de0488954d4e643453af37c31895dc054bb3
|
3 |
+
size 607038
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.140.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e93f2dd5b1adfa1bae0f4b40e44d5a0aa99495f56ef7bebc0bd04cada170d4e6
|
3 |
+
size 570117
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.160.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d3e2d7757921dfcdcf523975613cfb09ea98fb5d6536a203a6951dbdaf860c0
|
3 |
+
size 659235
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.180.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c62e5266ddb731cbce9b1dc84097c6547a651c6a5a004a96fbd19d674574db4
|
3 |
+
size 739247
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.200.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b20218750e9912bb87ce2ae4befd338b6ebbf8fd0fc6fdf34f7fb11111bfee7e
|
3 |
+
size 808172
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.220.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:916430590ac97cbe2cde47bd564b86c2517bf4459742fd4b165526ed76e351d7
|
3 |
+
size 1112779
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.240.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a72fab0753c16e840f55959046d70cb2dd2f0e09adaaa3bedbdb78ebba01c9b
|
3 |
+
size 1260708
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.260.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ecc2396582adf5f31101f1d18c5e2c1ed469731f68a1c29c304b99f2bc02ed2
|
3 |
+
size 1576480
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.280.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebb995ecd3942582d21312898b4af1dd3e7ed8a7f6db1abc7fada4d422df7525
|
3 |
+
size 1968780
|
data/logs/Phi-3-mini-128k-instruct_mm_false_RP_1.300.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6cc305ad454105f2184ff18b05d83c40d4b96f7004dc21fad3fa4aece0e8ed61
|
3 |
+
size 1902079
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.000.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aab4e09f01097db2ec29c611e6f0577b86b6683ae46315a37853eda3e16cc247
|
3 |
+
size 296794
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.020.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ed7ea1a95896b99241ef185f2b86cb789b4d0b1b4510a5c920e5eff1be9bbb
|
3 |
+
size 295974
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.040.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98150aabee1cf5da8cea5cf3d809bd0e6f478b0e86f30841ec184a2221fd61e9
|
3 |
+
size 310890
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.060.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4d25cc0a02e1f6846ec89fa99dc52d30a96039b8fcfaa2322f73d6c2f493dd7
|
3 |
+
size 320210
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.080.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fb910fc3ebccc169cff86dfec15d2408b25a27a57e90ff6817d80c368034174
|
3 |
+
size 328353
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.100.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:61d7f7397abae776f414053c12e1774d4ea9f2c33e4689649f776eab1f823525
|
3 |
+
size 354222
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.120.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bed0992f9c047bf1bd4717e9f598e52f055566bdea309bd343f07913ebd91db
|
3 |
+
size 361876
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.140.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f1530baffb28da8170a0f5f42d96dbc70a753a1dfddb68e413e1aefbfbecb09
|
3 |
+
size 386098
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.160.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2662b2c356f8aa161abc62366e64be0437ff2463602ee46f7babf043a45b96b8
|
3 |
+
size 426421
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.180.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a11fc5af4aba708b6672833d790e40e51d4e385795e61711964e7c109b6ed7b
|
3 |
+
size 464838
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.200.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e416b58de9f00889968157b5258a4824f5fb7cb3150b15dd21fb15e0c48dcbf
|
3 |
+
size 547414
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.220.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3da18c173c840bb3cdf049686f17900425872e804cb0b39cbf3ad228aa85a764
|
3 |
+
size 631203
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.240.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19e1c4576e4826448e774b6ebe205941c99d77577afa51318727175461ff0520
|
3 |
+
size 712226
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.260.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cde3922ca2a700b2ddb73dec98bd1c089740828a908b4cfd1ea3ec04e282de94
|
3 |
+
size 982022
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.280.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6afb0f63dc3fedcd801b180e2c1e9e424120c116c8a984cb69915f5b0b24065d
|
3 |
+
size 1828578
|
data/logs/Phi-3-mini-128k-instruct_mm_true_RP_1.300.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84fef5ceb2accec9baced9e281d1acba7a3e27dc4d2e6cc9719b051be99e9a63
|
3 |
+
size 2461619
|
data/results/Phi-3-mini-128k-instruct_mm_false.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a6980f9582f0f24572d2236ffb609668eea1fedbfad41620ef1c553d9cfb815
|
3 |
+
size 1882
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.000.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2daf55b0bf3f43e96858888a2049ed92e57278f2aca893b5a4d22dfa8c27744e
|
3 |
+
size 766193
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.020.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79b8a6657291b682f7c63271b76f82d23b7198cf2fc9bbbb291465b347ed7ce1
|
3 |
+
size 712005
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.040.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2a27cd806f07c83d1282899b3eeba03c3b25c9476ad2cdd8ad45c8baee49f55
|
3 |
+
size 566451
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.060.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08ef1e0cc9c8d66398c7b83f0bc03e8d4da71ef3e9115cb14b222af069a36cad
|
3 |
+
size 535477
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.080.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b483e1692e97cfd2105f495d192adfa2f604c7535528c74ad3e3ac209b93aa6
|
3 |
+
size 481749
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.100.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dca5ce54711e0ccb947d6636202421bf3a5295c6c67ca853681aab09152a274
|
3 |
+
size 429575
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.120.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdc3abe2a2bf8912ba9842568b67cb506b4b761e975c6e4cd884eb7c71f0b360
|
3 |
+
size 362069
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.140.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9475750f3bb73a0e049689e24135cd978ec4799224508c5e99231dbb8341c62
|
3 |
+
size 343147
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.160.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3604712c2ae43dea5657fe8c2bb3871a14fab6b3687a79db6cbd13a1803bf536
|
3 |
+
size 387540
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.180.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6296fc1d6c2c15f23584cd785236d9a576a33dc8344ea3746a905c8fd366a3ea
|
3 |
+
size 426486
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.200.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca5f4d82d2d1250b7bb5d5755038e8a6be23d0e3d2d0790fc45206100a1f335a
|
3 |
+
size 460681
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.220.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:60f0d9dbca87b8462741c80fd90a57ef5d69a47b425408898051054f85a3617f
|
3 |
+
size 608211
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.240.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:893bf178d4236acd7b9bfcaee745924b84942e2b6ffdd031025ad12127d9c938
|
3 |
+
size 688312
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.260.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:537028c668ecde760382acab5f927930f9aa52501afeca416d4647e094d95843
|
3 |
+
size 846104
|
data/results/Phi-3-mini-128k-instruct_mm_false_RP_1.280.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4dab7b0b9100577232a3922ee270695cc5310db31fbc3aa74947e47d8b4e3ac0
|
3 |
+
size 1046454
|