Spaces:
Runtime error
Runtime error
magilogi
committed on
Commit
•
cbf54c8
1
Parent(s):
aa9beda
new data
Browse files
data/api-results/__pycache__/api_results.cpython-311.pyc
CHANGED
Binary files a/data/api-results/__pycache__/api_results.cpython-311.pyc and b/data/api-results/__pycache__/api_results.cpython-311.pyc differ
|
|
data/api-results/api_results.py
CHANGED
@@ -29,4 +29,25 @@ claude_opus = {
|
|
29 |
'medqa_g2b': 0.8333333333333334,
|
30 |
'medmcqa_og': 0.8649425287356322,
|
31 |
'medmcqa_g2b': 0.7988505747126436
|
32 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
'medqa_g2b': 0.8333333333333334,
|
30 |
'medmcqa_og': 0.8649425287356322,
|
31 |
'medmcqa_g2b': 0.7988505747126436
|
32 |
+
}
|
33 |
+
|
34 |
+
gemini_15_pro = {
|
35 |
+
'medqa_og': 0.8862433862433863,
|
36 |
+
'medqa_g2b': 0.873015873015873,
|
37 |
+
'medmcqa_og': 0.8649425287356322,
|
38 |
+
'medmcqa_g2b': 0.8247126436781609
|
39 |
+
}
|
40 |
+
|
41 |
+
gemini_pro_1 = {
|
42 |
+
'medqa_og': 0.7063492063492064,
|
43 |
+
'medqa_g2b': 0.7301587301587301,
|
44 |
+
'medmcqa_og': 0.6810344827586207,
|
45 |
+
'medmcqa_g2b': 0.7385057471264368
|
46 |
+
}
|
47 |
+
|
48 |
+
gemini_15_flash = {
|
49 |
+
'medqa_og': 0.9708994708994709,
|
50 |
+
'medqa_g2b': 0.9603174603174603,
|
51 |
+
'medmcqa_og': 0.9741379310344828,
|
52 |
+
'medmcqa_g2b': 0.9482758620689655
|
53 |
+
}
|
data/csv/models_data.csv
CHANGED
@@ -22,3 +22,6 @@ T,Model,b4bqa,b4b,medmcqa_g2b,medmcqa_orig_filtered,medmcqa_diff,medqa_4options_
|
|
22 |
π¬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
|
23 |
π¬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
|
24 |
π¬,"<a target=""_blank"" href=""https://www.anthropic.com/api"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38
|
|
|
|
|
|
|
|
22 |
π¬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
|
23 |
π¬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
|
24 |
π¬,"<a target=""_blank"" href=""https://www.anthropic.com/api"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38
|
25 |
+
π¬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Pro</a>",0.0,,82.47,86.49,-4.02,87.3,88.62,-1.32
|
26 |
+
π¬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini Pro 1</a>",0.0,,73.85,68.1,5.75,73.02,70.63,2.39
|
27 |
+
π¬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Flash</a>",0.0,,94.83,97.41,-2.58,96.03,97.09,-1.06
|
src/__pycache__/models_info.cpython-311.pyc
CHANGED
Binary files a/src/__pycache__/models_info.cpython-311.pyc and b/src/__pycache__/models_info.cpython-311.pyc differ
|
|
src/json2df.py
CHANGED
@@ -7,7 +7,7 @@ import sys
|
|
7 |
sys.path.append(os.path.abspath('data/api-results'))
|
8 |
|
9 |
# Now import the API results
|
10 |
-
from api_results import gpt4, gpt4o, gpt35turbo, claude_opus
|
11 |
from models_info import model_info
|
12 |
|
13 |
directory = 'data/raw-eval-outputs'
|
@@ -49,7 +49,10 @@ api_models = {
|
|
49 |
'GPT-4': gpt4,
|
50 |
'GPT-4o': gpt4o,
|
51 |
'GPT-3.5 Turbo': gpt35turbo,
|
52 |
-
'Claude Opus': claude_opus
|
|
|
|
|
|
|
53 |
}
|
54 |
|
55 |
for model_name, results in api_models.items():
|
|
|
7 |
sys.path.append(os.path.abspath('data/api-results'))
|
8 |
|
9 |
# Now import the API results
|
10 |
+
from api_results import gpt4, gpt4o, gpt35turbo, claude_opus, gemini_15_pro, gemini_pro_1, gemini_15_flash
|
11 |
from models_info import model_info
|
12 |
|
13 |
directory = 'data/raw-eval-outputs'
|
|
|
49 |
'GPT-4': gpt4,
|
50 |
'GPT-4o': gpt4o,
|
51 |
'GPT-3.5 Turbo': gpt35turbo,
|
52 |
+
'Claude Opus': claude_opus,
|
53 |
+
'Gemini 1.5 Pro': gemini_15_pro,
|
54 |
+
'Gemini Pro 1': gemini_pro_1,
|
55 |
+
'Gemini 1.5 Flash': gemini_15_flash
|
56 |
}
|
57 |
|
58 |
for model_name, results in api_models.items():
|
src/models_info.py
CHANGED
@@ -91,5 +91,17 @@ model_info = {
|
|
91 |
"Claude Opus": {
|
92 |
"link": "https://www.anthropic.com/api",
|
93 |
"tuning": "π¬"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
}
|
95 |
}
|
|
|
91 |
"Claude Opus": {
|
92 |
"link": "https://www.anthropic.com/api",
|
93 |
"tuning": "π¬"
|
94 |
+
},
|
95 |
+
"Gemini 1.5 Pro": {
|
96 |
+
"link": "https://ai.google.dev/",
|
97 |
+
"tuning": "π¬"
|
98 |
+
},
|
99 |
+
"Gemini Pro 1": {
|
100 |
+
"link": "https://ai.google.dev/",
|
101 |
+
"tuning": "π¬"
|
102 |
+
},
|
103 |
+
"Gemini 1.5 Flash": {
|
104 |
+
"link": "https://ai.google.dev/",
|
105 |
+
"tuning": "π¬"
|
106 |
}
|
107 |
}
|