carbonnnnn
committed on
Commit
•
68e6513
1
Parent(s):
54d8c8e
working first draft
Browse files- app.py +107 -7
- requirements.txt +2 -1
- src/combined_data.json +1 -1
- src/main_df.csv +23 -23
- src/process_data.py +48 -0
- utils/__pycache__/filter_utils.cpython-310.pyc +0 -0
- utils/filter_utils.py +29 -0
app.py
CHANGED
@@ -3,9 +3,45 @@ import gradio as gr
|
|
3 |
import os
|
4 |
from gradio_rangeslider import RangeSlider
|
5 |
|
|
|
|
|
|
|
|
|
|
|
6 |
text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
|
7 |
text = "## The range is: {min} to {max}"
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
llm_calc_app = gr.Blocks()
|
11 |
with llm_calc_app:
|
@@ -14,12 +50,37 @@ with llm_calc_app:
|
|
14 |
"""
|
15 |
Main Filters Row
|
16 |
"""
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
|
25 |
with gr.Row():
|
@@ -28,13 +89,52 @@ with llm_calc_app:
|
|
28 |
"""
|
29 |
|
30 |
leaderboard_table = gr.Dataframe(
|
31 |
-
value=
|
32 |
elem_id="text-leaderboard-table",
|
33 |
interactive=False,
|
34 |
visible=True,
|
35 |
height=800
|
36 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
llm_calc_app.load()
|
40 |
llm_calc_app.queue()
|
|
|
3 |
import os
|
4 |
from gradio_rangeslider import RangeSlider
|
5 |
|
6 |
+
from utils.filter_utils import filter
|
7 |
+
|
8 |
+
# MAPS = filter_utils.LANG_MAPPING
|
9 |
+
|
10 |
+
# Main Leaderboard containing everything
|
11 |
text_leaderboard = pd.read_csv(os.path.join('src', 'main_df.csv'))
|
12 |
text = "## The range is: {min} to {max}"
|
13 |
|
14 |
+
# Short leaderboard containing fixed columns (the subset shown to the user).
short_leaderboard = text_leaderboard[[
    'model_name',
    'input_price',
    'output_price',
    'release_date',
    'context_size',
    'average_clemscore',
    'average_latency',
    'parameter_size',
]]

# Distinct language names across all models, sorted for the dropdown.
# Each `languages` cell is a comma-separated string. Rows whose cell is
# missing (NaN) are skipped instead of crashing on `.split`, and empty tokens
# (e.g. from a trailing comma) are dropped so no blank choice is offered.
langs = sorted({
    name.strip()
    for cell in text_leaderboard['languages'].dropna()
    for name in str(cell).split(',')
    if name.strip()
})

# Per-model input/output prices, kept as plain lists under their original
# module-level names.
ip_prices = text_leaderboard['input_price'].tolist()
op_prices = text_leaderboard['output_price'].tolist()

# Upper bounds for the price range sliders. Series.max() skips NaN, whereas
# the builtin max() over a list containing NaN gives an order-dependent result.
max_input_price = text_leaderboard['input_price'].max()
max_output_price = text_leaderboard['output_price'].max()
|
44 |
+
|
45 |
|
46 |
llm_calc_app = gr.Blocks()
|
47 |
with llm_calc_app:
|
|
|
50 |
"""
|
51 |
Main Filters Row
|
52 |
"""
|
53 |
+
|
54 |
+
### Language filter
|
55 |
+
with gr.Row():
|
56 |
+
lang_dropdown = gr.Dropdown(
|
57 |
+
choices=langs,
|
58 |
+
value=[],
|
59 |
+
multiselect=True,
|
60 |
+
label="Select Languages 🕹️"
|
61 |
+
)
|
62 |
+
|
63 |
+
clemscore_slider = RangeSlider(
|
64 |
+
minimum=0,
|
65 |
+
maximum=100,
|
66 |
+
value=(0, 100),
|
67 |
+
label="Select Clemscore range"
|
68 |
+
)
|
69 |
+
|
70 |
+
input_pricing_slider = RangeSlider(
|
71 |
+
minimum=0,
|
72 |
+
maximum=max_input_price,
|
73 |
+
value=(0, max_input_price),
|
74 |
+
label="Select Price range /1M input tokens"
|
75 |
+
)
|
76 |
+
|
77 |
+
output_pricing_slider = RangeSlider(
|
78 |
+
minimum=0,
|
79 |
+
maximum=max_output_price,
|
80 |
+
value=(0, max_output_price),
|
81 |
+
label="Select Price range /1M output tokens"
|
82 |
+
)
|
83 |
+
|
84 |
|
85 |
|
86 |
with gr.Row():
|
|
|
89 |
"""
|
90 |
|
91 |
leaderboard_table = gr.Dataframe(
|
92 |
+
value=short_leaderboard,
|
93 |
elem_id="text-leaderboard-table",
|
94 |
interactive=False,
|
95 |
visible=True,
|
96 |
height=800
|
97 |
)
|
98 |
+
|
99 |
+
|
100 |
+
dummy_leaderboard_table = gr.Dataframe(
|
101 |
+
value=text_leaderboard,
|
102 |
+
elem_id="dummy-leaderboard-table",
|
103 |
+
interactive=False,
|
104 |
+
visible=False
|
105 |
+
)
|
106 |
|
107 |
+
lang_dropdown.change(
|
108 |
+
filter,
|
109 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
110 |
+
input_pricing_slider, output_pricing_slider],
|
111 |
+
[leaderboard_table],
|
112 |
+
queue=True
|
113 |
+
)
|
114 |
+
|
115 |
+
clemscore_slider.change(
|
116 |
+
filter,
|
117 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
118 |
+
input_pricing_slider, output_pricing_slider],
|
119 |
+
[leaderboard_table],
|
120 |
+
queue=True
|
121 |
+
)
|
122 |
+
|
123 |
+
input_pricing_slider.change(
|
124 |
+
filter,
|
125 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
126 |
+
input_pricing_slider, output_pricing_slider],
|
127 |
+
[leaderboard_table],
|
128 |
+
queue=True
|
129 |
+
)
|
130 |
+
|
131 |
+
output_pricing_slider.change(
|
132 |
+
filter,
|
133 |
+
[dummy_leaderboard_table, lang_dropdown, clemscore_slider,
|
134 |
+
input_pricing_slider, output_pricing_slider],
|
135 |
+
[leaderboard_table],
|
136 |
+
queue=True
|
137 |
+
)
|
138 |
|
139 |
llm_calc_app.load()
|
140 |
llm_calc_app.queue()
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
beautifulsoup4==4.12.3
|
2 |
pandas==2.2.3
|
3 |
-
gradio_rangeslider==0.0.7
|
|
|
|
1 |
beautifulsoup4==4.12.3
|
2 |
pandas==2.2.3
|
3 |
+
gradio_rangeslider==0.0.7
|
4 |
+
gradio==4.44.1
|
src/combined_data.json
CHANGED
@@ -303,7 +303,7 @@
|
|
303 |
"name": "Apache 2.0",
|
304 |
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
305 |
},
|
306 |
-
"languages": ["
|
307 |
"release_date": "2024-04-17",
|
308 |
"parameters": {
|
309 |
"estimated": false,
|
|
|
303 |
"name": "Apache 2.0",
|
304 |
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
305 |
},
|
306 |
+
"languages": ["en", "fr", "es", "de", "it", "ru"],
|
307 |
"release_date": "2024-04-17",
|
308 |
"parameters": {
|
309 |
"estimated": false,
|
src/main_df.csv
CHANGED
@@ -1,24 +1,24 @@
|
|
1 |
model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,license_name,license_url,languages,release_date,open_weight,context_size,average_clemscore,average_latency,parameter_size,estimated
|
2 |
-
Meta-Llama-3-70B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,
|
3 |
-
Meta-Llama-3-8B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,
|
4 |
-
Meta-Llama-3.1-405B-Instruct-Turbo,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"
|
5 |
-
Meta-Llama-3.1-70B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"
|
6 |
-
Meta-Llama-3.1-8B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"
|
7 |
-
InternVL2-40B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"
|
8 |
-
InternVL2-8B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"
|
9 |
-
InternVL2-Llama3-76B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"
|
10 |
-
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"
|
11 |
-
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"
|
12 |
-
Mistral-Large-Instruct-2407,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
13 |
-
Mixtral-8x22B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
14 |
-
Mistral-7B-Instruct-v0.2,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
15 |
-
Mistral-7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
16 |
-
Mixtral-8x7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"
|
17 |
-
openchat-3.5-0106,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,
|
18 |
-
openchat-3.5-1210,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,
|
19 |
-
openchat_3.5,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,
|
20 |
-
gpt-4o-mini-2024-07-18,0.15,0.6,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
21 |
-
gpt-4o-2024-08-06,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
22 |
-
gpt-4o-2024-05-13,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
23 |
-
gpt-4-1106-vision-preview,10.0,30.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"
|
24 |
-
gemini-1.5-flash-latest,0.075,0.3,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"
|
|
|
1 |
model_name,input_price,output_price,multimodality_image,multimodality_multiple_image,multimodality_audio,multimodality_video,source,license_name,license_url,languages,release_date,open_weight,context_size,average_clemscore,average_latency,parameter_size,estimated
|
2 |
+
Meta-Llama-3-70B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,English,2024-04-18,True,8192,11.703333333333333,1.1160853862207483,70.0,False
|
3 |
+
Meta-Llama-3-8B-Instruct-hf,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct,Meta Llama 3 License,https://www.llama.com/llama3/license/,English,2024-04-18,True,8192,6.663333333333333,0.7054825144189354,8.0,False
|
4 |
+
Meta-Llama-3.1-405B-Instruct-Turbo,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,17.37,0.2628701315515277,405.0,False
|
5 |
+
Meta-Llama-3.1-70B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,12.943333333333333,0.27016850919817575,70.0,False
|
6 |
+
Meta-Llama-3.1-8B-Instruct,0.0,0.0,False,False,False,False,https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct,Llama 3.1 Community License,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,"English, German, French, Italian, Hindi, Portuguese, Spanish, Thai",2024-07-23,True,131072,6.12,0.06876858280202812,8.0,False
|
7 |
+
InternVL2-40B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-40B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,21.810000000000002,2.609271782765464,40.0,False
|
8 |
+
InternVL2-8B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-8B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,19.74,0.8367998047485775,8.0,False
|
9 |
+
InternVL2-Llama3-76B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,25.709999999999997,4.591395944741546,76.0,False
|
10 |
+
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,23.24,1.7593004986949285,26.0,False
|
11 |
+
InternVL2-26B,0.0,0.0,True,True,False,False,https://huggingface.co/OpenGVLab/InternVL2-26B,MIT,https://choosealicense.com/licenses/mit/,"Chinese, English, French, Spanish, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Arabic",2024-07-15,True,8192,23.24,1.7593004986949285,26.0,False
|
12 |
+
Mistral-Large-Instruct-2407,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-Large-Instruct-2407,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese, Japanese, Korean",2024-06-12,True,8192,15.13,0.41482225628780656,70.0,False
|
13 |
+
Mixtral-8x22B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian",2024-04-17,True,8192,4.2299999999999995,0.3586451521191292,141.0,False
|
14 |
+
Mistral-7B-Instruct-v0.2,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese",2024-01-15,True,8192,3.25,0.25450503989030154,7.0,False
|
15 |
+
Mistral-7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian, Chinese",2023-12-11,True,8192,2.67,0.09428825169239076,7.0,False
|
16 |
+
Mixtral-8x7B-Instruct-v0.1,0.0,0.0,False,False,False,False,https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,"English, French, Spanish, German, Italian, Russian",2023-12-11,True,8192,2.723333333333333,0.31309892202121054,46.7,False
|
17 |
+
openchat-3.5-0106,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-0106,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2024-01-06,True,8192,5.7,0.09736504835188847,7.0,False
|
18 |
+
openchat-3.5-1210,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat-3.5-1210,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2023-12-10,True,8192,6.073333333333333,0.09349942563676568,7.0,False
|
19 |
+
openchat_3.5,0.0,0.0,False,False,False,False,https://huggingface.co/openchat/openchat_3.5,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,English,2023-10-30,True,8192,7.88,0.10576256228206875,7.0,False
|
20 |
+
gpt-4o-mini-2024-07-18,0.15,0.6,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-07-18,False,131072,52.32333333333333,1.619222935116773,8.0,True
|
21 |
+
gpt-4o-2024-08-06,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-08-06,False,131072,69.57000000000001,1.5771123003908176,200.0,True
|
22 |
+
gpt-4o-2024-05-13,2.5,10.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2024-05-13,False,131072,66.87333333333333,3.704921340164487,200.0,True
|
23 |
+
gpt-4-1106-vision-preview,10.0,30.0,True,True,False,False,https://openai.com/api/pricing/,Commercial License,https://openai.com/policies/terms-of-use,"English, Spanish, French, German, Chinese, Japanese, Korean, Italian, Portuguese, Dutch, Russian, Arabic, Hindi, Turkish, Vietnamese, Polish, Thai, Swedish, Danish, Norwegian, Finnish, Hungarian, Czech, Slovak, Romanian, Bulgarian, Ukrainian, Lithuanian, Latvian, Estonian, Slovenian, Malay, Indonesian, Tagalog, Swahili, Amharic",2023-11-06,False,131072,47.23,2.217200177676117,1760.0,True
|
24 |
+
gemini-1.5-flash-latest,0.075,0.3,True,True,True,True,https://cloud.google.com/vertex-ai/generative-ai/pricing,Commercial License,,"Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovenian, Spanish, Swahili, Swedish, Thai, Turkish, Ukrainian, Vietnamese, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Latvian, Arabic, Bengali, Bulgarian",2024-05-24,False,131072,42.53666666666667,26.268280234692302,1760.0,True
|
src/process_data.py
CHANGED
@@ -108,8 +108,56 @@ for col in additional_price_columns:
|
|
108 |
# Clean and convert context to integer
|
109 |
df['context_size'] = df['context_size'].replace({'k': ''}, regex=True).astype(int)
|
110 |
|
|
|
|
|
111 |
df['parameter_size'] = df['parameter_size'].replace({'B': '', '': None}, regex=True).astype(float)
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
# Keep only the specified columns
|
114 |
df = df[[
|
115 |
'model_name',
|
|
|
108 |
# Clean and convert context to integer
|
109 |
df['context_size'] = df['context_size'].replace({'k': ''}, regex=True).astype(int)
|
110 |
|
111 |
+
df['context_size'] = df['context_size']*1024
|
112 |
+
|
113 |
df['parameter_size'] = df['parameter_size'].replace({'B': '', '': None}, regex=True).astype(float)
|
114 |
|
115 |
+
LANG_MAPPING = {
|
116 |
+
'el': 'Greek',
|
117 |
+
'id': 'Indonesian',
|
118 |
+
'ko': 'Korean',
|
119 |
+
'sv': 'Swedish',
|
120 |
+
'de': 'German',
|
121 |
+
'lv': 'Latvian',
|
122 |
+
'am': 'Amharic',
|
123 |
+
'fi': 'Finnish',
|
124 |
+
'da': 'Danish',
|
125 |
+
'pt': 'Portuguese',
|
126 |
+
'sw': 'Swahili',
|
127 |
+
'es': 'Spanish',
|
128 |
+
'it': 'Italian',
|
129 |
+
'bn': 'Bengali',
|
130 |
+
'nl': 'Dutch',
|
131 |
+
'lt': 'Lithuanian',
|
132 |
+
'ro': 'Romanian',
|
133 |
+
'sl': 'Slovenian',
|
134 |
+
'hu': 'Hungarian',
|
135 |
+
'hr': 'Croatian',
|
136 |
+
'vi': 'Vietnamese',
|
137 |
+
'hi': 'Hindi',
|
138 |
+
'zh': 'Chinese',
|
139 |
+
'pl': 'Polish',
|
140 |
+
'ar': 'Arabic',
|
141 |
+
'cs': 'Czech',
|
142 |
+
'sk': 'Slovak',
|
143 |
+
'ja': 'Japanese',
|
144 |
+
'no': 'Norwegian',
|
145 |
+
'uk': 'Ukrainian',
|
146 |
+
'fr': 'French',
|
147 |
+
'et': 'Estonian',
|
148 |
+
'ru': 'Russian',
|
149 |
+
'th': 'Thai',
|
150 |
+
'bg': 'Bulgarian',
|
151 |
+
'tr': 'Turkish',
|
152 |
+
'ms': 'Malay',
|
153 |
+
'he': 'Hebrew',
|
154 |
+
'tl': 'Tagalog',
|
155 |
+
'sr': 'Serbian',
|
156 |
+
'en': 'English'
|
157 |
+
}
|
158 |
+
|
159 |
+
df['languages'] = df['languages'].apply(lambda x: ', '.join([LANG_MAPPING.get(lang, lang) for lang in x.split(', ')]))
|
160 |
+
|
161 |
# Keep only the specified columns
|
162 |
df = df[[
|
163 |
'model_name',
|
utils/__pycache__/filter_utils.cpython-310.pyc
ADDED
Binary file (1.02 kB). View file
|
|
utils/filter_utils.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Utility functions for filtering the dataframe
|
2 |
+
|
3 |
+
def filter_cols(df):
    """Return ``df`` restricted to the columns shown in the short leaderboard.

    Args:
        df: DataFrame containing at least the eight display columns below.

    Returns:
        A DataFrame holding only the display columns, in display order.

    Raises:
        KeyError: if any display column is missing from ``df``.
    """
    df = df[[
        'model_name',
        'input_price',
        'output_price',
        'release_date',
        'context_size',
        'average_clemscore',
        'average_latency',
        'parameter_size',
    ]]

    return df


def _parse_languages(cell):
    """Split a comma-separated ``languages`` cell into a set of language names."""
    if not isinstance(cell, str):
        # A missing value arrives as NaN (a float): treat it as "no languages".
        return set()
    return {name.strip() for name in cell.split(',') if name.strip()}


# NOTE: the name shadows the builtin ``filter``; it is kept because callers
# import it under this name (``from utils.filter_utils import filter``).
def filter(df, language_list, clemscore, input_price, output_price):
    """Filter the full leaderboard and return its short-column view.

    Args:
        df: Full leaderboard DataFrame; must contain ``languages`` plus the
            columns required by ``filter_cols``.
        language_list: Language names a model must ALL support. An empty list
            or ``None`` disables the language filter.
        clemscore: ``(low, high)`` inclusive bounds on ``average_clemscore``.
        input_price: ``(low, high)`` inclusive bounds on ``input_price``.
        output_price: ``(low, high)`` inclusive bounds on ``output_price``.

    Returns:
        The rows of ``df`` passing every filter, restricted by ``filter_cols``.
    """
    wanted = {lang.strip() for lang in (language_list or [])}
    if wanted:
        # Compare whole language names rather than substrings of the raw cell,
        # so e.g. "Malay" does not match a model that only lists "Malayalam".
        # astype(bool) keeps the mask boolean even when ``df`` has no rows.
        supports_all = df['languages'].apply(
            lambda cell: wanted <= _parse_languages(cell)
        ).astype(bool)
        df = df[supports_all]

    # Series.between is inclusive on both ends, i.e. low <= value <= high.
    in_range = (
        df['average_clemscore'].between(clemscore[0], clemscore[1])
        & df['input_price'].between(input_price[0], input_price[1])
        & df['output_price'].between(output_price[0], output_price[1])
    )

    return filter_cols(df[in_range])  # Return the filtered dataframe
|
27 |
+
|
28 |
+
|
29 |
+
|