Commit
·
834ab51
1
Parent(s):
8b7a2fd
minor fixes
Browse files
- app.py +6 -1
- assets/merged_data.csv +0 -71
- assets/text_content.py +1 -0
- src/filter_utils.py +7 -26
- src/process_data.py +14 -0
app.py
CHANGED
@@ -4,13 +4,15 @@ import os
|
|
4 |
from gradio_rangeslider import RangeSlider
|
5 |
import calendar
|
6 |
import datetime
|
|
|
7 |
|
8 |
from src.filter_utils import filter, filter_cols
|
9 |
from src.process_data import merge_data
|
10 |
import assets.text_content as tc
|
11 |
|
12 |
# Main Leaderboard containing everything
|
13 |
-
text_leaderboard = pd.read_csv(os.path.join('assets', 'merged_data.csv'))
|
|
|
14 |
text_leaderboard = text_leaderboard.sort_values(by=tc.CLEMSCORE, ascending=False)
|
15 |
|
16 |
# When displaying latency values
|
@@ -68,6 +70,9 @@ parameter_step = 1
|
|
68 |
|
69 |
min_context = min(contexts)
|
70 |
max_context = max(contexts)
|
|
|
|
|
|
|
71 |
context_step = 8
|
72 |
|
73 |
min_date = min(dates)
|
|
|
4 |
from gradio_rangeslider import RangeSlider
|
5 |
import calendar
|
6 |
import datetime
|
7 |
+
import numpy as np
|
8 |
|
9 |
from src.filter_utils import filter, filter_cols
|
10 |
from src.process_data import merge_data
|
11 |
import assets.text_content as tc
|
12 |
|
13 |
# Main Leaderboard containing everything
|
14 |
+
# text_leaderboard = pd.read_csv(os.path.join('assets', 'merged_data.csv'))
|
15 |
+
text_leaderboard = merge_data()
|
16 |
text_leaderboard = text_leaderboard.sort_values(by=tc.CLEMSCORE, ascending=False)
|
17 |
|
18 |
# When displaying latency values
|
|
|
70 |
|
71 |
min_context = min(contexts)
|
72 |
max_context = max(contexts)
|
73 |
+
print(min_context, max_context)
|
74 |
+
print("SOMETHING")
|
75 |
+
print(contexts)
|
76 |
context_step = 8
|
77 |
|
78 |
min_date = min(dates)
|
assets/merged_data.csv
DELETED
@@ -1,71 +0,0 @@
|
|
1 |
-
Model Name,Latency (s),Clemscore,Parameters (B),Release Date,Open Weight,Languages,Context Size (k),License Name,License URL,Single Image,Multi Image,Audio,Video,Input $/1M tokens,Output $/1M tokens,License,Temp Date
|
2 |
-
o1-preview-2024-09-12,7.368572853601854,73.63,,2024-09-12,False,"English, Spanish, French, German, Italian, Portuguese, Russian, Chinese, Japanese, Korean, Arabic, Hindi, Bengali, Telugu, Tamil, Malayalam, Kannada, Gujarati, Marathi, Panjabi, Urdu, Vietnamese, Thai, Indonesian, Malay (macrolanguage), Tagalog, Swahili (macrolanguage), Yoruba, Zulu, Turkish, Hebrew, Persian, Ukrainian, Polish, Czech, Hungarian, Romanian, Dutch, Swedish, Norwegian, Danish, Finnish, Modern Greek (1453-), Bulgarian, Serbian, Croatian, Slovak, Icelandic",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,False,False,False,False,15.0,60.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2024-09-12
|
3 |
-
gpt-4-1106-vision-preview,4.712557435752081,73.55,,2023-11-06,False,"English, Italian, Afrikaans, Spanish, German, French, Indonesian, Russian, Polish, Ukrainian, Modern Greek (1453-), Latvian, Chinese, Arabic, Turkish, Japanese, Swahili (macrolanguage), Welsh, Korean, Icelandic, Bengali, Urdu, Nepali (macrolanguage), Thai, Panjabi, Marathi, Telugu",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,True,True,False,False,10.0,30.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2023-11-06
|
4 |
-
claude-3-5-sonnet-20240620,2.0645066812060726,68.925,,2024-06-20,False,"English, Spanish, French, Japanese",200k,Anthropic,https://www.anthropic.com/legal/commercial-terms,True,True,False,False,3.0,15.0,[Anthropic](https://www.anthropic.com/legal/commercial-terms),2024-06-20
|
5 |
-
gpt-4o-2024-08-06,1.951333607454077,63.875,,2024-08-06,False,"English, Spanish, French, German, Italian, Portuguese, Russian, Chinese, Japanese, Korean, Arabic, Hindi, Bengali, Telugu, Tamil, Malayalam, Kannada, Gujarati, Marathi, Panjabi, Urdu, Vietnamese, Thai, Indonesian, Malay (macrolanguage), Tagalog, Swahili (macrolanguage), Yoruba, Zulu, Turkish, Hebrew, Persian, Ukrainian, Polish, Czech, Hungarian, Romanian, Dutch, Swedish, Norwegian, Danish, Finnish, Modern Greek (1453-), Bulgarian, Serbian, Croatian, Slovak, Icelandic",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,True,True,False,False,3.75,15.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2024-08-06
|
6 |
-
gpt-4o-2024-05-13,5.022646224034688,58.95,,2024-05-13,False,"English, Spanish, French, German, Italian, Portuguese, Russian, Chinese, Japanese, Korean, Arabic, Hindi, Bengali, Telugu, Tamil, Malayalam, Kannada, Gujarati, Marathi, Panjabi, Urdu, Vietnamese, Thai, Indonesian, Malay (macrolanguage), Tagalog, Swahili (macrolanguage), Yoruba, Zulu, Turkish, Hebrew, Persian, Ukrainian, Polish, Czech, Hungarian, Romanian, Dutch, Swedish, Norwegian, Danish, Finnish, Modern Greek (1453-), Bulgarian, Serbian, Croatian, Slovak, Icelandic",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,True,True,False,False,5.0,15.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2024-05-13
|
7 |
-
gpt-4-turbo-2024-04-09,,58.3,,2024-04-09,False,"English, Italian, Afrikaans, Spanish, German, French, Indonesian, Russian, Polish, Ukrainian, Modern Greek (1453-), Latvian, Chinese, Arabic, Turkish, Japanese, Swahili (macrolanguage), Welsh, Korean, Icelandic, Bengali, Urdu, Nepali (macrolanguage), Thai, Panjabi, Marathi, Telugu",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,False,False,False,False,10.0,30.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2024-04-09
|
8 |
-
claude-3-opus-20240229,3.916101346449241,55.29,,2024-02-29,False,"English, Spanish, French, Japanese",200k,Anthropic,https://www.anthropic.com/legal/commercial-terms,True,True,False,False,15.0,75.0,[Anthropic](https://www.anthropic.com/legal/commercial-terms),2024-02-29
|
9 |
-
gpt-4-0125-preview,1.0418927523113648,52.5,,2024-01-25,False,"English, Italian, Afrikaans, Spanish, German, French, Indonesian, Russian, Polish, Ukrainian, Modern Greek (1453-), Latvian, Chinese, Arabic, Turkish, Japanese, Swahili (macrolanguage), Welsh, Korean, Icelandic, Bengali, Urdu, Nepali (macrolanguage), Thai, Panjabi, Marathi, Telugu",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,False,False,False,False,10.0,30.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2024-01-25
|
10 |
-
Meta-Llama-3.1-405B-Instruct-Turbo,0.7886103946545819,52.11,405.0,2024-07-23,True,"English, German, French, Italian, Portuguese, Hindi, Spanish, Thai",128k,Meta,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,False,False,False,False,0.0,0.0,[Meta](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE),2024-07-23
|
11 |
-
gpt-4-1106-preview,0.7767265743542736,51.99,,2023-11-06,False,"English, Italian, Afrikaans, Spanish, German, French, Indonesian, Russian, Polish, Ukrainian, Modern Greek (1453-), Latvian, Chinese, Arabic, Turkish, Japanese, Swahili (macrolanguage), Welsh, Korean, Icelandic, Bengali, Urdu, Nepali (macrolanguage), Thai, Panjabi, Marathi, Telugu",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,False,False,False,False,0.0,0.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2023-11-06
|
12 |
-
gpt-4-0613,0.648441146582876,51.09,,2023-06-13,False,"English, Italian, Afrikaans, Spanish, German, French, Indonesian, Russian, Polish, Ukrainian, Modern Greek (1453-), Latvian, Chinese, Arabic, Turkish, Japanese, Swahili (macrolanguage), Welsh, Korean, Icelandic, Bengali, Urdu, Nepali (macrolanguage), Thai, Panjabi, Marathi, Telugu",8k,OpenAI,https://openai.com/policies/row-terms-of-use/,False,False,False,False,0.0,0.0,[OpenAI](https://openai.com/policies/row-terms-of-use/),2023-06-13
|
13 |
-
gpt-4o-mini-2024-07-18,2.08647007916325,46.55,,2024-07-18,False,"English, Spanish, French, German, Italian, Portuguese, Russian, Chinese, Japanese, Korean, Arabic, Hindi, Bengali, Telugu, Tamil, Malayalam, Kannada, Gujarati, Marathi, Panjabi, Urdu, Vietnamese, Thai, Indonesian, Malay (macrolanguage), Tagalog, Swahili (macrolanguage), Yoruba, Zulu, Turkish, Hebrew, Persian, Ukrainian, Polish, Czech, Hungarian, Romanian, Dutch, Swedish, Norwegian, Danish, Finnish, Modern Greek (1453-), Bulgarian, Serbian, Croatian, Slovak, Icelandic",128k,OpenAI,https://openai.com/policies/row-terms-of-use/,True,True,False,False,0.3,1.2,[OpenAI](https://openai.com/policies/row-terms-of-use/),2024-07-18
|
14 |
-
Mistral-Large-Instruct-2407,1.2444667688634192,45.39,123.0,2024-07-24,True,"English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese, Russian, Korean",128k,Mistral,https://mistral.ai/news/mistral-ai-non-production-license-mnpl,False,False,False,False,0.0,0.0,[Mistral](https://mistral.ai/news/mistral-ai-non-production-license-mnpl),2024-07-24
|
15 |
-
Meta-Llama-3.1-70B-Instruct,0.8105055275945292,38.83,70.0,2024-07-23,True,"English, German, French, Italian, Portuguese, Hindi, Spanish, Thai",128k,Meta,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,False,False,False,False,0.0,0.0,[Meta](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE),2024-07-23
|
16 |
-
InternVL2-26B,4.239272214812438,37.45,26.0,2024-07-15,True,"Chinese, English",8k,MIT,https://choosealicense.com/licenses/mit/,True,True,False,False,0.0,0.0,[MIT](https://choosealicense.com/licenses/mit/),2024-07-15
|
17 |
-
InternVL2-Llama3-76B,10.660117299385416,33.84,76.0,2024-07-15,True,English,8k,Meta,https://huggingface.co/meta-llama/Meta-Llama-3-8B/blob/main/LICENSE,True,True,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Meta-Llama-3-8B/blob/main/LICENSE),2024-07-15
|
18 |
-
claude-2.1,1.6836316221022516,32.5,,2023-11-21,False,English,200k,Anthropic,https://www.anthropic.com/legal/commercial-terms,False,False,False,False,8.0,24.0,[Anthropic](https://www.anthropic.com/legal/commercial-terms),2023-11-21
|
19 |
-
InternVL2-40B,6.267102418391484,32.23,40.0,2024-07-15,True,"Chinese, English",8k,MIT,https://choosealicense.com/licenses/mit/,True,True,False,False,0.0,0.0,[MIT](https://choosealicense.com/licenses/mit/),2024-07-15
|
20 |
-
claude-3-sonnet-20240229,1.4194860128225952,30.53,,2024-02-29,False,"English, Spanish, French, Japanese",200k,Anthropic,https://www.anthropic.com/legal/commercial-terms,True,True,False,False,3.0,15.0,[Anthropic](https://www.anthropic.com/legal/commercial-terms),2024-02-29
|
21 |
-
Qwen1.5-72B-Chat,12.689668927658191,30.37,72.0,2024-01-30,True,"Arabic, Spanish, French, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Indonesian, English, Chinese",32k,Qwen,https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT,False,False,False,False,0.0,0.0,[Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT),2024-01-30
|
22 |
-
Qwen2-72B-Instruct,0.9480584860151366,30.03,72.0,2024-05-28,True,English,128k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-05-28
|
23 |
-
idefics-80b-instruct,6.8089303915502315,29.55,80.0,2023-07-24,True,English,2k,Meta,https://huggingface.co/HuggingFaceM4/idefics-80b-instruct#license,True,True,False,False,0.0,0.0,[Meta](https://huggingface.co/HuggingFaceM4/idefics-80b-instruct#license),2023-07-24
|
24 |
-
Pixtral-12B-2409,1.4976731684122335,28.64,12.0,2024-09-11,True,"English, French, German, Spanish, Italian, Portuguese, Russian, Chinese, Japanese",128k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-09-11
|
25 |
-
mistral-large-2402,0.3967416598893965,28.17,123.0,2024-02-01,True,"English, German, French, Italian, Spanish",128k,Mistral,https://mistral.ai/licenses/MRL-0.1.md,False,False,False,False,0.0,0.0,[Mistral](https://mistral.ai/licenses/MRL-0.1.md),2024-02-01
|
26 |
-
Qwen2.5-Coder-32B-Instruct,0.8337066960552915,27.57,32.0,2024-11-06,True,English,128k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-11-06
|
27 |
-
gemma-2-9b-it,0.3692553324432573,27.34,9.0,2024-06-24,True,English,8k,Gemma,https://ai.google.dev/gemma/terms,False,False,False,False,0.0,0.0,[Gemma](https://ai.google.dev/gemma/terms),2024-06-24
|
28 |
-
gpt-3.5-turbo-0125,,27.22,,2024-01-25,False,"English, Italian, Afrikaans, Spanish, German, French, Indonesian, Russian, Polish, Ukrainian, Modern Greek (1453-), Latvian, Chinese, Arabic, Turkish, Japanese, Swahili (macrolanguage), Welsh, Korean, Icelandic, Bengali, Urdu, Nepali (macrolanguage), Thai, Panjabi, Marathi, Telugu",16k,OpenAI,https://openai.com/policies/row-terms-of-use/,False,False,False,False,0.5,1.5,[OpenAI](https://openai.com/policies/row-terms-of-use/),2024-01-25
|
29 |
-
command-r-plus,0.3104016019283746,24.94,104.0,2024-04-01,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-04-01
|
30 |
-
openchat_3.5,0.3172876868462049,23.64,7.0,2023-10-30,True,English,8k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2023-10-30
|
31 |
-
InternVL2-8B,1.948600327851168,23.17,8.0,2024-07-15,True,"Chinese, English",8k,MIT,https://choosealicense.com/licenses/mit/,True,True,False,False,0.0,0.0,[MIT](https://choosealicense.com/licenses/mit/),2024-07-15
|
32 |
-
claude-3-haiku-20240307,0.8695497396191068,22.49,,2024-03-07,False,"English, Spanish, French, Japanese",200k,Anthropic,https://www.anthropic.com/legal/commercial-terms,True,True,False,False,0.25,1.25,[Anthropic](https://www.anthropic.com/legal/commercial-terms),2024-03-07
|
33 |
-
sheep-duck-llama-2-70b-v1.1,5.524607914346901,21.5,70.0,2023-09-27,True,English,4k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-09-27
|
34 |
-
Meta-Llama-3.1-8B-Instruct,0.206305748406081,18.36,8.0,2024-07-23,True,"English, German, French, Italian, Portuguese, Hindi, Spanish, Thai",128k,Meta,https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE,False,False,False,False,0.0,0.0,[Meta](https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/LICENSE),2024-07-23
|
35 |
-
openchat-3.5-1210,0.280498276910299,18.22,7.0,2023-12-10,True,English,8k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2023-12-10
|
36 |
-
Idefics3-8B-Llama3,2.7247848158020003,17.52,8.0,2024-08-05,True,English,128k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-08-05
|
37 |
-
WizardLM-70b-v1.0,3.924977203883497,17.4,70.0,2023-08-09,True,English,4k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-08-09
|
38 |
-
openchat-3.5-0106,0.2920951450556648,17.1,7.0,2024-01-06,True,English,8k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-01-06
|
39 |
-
internlm-xcomposer2d5-7b,8.438096179522176,16.95,7.0,2024-07-02,True,"Chinese, English",16k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,True,True,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-07-02
|
40 |
-
mistral-medium-2312,3.3167870515212083,16.43,,2023-12-01,True,"English, German, French, Italian, Spanish",128k,Mistral,https://mistral.ai/licenses/MRL-0.1.md,False,False,False,False,0.0,0.0,[Mistral](https://mistral.ai/licenses/MRL-0.1.md),2023-12-01
|
41 |
-
Phi-3.5-vision-instruct,1.540488050470713,15.64,4.0,2024-08-17,True,English,128k,MIT,https://choosealicense.com/licenses/mit/,True,True,False,False,0.0,0.0,[MIT](https://choosealicense.com/licenses/mit/),2024-08-17
|
42 |
-
codegemma-7b-it,0.3048974050865229,15.3,7.0,2024-04-09,True,English,8k,Gemma,https://ai.google.dev/gemma/terms,False,False,False,False,0.0,0.0,[Gemma](https://ai.google.dev/gemma/terms),2024-04-09
|
43 |
-
CodeLlama-34b-Instruct-hf,3.851887315425933,14.35,34.0,2023-08-24,True,English,100k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-08-24
|
44 |
-
command-r,0.1883241491458606,14.15,35.0,2024-03-01,True,English,,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-03-01
|
45 |
-
gemma-1.1-7b-it,0.1782953878345496,14.14,7.0,2024-03-26,True,English,8k,Gemma,https://ai.google.dev/gemma/terms,False,False,False,False,0.0,0.0,[Gemma](https://ai.google.dev/gemma/terms),2024-03-26
|
46 |
-
SUS-Chat-34B,2.27951476106911,14.11,34.0,2023-11-29,True,"English, Chinese",8k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2023-11-29
|
47 |
-
aya-23-35B,0.5755088395104287,13.35,35.0,2024-05-19,True,"English, French, German, Spanish, Italian, Portuguese, Japanese, Korean, Chinese, Arabic, Modern Greek (1453-), Persian, Polish, Indonesian, Czech, iw, Hindi, Dutch, Romanian, Russian, Turkish, Ukrainian, Vietnamese",8k,CC-BY-NC,https://spdx.org/licenses/CC-BY-NC-4.0,False,False,False,False,0.0,0.0,[CC-BY-NC](https://spdx.org/licenses/CC-BY-NC-4.0),2024-05-19
|
48 |
-
Mixtral-8x22B-Instruct-v0.1,1.0759354563573875,12.69,141.0,2024-04-17,True,"English, French, German, Italian, Spanish",64k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-04-17
|
49 |
-
tulu-2-dpo-70b,7.848597339328536,12.62,70.0,2023-11-12,True,English,4k,Ai2,https://allenai.org/terms/2024-09-25,False,False,False,False,0.0,0.0,[Ai2](https://allenai.org/terms/2024-09-25),2023-11-12
|
50 |
-
idefics-9b-instruct,4.156911970172687,12.29,9.0,2023-07-24,True,English,2k,Meta,https://huggingface.co/HuggingFaceM4/idefics-80b-instruct#license,True,True,False,False,0.0,0.0,[Meta](https://huggingface.co/HuggingFaceM4/idefics-80b-instruct#license),2023-07-24
|
51 |
-
aya-23-8B,0.4818848185613353,11.72,8.0,2024-05-19,True,"English, French, German, Spanish, Italian, Portuguese, Japanese, Korean, Chinese, Arabic, Modern Greek (1453-), Persian, Polish, Indonesian, Czech, iw, Hindi, Dutch, Romanian, Russian, Turkish, Ukrainian, Vietnamese",8k,CC-BY-NC,https://spdx.org/licenses/CC-BY-NC-4.0,False,False,False,False,0.0,0.0,[CC-BY-NC](https://spdx.org/licenses/CC-BY-NC-4.0),2024-05-19
|
52 |
-
WizardLM-13b-v1.2,3.5654367625763,11.48,13.0,2023-07-25,True,English,4k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-07-25
|
53 |
-
vicuna-33b-v1.3,0.8235025152162306,11.27,33.0,2023-06-21,True,English,4k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-06-21
|
54 |
-
Llama-3.1-Nemotron-70B-Instruct-HF,1.105406813859938,10.16,70.0,2024-10-12,True,English,128k,Meta,https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct/blob/main/LICENSE,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct/blob/main/LICENSE),2024-10-12
|
55 |
-
Yi-34B-Chat,1.2871676207135438,8.27,34.0,2023-11-22,True,"English, Chinese",4k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2023-11-22
|
56 |
-
Mixtral-8x7B-Instruct-v0.1,0.9392967660636314,8.17,46.7,2023-12-11,True,"English, French, German, Italian, Spanish",16k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2023-12-11
|
57 |
-
Mixtral-8x7B-Instruct-v0.1,0.9392967660636314,8.17,46.7,2023-12-11,True,"English, French, German, Italian, Spanish",16k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2023-12-11
|
58 |
-
Mistral-7B-Instruct-v0.1,0.2828647550771728,8.01,7.0,2023-09-27,True,English,8k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2023-09-27
|
59 |
-
vicuna-13b-v1.5,1.4753938719676598,7.01,13.0,2023-07-29,True,English,4k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-07-29
|
60 |
-
Starling-LM-7B-beta,1.365002297029703,6.56,7.0,2024-03-19,True,English,8k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-03-19
|
61 |
-
Phi-3-mini-128k-instruct,0.6615315832127354,6.33,3.8,2024-04-22,True,English,128k,MIT,https://choosealicense.com/licenses/mit/,False,False,False,False,0.0,0.0,[MIT](https://choosealicense.com/licenses/mit/),2024-04-22
|
62 |
-
Qwen2-7B-Instruct,0.3589407217948714,6.18,7.0,2024-06-04,True,English,128k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-06-04
|
63 |
-
salamandra-7b-instruct,0.3894831193548387,6.04,7.0,2024-09-30,True,"Bulgarian, Catalan, Czech, Welsh, Danish, German, Modern Greek (1453-), English, Spanish, Estonian, Basque, Finnish, French, Irish, Galician, Croatian, Hungarian, Italian, Lithuanian, Latvian, Maltese, Dutch, Norwegian Nynorsk, Occitan (post 1500), Polish, Portuguese, Romanian, Russian, Serbo-Croatian, Slovak, Slovenian, Serbian, Swedish, Ukrainian",8k,Apache 2.0,https://www.apache.org/licenses/LICENSE-2.0,False,False,False,False,0.0,0.0,[Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0),2024-09-30
|
64 |
-
sheep-duck-llama-2-13b,2.9462099794520573,5.39,13.0,2023-10-04,True,English,4k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-10-04
|
65 |
-
dolphin-vision-72b,10.190958003739729,4.65,72.0,2024-06-28,True,English,128k,Qwen,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE,True,True,False,False,0.0,0.0,[Qwen](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct/blob/main/LICENSE),2024-06-28
|
66 |
-
gemma-2-27b-it,0.9922771009345794,3.51,27.0,2024-06-24,True,English,8k,Gemma,https://ai.google.dev/gemma/terms,False,False,False,False,0.0,0.0,[Gemma](https://ai.google.dev/gemma/terms),2024-06-24
|
67 |
-
gemma-1.1-2b-it,0.1192569946127946,2.91,2.0,2024-03-26,True,English,8k,Gemma,https://ai.google.dev/gemma/terms,False,False,False,False,0.0,0.0,[Gemma](https://ai.google.dev/gemma/terms),2024-03-26
|
68 |
-
gemma-2-2b-it,0.3139821517919889,2.67,2.0,2024-07-16,True,English,8k,Gemma,https://ai.google.dev/gemma/terms,False,False,False,False,0.0,0.0,[Gemma](https://ai.google.dev/gemma/terms),2024-07-16
|
69 |
-
Qwen1.5-7B-Chat,0.3898907690883847,2.58,7.0,2024-01-30,True,"Arabic, Spanish, French, Portuguese, German, Italian, Russian, Japanese, Korean, Vietnamese, Thai, Indonesian, English, Chinese",32k,Qwen,https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT,False,False,False,False,0.0,0.0,[Qwen](https://github.com/QwenLM/Qwen/blob/main/Tongyi%20Qianwen%20LICENSE%20AGREEMENT),2024-01-30
|
70 |
-
gemma-7b-it,0.6112263564356434,1.82,7.0,2024-02-21,True,English,8k,Gemma,https://ai.google.dev/gemma/terms,False,False,False,False,0.0,0.0,[Gemma](https://ai.google.dev/gemma/terms),2024-02-21
|
71 |
-
llama-2-70b-chat-hf,4.724659620079607,0.81,70.0,2023-07-18,True,English,4k,Meta,https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt,False,False,False,False,0.0,0.0,[Meta](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/LICENSE.txt),2023-07-18
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
assets/text_content.py
CHANGED
@@ -20,6 +20,7 @@ MODEL_NAME = "Model Name"
|
|
20 |
CLEMSCORE = "Clemscore"
|
21 |
LATENCY = "Latency (s)"
|
22 |
PARAMS = "Parameters (B)"
|
|
|
23 |
RELEASE_DATE = 'Release Date'
|
24 |
OPEN_WEIGHT = 'Open Weight'
|
25 |
LANGS = "Languages"
|
|
|
20 |
CLEMSCORE = "Clemscore"
|
21 |
LATENCY = "Latency (s)"
|
22 |
PARAMS = "Parameters (B)"
|
23 |
+
DUMMY_PARAMS = "Parameters Dummy (B)"
|
24 |
RELEASE_DATE = 'Release Date'
|
25 |
OPEN_WEIGHT = 'Open Weight'
|
26 |
LANGS = "Languages"
|
src/filter_utils.py
CHANGED
@@ -84,30 +84,7 @@ def filter(df, language_list, parameters, input_price, output_price, multimodal,
|
|
84 |
df = df[df[tc.LANGS].apply(lambda x: all(lang in x for lang in language_list))]
|
85 |
|
86 |
if not df.empty:
|
87 |
-
|
88 |
-
open_weight_true = df[
|
89 |
-
(df[tc.OPEN_WEIGHT] == True) &
|
90 |
-
(~df[tc.PARAMS].isna())
|
91 |
-
]
|
92 |
-
open_weight_false = df[
|
93 |
-
(df[tc.OPEN_WEIGHT] == False) |
|
94 |
-
(df[tc.PARAMS].isna()) |
|
95 |
-
(~df.index.isin(open_weight_true.index)) # Catch any remaining rows
|
96 |
-
]
|
97 |
-
|
98 |
-
# Verify no overlap and no data loss
|
99 |
-
assert len(df) == len(open_weight_true) + len(open_weight_false), "Data loss detected"
|
100 |
-
assert len(set(open_weight_true.index) & set(open_weight_false.index)) == 0, "Duplicate entries detected"
|
101 |
-
|
102 |
-
# Filter only the open weight models based on parameters
|
103 |
-
if not open_weight_true.empty:
|
104 |
-
filtered_open = open_weight_true[
|
105 |
-
(open_weight_true[tc.PARAMS] >= parameters[0]) &
|
106 |
-
(open_weight_true[tc.PARAMS] <= parameters[1])
|
107 |
-
]
|
108 |
-
|
109 |
-
# Combine filtered open weight models with unfiltered commercial models
|
110 |
-
df = pd.concat([filtered_open, open_weight_false])
|
111 |
|
112 |
if not df.empty: # Check if df is non-empty
|
113 |
df = df[(df[tc.INPUT] >= input_price[0]) & (df[tc.INPUT] <= input_price[1])]
|
@@ -125,8 +102,12 @@ def filter(df, language_list, parameters, input_price, output_price, multimodal,
|
|
125 |
if tc.VIDEO in multimodal:
|
126 |
df = df[df[tc.VIDEO] == True]
|
127 |
|
128 |
-
|
129 |
-
|
|
|
|
|
|
|
|
|
130 |
|
131 |
if not df.empty: # Check if df is non-empty
|
132 |
if tc.OPEN in open_weight and tc.COMM not in open_weight:
|
|
|
84 |
df = df[df[tc.LANGS].apply(lambda x: all(lang in x for lang in language_list))]
|
85 |
|
86 |
if not df.empty:
|
87 |
+
df = df[(df[tc.DUMMY_PARAMS] >= parameters[0]) & (df[tc.DUMMY_PARAMS] <= parameters[1])]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
if not df.empty: # Check if df is non-empty
|
90 |
df = df[(df[tc.INPUT] >= input_price[0]) & (df[tc.INPUT] <= input_price[1])]
|
|
|
102 |
if tc.VIDEO in multimodal:
|
103 |
df = df[df[tc.VIDEO] == True]
|
104 |
|
105 |
+
if not df.empty: # Check if df is non-empty
|
106 |
+
# Convert 'Context Size (k)' to numeric, coercing errors to NaN
|
107 |
+
context_size = pd.to_numeric(df['Context Size (k)'], errors='coerce').fillna(0)
|
108 |
+
|
109 |
+
# Apply the filter
|
110 |
+
df = df[(context_size >= context[0]) & (context_size <= context[1])]
|
111 |
|
112 |
if not df.empty: # Check if df is non-empty
|
113 |
if tc.OPEN in open_weight and tc.COMM not in open_weight:
|
src/process_data.py
CHANGED
@@ -183,6 +183,20 @@ def merge_data():
|
|
183 |
# Drop model_name column
|
184 |
merged_df.drop(columns=['model_name'], inplace=True)
|
185 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
return merged_df
|
187 |
|
188 |
if __name__=='__main__':
|
|
|
183 |
# Drop model_name column
|
184 |
merged_df.drop(columns=['model_name'], inplace=True)
|
185 |
|
186 |
+
# Clean up context and convert to integer
|
187 |
+
merged_df[tc.CONTEXT] = merged_df[tc.CONTEXT].astype(str).str.replace('k', '', regex=False)
|
188 |
+
merged_df[tc.CONTEXT] = pd.to_numeric(merged_df[tc.CONTEXT], errors='coerce').fillna(0).astype(int)
|
189 |
+
|
190 |
+
# Handle commercial model parameters / Set to max of open models
|
191 |
+
# Find the maximum value of tc.PARAMS where tc.OPEN_WEIGHT is True
|
192 |
+
max_params_value = merged_df.loc[merged_df[tc.OPEN_WEIGHT], tc.PARAMS].max()
|
193 |
+
|
194 |
+
# Create a new dummy PARAM column
|
195 |
+
merged_df[tc.DUMMY_PARAMS] = merged_df.apply(
|
196 |
+
lambda row: max_params_value if not row[tc.OPEN_WEIGHT] else row[tc.PARAMS],
|
197 |
+
axis=1
|
198 |
+
)
|
199 |
+
|
200 |
return merged_df
|
201 |
|
202 |
if __name__=='__main__':
|