Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
[ | |
{ | |
"model": "sabia-2-small", | |
"name": "Sabiá-2 Small", | |
"link": "https://www.maritaca.ai/", | |
"date": "2024-04-12", | |
"status": "full", | |
"main_language": "Portuguese", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.7172848145556333, | |
"bluex": 0.5549374130737135, | |
"oab_exams": 0.6364464692482916, | |
"assin2_sts": 0.7053302344881672, | |
"assin2_rte": 0.9121728362223306, | |
"faquad_nli": 0.7575848453041435, | |
"hatebr_offensive": 0.5025338637870607, | |
"portuguese_hate_speech": 0.4650217578860529, | |
"tweetsentbr": 0.533977453070735 | |
}, | |
"result_metrics_average": 0.6428099652929031, | |
"result_metrics_npm": 0.43960062672137007 | |
}, | |
{ | |
"model": "sabia-2-medium", | |
"name": "Sabiá-2 Medium", | |
"link": "https://www.maritaca.ai/", | |
"date": "2024-04-13", | |
"status": "full", | |
"main_language": "Portuguese", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.8180545836249126, | |
"bluex": 0.717663421418637, | |
"oab_exams": 0.7321184510250569, | |
"assin2_sts": 0.7804108376537757, | |
"assin2_rte": 0.923459363368553, | |
"faquad_nli": 0.7657657657657658, | |
"hatebr_offensive": 0.8349989882997386, | |
"portuguese_hate_speech": 0.7379326358571694, | |
"tweetsentbr": 0.7269533040381798 | |
}, | |
"result_metrics_average": 0.7819285945613098, | |
"result_metrics_npm": 0.6676121786922709 | |
}, | |
{ | |
"model": "gpt-3.5-turbo-0125", | |
"name": "GPT-3.5 Turbo (0125)", | |
"link": "https://www.openai.com/", | |
"date": "2024-03-08", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.7214835549335199, | |
"bluex": 0.6244784422809457, | |
"oab_exams": 0.5430523917995445, | |
"assin2_sts": 0.7378460201077941, | |
"assin2_rte": 0.8823038414050672, | |
"faquad_nli": 0.746353108609074, | |
"hatebr_offensive": 0.8056205941193919, | |
"portuguese_hate_speech": 0.7363692688971499, | |
"tweetsentbr": 0.7028981330613626 | |
}, | |
"result_metrics_average": 0.7222672616904278, | |
"result_metrics_npm": 0.5841504766165372 | |
}, | |
{ | |
"model": "claude-3-haiku-20240307", | |
"name": "Claude-3 Haiku (20240307)", | |
"link": "https://www.claude.ai/", | |
"date": "2024-04-13", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.7718684394681595, | |
"bluex": 0.6662030598052852, | |
"oab_exams": 0.626879271070615, | |
"assin2_sts": 0.7892124744168747, | |
"assin2_rte": 0.9184462138121732, | |
"faquad_nli": 0.6340996599941455, | |
"hatebr_offensive": 0.8023698759439051, | |
"portuguese_hate_speech": 0.7342166269560177, | |
"tweetsentbr": 0.5477486799750156 | |
}, | |
"result_metrics_average": 0.7212271446046878, | |
"result_metrics_npm": 0.5735261536314672 | |
}, | |
{ | |
"model": "gemini-1.0-pro", | |
"name": "Gemini 1.0 Pro", | |
"link": "https://ai.google.dev/", | |
"date": "2024-03-08", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.7130860741777467, | |
"bluex": 0.5869262865090403, | |
"oab_exams": 0.4988610478359909, | |
"assin2_sts": 0.7058831239763663, | |
"assin2_rte": 0.8945993304651698, | |
"faquad_nli": 0.7070913567220611, | |
"hatebr_offensive": 0.8086330094493972, | |
"portuguese_hate_speech": 0.699119105113102, | |
"tweetsentbr": 0.6803240476660983 | |
}, | |
"result_metrics_average": 0.6993914868794414, | |
"result_metrics_npm": 0.551208000273598 | |
}, | |
{ | |
"model": "gemini-1.5-pro-preview-0409", | |
"name": "Gemini 1.5 Pro Preview (0409)", | |
"link": "https://cloud.google.com/vertex-ai", | |
"date": "2024-04-15", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.8509447165850245, | |
"bluex": 0.7719054242002782, | |
"oab_exams": 0.6888382687927107, | |
"assin2_sts": 0.8159702278408203, | |
"assin2_rte": 0.9328989988467518, | |
"faquad_nli": 0.7290756302521009, | |
"hatebr_offensive": 0.8697698647467024, | |
"portuguese_hate_speech": 0.7539414414414414, | |
"tweetsentbr": 0.772785080895884 | |
}, | |
"result_metrics_average": 0.7984588504001905, | |
"result_metrics_npm": 0.6908188311933006 | |
}, | |
{ | |
"model": "deepseek-v2-chat", | |
"name": "DeepSeek-V2 Chat (API)", | |
"link": "https://www.deepseek.com/", | |
"date": "2024-05-18", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.7844646606018194, | |
"bluex": 0.6954102920723226, | |
"oab_exams": 0.564009111617312, | |
"assin2_sts": 0.8533174657651231, | |
"assin2_rte": 0.9440170304568147, | |
"faquad_nli": 0.7995469048381548, | |
"hatebr_offensive": 0.8842986491071644, | |
"portuguese_hate_speech": 0.7271736342651962, | |
"tweetsentbr": 0.6835304759163984 | |
}, | |
"result_metrics_average": 0.7706409138489229, | |
"result_metrics_npm": 0.655901521190756 | |
}, | |
{ | |
"model": "gemini-1.5-flash-preview-0514", | |
"name": "Gemini 1.5 Flash Preview (0514)", | |
"link": "https://cloud.google.com/vertex-ai", | |
"date": "2024-05-18", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.8264520643806857, | |
"bluex": 0.7482614742698191, | |
"oab_exams": 0.6419134396355353, | |
"assin2_sts": 0.841655158151231, | |
"assin2_rte": 0.9362097477374545, | |
"faquad_nli": 0.8092185592185592, | |
"hatebr_offensive": 0.9099110141445836, | |
"portuguese_hate_speech": 0.6875904275305673, | |
"tweetsentbr": 0.7219800292667018 | |
}, | |
"result_metrics_average": 0.7914657682594597, | |
"result_metrics_npm": 0.6834036936130392 | |
}, | |
{ | |
"model": "gemini-1.5-flash", | |
"name": "Gemini 1.5 Flash", | |
"link": "https://cloud.google.com/vertex-ai", | |
"date": "2024-08-09", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.8306508047585724, | |
"bluex": 0.7579972183588317, | |
"oab_exams": 0.6446469248291572, | |
"assin2_sts": 0.838806085610371, | |
"assin2_rte": 0.9366169973822607, | |
"faquad_nli": 0.7963910785668922, | |
"hatebr_offensive": 0.9092078461170015, | |
"portuguese_hate_speech": 0.6932563987219857, | |
"tweetsentbr": 0.7312948963367732 | |
}, | |
"result_metrics_average": 0.7932075834090939, | |
"result_metrics_npm": 0.6855338135928848 | |
}, | |
{ | |
"model": "gpt-4o-mini-2024-07-18", | |
"name": "GPT 4o Mini (2024-07-18)", | |
"link": "https://www.openai.com/", | |
"date": "2024-07-25", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.7669699090272918, | |
"bluex": 0.6842837273991655, | |
"oab_exams": 0.6013667425968109, | |
"assin2_sts": 0.7259038954527597, | |
"assin2_rte": 0.942809846745341, | |
"faquad_nli": 0.819807735300693, | |
"hatebr_offensive": 0.8682357029532165, | |
"portuguese_hate_speech": 0.7501413502853012, | |
"tweetsentbr": 0.7509303825869922 | |
}, | |
"result_metrics_average": 0.7678276991497301, | |
"result_metrics_npm": 0.6595966999910003 | |
}, | |
{ | |
"model": "nemotron-4-340b-instruct", | |
"name": "nvidia/Nemotron-4-340B-Instruct (Nvidia API)", | |
"link": "https://build.nvidia.com/nvidia/nemotron-4-340b-instruct", | |
"date": "2024-06-30", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "chat", | |
"params": 340.0, | |
"result_metrics": { | |
"enem_challenge": 0.6648005598320503, | |
"bluex": 0.6578581363004172, | |
"oab_exams": 0.7020501138952164, | |
"assin2_sts": 0.7857731021403329, | |
"assin2_rte": 0.9489354458928496, | |
"faquad_nli": 0.8194444444444444, | |
"hatebr_offensive": 0.8641580001234928, | |
"portuguese_hate_speech": 0.7761835184102864, | |
"tweetsentbr": 0.780880021326841 | |
}, | |
"result_metrics_average": 0.7777870380406591, | |
"result_metrics_npm": 0.6740728488043128 | |
}, | |
{ | |
"model": "llama_405b_instruct", | |
"name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)", | |
"link": "https://cloud.google.com/vertex-ai", | |
"date": "2024-08-20", | |
"status": "full", | |
"main_language": "English", | |
"model_type": "chat", | |
"params": 406.0, | |
"result_metrics": { | |
"enem_challenge": 0.8523442967109867, | |
"bluex": 0.8011126564673157, | |
"oab_exams": 0.7640091116173121, | |
"assin2_sts": 0.7888441732870783, | |
"assin2_rte": 0.6317630318610981, | |
"faquad_nli": 0.825063276593557, | |
"hatebr_offensive": 0.9073940659389119, | |
"portuguese_hate_speech": 0.7191480935512969, | |
"tweetsentbr": 0.7821434639106575 | |
}, | |
"result_metrics_average": 0.7857580188820238, | |
"result_metrics_npm": 0.6584973442501938 | |
}, | |
{ | |
"model": "sabia-3", | |
"name": "Sabiá-3", | |
"link": "https://www.maritaca.ai/", | |
"date": "2024-08-20", | |
"status": "full", | |
"main_language": "Portuguese", | |
"model_type": "proprietary", | |
"result_metrics": { | |
"enem_challenge": 0.8789363191042687, | |
"bluex": 0.7899860917941586, | |
"oab_exams": 0.8391799544419134, | |
"assin2_sts": 0.8253863689009022, | |
"assin2_rte": 0.9477034821619312, | |
"faquad_nli": 0.8243848812618203, | |
"hatebr_offensive": 0.5519158516393349, | |
"portuguese_hate_speech": 0.48273809523809524, | |
"tweetsentbr": 0.5632959814986498 | |
}, | |
"result_metrics_average": 0.744836336226786, | |
"result_metrics_npm": 0.5802643096708316 | |
} | |
] |