|
import gradio as gr |
|
|
|
from src.utils import model_hyperlink, process_score |
|
|
|
# Maps every column label the leaderboard can show to its datatype string.
# create_leaderboard_table() offers the keys as the selectable columns and
# passes the keys / values as the Dataframe's headers / datatype lists, so the
# insertion order of this dict matters.
# NOTE(review): the symbols trailing several labels look like mis-decoded
# emoji. They are kept verbatim because code in this file looks DataFrame
# columns up by these exact strings -- confirm against the data source.
LEADERBOARD_COLUMN_TO_DATATYPE = {
    # model identity
    "Model π€": "markdown",
    "Experiment π§ͺ": "str",
    # measurements
    "Prefill (s)": "number",
    "Decode (tokens/s)": "number",
    "Memory (MB)": "number",
    "Energy (tokens/kWh)": "number",
    # string-valued setup columns
    "Backend π­": "str",
    "Precision π₯": "str",
    "Quantization ποΈ": "str",
    "Attention ποΈ": "str",
    "Kernel βοΈ": "str",
    # additional columns
    "Open LLM Score (%)": "number",
    "End-to-End (s)": "number",
    "Architecture ποΈ": "str",
    "Params (B)": "number",
}
|
|
|
|
|
# Columns shown by default: create_leaderboard_table() uses this list both as
# the pre-selected value of the column checkbox group and as the column subset
# of the initial table.
# NOTE(review): the symbols trailing some labels look like mis-decoded emoji;
# they are kept verbatim because they are used as DataFrame column keys.
PRIMARY_COLUMNS = [
    "Model π€",
    "Experiment π§ͺ",
    "Prefill (s)",
    "Decode (tokens/s)",
    "Memory (MB)",
    "Energy (tokens/kWh)",
    "Open LLM Score (%)",
]
|
|
|
|
|
# Capability score columns of the BGB leaderboard. create_bgb_leaderboard_table()
# uses this list as both the choices and the default selection of the
# "Capabilities" checkbox group.
# NOTE(review): the symbols trailing each label look like mis-decoded emoji and
# are kept as-is because the same strings are used as column labels elsewhere
# in this file. The "Planning" label contains a line-break control character
# inside the literal; it is spelled with the \x85 (NEL) escape here so the
# string stays on one source line -- confirm the exact character against the
# data source.
CAPABILITY_COLUMNS = [
    "Grounding β‘οΈ",
    "Instruction Following π",
    "Planning π\x85",
    "Reasoning π‘",
    "Refinement π©",
    "Safety β οΈ",
    "Theory of Mind π€",
    "Tool Usage π οΈ",
    "Multilingual π¬π«",
]
|
|
|
|
|
# Raw BGB result field name -> display label. The values (in this order) are
# the columns create_bgb_leaderboard_table() selects for the table.
# NOTE(review): the symbols trailing several labels look like mis-decoded
# emoji and are kept as-is because they are used as DataFrame column keys.
# The "planning" label contains a line-break control character inside the
# literal; it is spelled with the \x85 (NEL) escape so the string stays on one
# source line -- confirm the exact character against the data source.
BGB_COLUMN_MAPPING = dict(
    model_name_or_path="Model π€",
    average="Average",
    grounding="Grounding β‘οΈ",
    instruction_following="Instruction Following π",
    planning="Planning π\x85",
    reasoning="Reasoning π‘",
    refinement="Refinement π©",
    safety="Safety β οΈ",
    theory_of_mind="Theory of Mind π€",
    tool_usage="Tool Usage π οΈ",
    multilingual="Multilingual π¬π«",
    model_params="Model Params (B)",
    model_type="Model Type",
)
|
|
|
|
|
# Display label -> datatype string for the BGB table. The values are passed,
# in insertion order, as the Dataframe's datatype list, so the order here must
# stay: model link first, every numeric column next, model type last.
# NOTE(review): the symbols trailing several labels look like mis-decoded
# emoji and are kept as-is because they are used as DataFrame column keys.
# The "Planning" label contains a line-break control character inside the
# literal; it is spelled with the \x85 (NEL) escape so the string stays on one
# source line -- confirm the exact character against the data source.
BGB_COLUMN_TO_DATATYPE = {
    "Model π€": "markdown",
    **dict.fromkeys(
        (
            "Average",
            "Grounding β‘οΈ",
            "Instruction Following π",
            "Planning π\x85",
            "Reasoning π‘",
            "Refinement π©",
            "Safety β οΈ",
            "Theory of Mind π€",
            "Tool Usage π οΈ",
            "Multilingual π¬π«",
            "Model Params (B)",
        ),
        "number",
    ),
    "Model Type": "str",
}
|
|
|
|
|
def process_model(model_name):
    """Return the hyperlink rendering of a Hugging Face Hub model.

    The Hub URL is ``https://huggingface.co/<model_name>``; it is handed to
    ``model_hyperlink`` together with the name used as the link text.
    """
    return model_hyperlink(f"https://huggingface.co/{model_name}", model_name)
|
|
|
|
|
|
|
def process_bgb_model(row):
    """Turn a (model name, model type) row into a hyperlink for the model.

    Args:
        row: Object whose ``.iloc[0]`` is the model name and ``.iloc[1]`` the
            model type (get_bgb_leaderboard_df passes two-column DataFrame
            rows).

    Returns:
        Whatever ``model_hyperlink(url, model_name)`` returns.

    Raises:
        KeyError: The type is "Proprietary" but the model has no entry in the
            documentation-link table below.
        NotImplementedError: The type is not "Base", "Chat" or "Proprietary".
    """
    name, kind = row.iloc[0], row.iloc[1]

    if kind in ("Base", "Chat"):
        # Open-weight models link to their Hugging Face Hub page.
        url = f"https://huggingface.co/{name}"
    elif kind == "Proprietary":
        # API-only models link to a vendor documentation/announcement page.
        vendor_pages = {
            "gpt-3.5-turbo-1106": "https://platform.openai.com/docs/models/gpt-3-5",
            "gpt-3.5-turbo-0125": "https://platform.openai.com/docs/models/gpt-3-5",
            "gpt-4-0125-preview": "https://openai.com/blog/new-models-and-developer-products-announced-at-devday",
            "gpt-4-1106-preview": "https://openai.com/blog/new-models-and-developer-products-announced-at-devday",
            "gpt-4-turbo-2024-04-09": "https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4",
            "gpt-4o-2024-05-13": "https://openai.com/index/hello-gpt-4o/",
            "claude-3-haiku-20240307": "https://www.anthropic.com/news/claude-3-family",
            "claude-3-opus-20240229": "https://www.anthropic.com/news/claude-3-family",
            "claude-3-sonnet-20240229": "https://www.anthropic.com/news/claude-3-family",
            "mistral-large": "https://mistral.ai/news/mistral-large/",
            "mistral-medium": "https://mistral.ai/news/la-plateforme/",
            "gemini-1.0-pro": "https://deepmind.google/technologies/gemini/pro/",
            "gemini-pro-1.5": "https://deepmind.google/technologies/gemini/pro/",
            "google/gemini-flash-1.5": "https://deepmind.google/technologies/gemini/flash/",
        }
        # Plain indexing on purpose: an unlisted model surfaces as KeyError.
        url = vendor_pages[name]
    else:
        raise NotImplementedError(f"Model type {kind} not implemented")

    return model_hyperlink(url, name)
|
|
|
|
|
def get_leaderboard_df(llm_perf_df):
    """Return a display-ready copy of the leaderboard DataFrame.

    The input frame is not modified. In the copy, the model column is replaced
    by the hyperlink produced by ``process_model`` and the score column by the
    result of ``process_score(score, quantization)`` for each row.

    Args:
        llm_perf_df: DataFrame containing at least the model, "Open LLM Score
            (%)" and quantization columns referenced below.

    Returns:
        The transformed copy.
    """
    df = llm_perf_df.copy()

    # Series.apply passes each cell value (not a row) to the callback, so the
    # name-based process_model is the right helper here. process_bgb_model
    # reads row.iloc[...] and would fail on a plain string cell.
    df["Model π€"] = df["Model π€"].apply(process_model)

    # The score formatting needs two columns, hence the row-wise apply.
    df["Open LLM Score (%)"] = df.apply(
        lambda x: process_score(x["Open LLM Score (%)"], x["Quantization ποΈ"]),
        axis=1,
    )
    return df
|
|
|
|
|
def get_bgb_leaderboard_df(eval_df):
    """Return a copy of ``eval_df`` whose model column holds hyperlinks.

    Each row's model name and "Model Type" are passed together to
    ``process_bgb_model``; the result overwrites the model column of the copy.
    The input frame itself is left untouched.
    """
    leaderboard = eval_df.copy()

    # process_bgb_model reads the pair positionally: name first, type second.
    name_and_type = leaderboard[["Model π€", "Model Type"]]
    leaderboard["Model π€"] = name_and_type.apply(process_bgb_model, axis=1)
    return leaderboard
|
|
|
|
|
def create_leaderboard_table(llm_perf_df):
    """Create the search box, column selector and table for the leaderboard.

    Args:
        llm_perf_df: Raw results frame; it is first run through
            ``get_leaderboard_df``.

    Returns:
        Tuple ``(search_bar, columns_checkboxes, leaderboard_table)`` of the
        created Gradio components.
    """
    # NOTE(review): presumably called inside an active gr.Blocks context --
    # confirm at the call site.
    table_df = get_leaderboard_df(llm_perf_df)
    all_columns = list(LEADERBOARD_COLUMN_TO_DATATYPE.keys())
    all_datatypes = list(LEADERBOARD_COLUMN_TO_DATATYPE.values())

    with gr.Row():
        search_bar = gr.Textbox(
            elem_id="search-bar",
            label="Model π€",
            info="π Search for a model name",
        )

    with gr.Row():
        columns_checkboxes = gr.CheckboxGroup(
            elem_id="columns-checkboxes",
            label="Columns π",
            info="βοΈ Select the columns to display",
            choices=all_columns,
            value=PRIMARY_COLUMNS,
        )

    # NOTE(review): the initial value only holds PRIMARY_COLUMNS, while the
    # headers/datatype lists cover every known column, so the two do not line
    # up position by position -- confirm this is intended.
    leaderboard_table = gr.components.Dataframe(
        elem_id="leaderboard-table",
        value=table_df[PRIMARY_COLUMNS],
        headers=all_columns,
        datatype=all_datatypes,
    )

    return search_bar, columns_checkboxes, leaderboard_table
|
|
|
|
|
def create_bgb_leaderboard_table(eval_df):
    """Create the filter widgets and table for the BGB leaderboard.

    Args:
        eval_df: Evaluation results frame; it is first run through
            ``get_bgb_leaderboard_df``.

    Returns:
        Tuple ``(search_bar, columns_checkboxes, type_checkboxes,
        param_slider, bgb_leaderboard_table)`` of the created Gradio
        components, in that order.
    """
    # NOTE(review): presumably called inside an active gr.Blocks context --
    # confirm at the call site.
    bgb_leaderboard_df = get_bgb_leaderboard_df(eval_df)

    # Display labels, in table order. Used for both the column selection and
    # the headers so the two cannot drift apart.
    display_columns = list(BGB_COLUMN_MAPPING.values())

    with gr.Row():
        search_bar = gr.Textbox(
            label="Model π€",
            info="π Search for a model name",
            elem_id="search-bar",
        )

    with gr.Row():
        type_checkboxes = gr.CheckboxGroup(
            label="Model Type",
            value=["Base", "Chat", "Proprietary"],
            choices=["Base", "Chat", "Proprietary"],
            # This group filters by model type, so the hint says so (it used
            # to repeat the capabilities hint).
            info="βοΈ Select the model types to display",
            elem_id="type-checkboxes",
        )

    with gr.Row():
        param_slider = gr.Slider(
            minimum=0,
            maximum=150,
            value=7,
            step=1,
            interactive=True,
            label="Model Params (B)",
            elem_id="param-slider",
        )

    with gr.Row():
        columns_checkboxes = gr.CheckboxGroup(
            label="Capabilities π",
            value=CAPABILITY_COLUMNS,
            choices=CAPABILITY_COLUMNS,
            info="βοΈ Select the capabilities to display",
            elem_id="columns-checkboxes",
        )

    bgb_leaderboard_table = gr.components.Dataframe(
        value=bgb_leaderboard_df[display_columns],
        datatype=list(BGB_COLUMN_TO_DATATYPE.values()),
        # Headers are the display labels (matching the value's columns and the
        # datatype order), not the raw result field names.
        headers=display_columns,
        elem_id="leaderboard-table",
    )

    return search_bar, columns_checkboxes, type_checkboxes, param_slider, bgb_leaderboard_table
|
|