|
import gradio as gr |
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
from huggingface_hub import snapshot_download |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
# Parquet files read directly from the Hugging Face Hub via hf:// URIs.
LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"

# Load all three datasets up front; fail fast (re-raise) if any is unavailable,
# since every tab of the UI depends on them.
try:
    leaderboard_data, model_responses_data, section_results_data = [
        pd.read_parquet(path)
        for path in (LEADERBOARD_PATH, RESPONSES_PATH, SECTION_RESULTS_PATH)
    ]
except Exception as e:
    print(f"Error loading datasets: {e}")
    raise
|
|
|
|
|
def filter_leaderboard(family=None, quantization_level=None, df=None):
    """Return leaderboard rows matching the given filters.

    Args:
        family: If truthy, keep only rows whose ``"family"`` column equals it.
        quantization_level: If truthy, keep only rows whose
            ``"quantization_level"`` column equals it.
        df: Optional DataFrame to filter. Defaults to the module-level
            ``leaderboard_data``, so existing callers (the Gradio click
            handler) are unchanged.

    Returns:
        A filtered copy of the leaderboard DataFrame; the source frame is
        never mutated.
    """
    # Copy so row-filtering below can never alias the shared module data.
    df = leaderboard_data.copy() if df is None else df.copy()
    if family:
        df = df[df["family"] == family]
    if quantization_level:
        df = df[df["quantization_level"] == quantization_level]
    return df
|
|
|
def search_responses(query, model, df=None):
    """Return model responses whose "bolum" (section) contains *query*.

    Args:
        query: Case-insensitive literal substring to look for in "bolum".
        model: Model name from the leaderboard; its answers are read from
            the ``"<model>_cevap"`` column.
        df: Optional DataFrame to search. Defaults to the module-level
            ``model_responses_data``, so existing callers are unchanged.

    Returns:
        Matching rows restricted to ["bolum", "soru", "cevap", "<model>_cevap"].
    """
    if df is None:
        df = model_responses_data
    # regex=False: treat the user's query as a literal string, so characters
    # like "(" or "?" can neither raise a regex error nor match unexpectedly.
    # na=False: rows with a missing "bolum" are excluded instead of producing
    # NaN in the boolean mask.
    mask = df["bolum"].str.contains(query, case=False, regex=False, na=False)
    selected_columns = ["bolum", "soru", "cevap", model + "_cevap"]
    return df[mask][selected_columns]
|
|
|
def plot_section_results():
    """Render a bar chart of the mean of each numeric section column.

    Reads the module-level ``section_results_data`` and returns a new
    matplotlib Figure for Gradio to display.
    """
    figure, axis = plt.subplots(figsize=(10, 6))
    # numeric_only=True skips any non-numeric metadata columns.
    section_means = section_results_data.mean(numeric_only=True)
    section_means.plot(kind="bar", ax=axis)
    axis.set_title("Average Section-Wise Performance")
    axis.set_ylabel("Accuracy (%)")
    axis.set_xlabel("Sections")
    return figure
|
|
|
def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
    """Acknowledge a model submission.

    Placeholder handler: no evaluation is actually queued yet; it only
    echoes a confirmation message back to the submission form.
    """
    confirmation = f"Model '{model_name}' submitted successfully!"
    return confirmation
|
|
|
|
|
with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
    gr.HTML("<h1>π Turkish MMLU Leaderboard</h1>")
    gr.Markdown("Explore, evaluate, and compare AI model performance.")

    with gr.Tabs() as tabs:

        # --- Tab 1: filterable leaderboard table -------------------------
        with gr.TabItem("Leaderboard"):
            family_choices = leaderboard_data["family"].unique().tolist()
            quant_choices = leaderboard_data["quantization_level"].unique().tolist()
            family_dd = gr.Dropdown(
                choices=family_choices, label="Filter by Family", multiselect=False
            )
            quant_dd = gr.Dropdown(
                choices=quant_choices, label="Filter by Quantization Level"
            )
            board_table = gr.DataFrame(leaderboard_data)
            apply_button = gr.Button("Apply Filters")
            apply_button.click(
                filter_leaderboard,
                inputs=[family_dd, quant_dd],
                outputs=board_table,
            )

        # --- Tab 2: per-model response search ----------------------------
        with gr.TabItem("Model Responses"):
            model_choices = leaderboard_data["model"].unique().tolist()
            model_dropdown = gr.Dropdown(choices=model_choices, label="Select Model")
            query_input = gr.Textbox(label="Search Query")
            responses_table = gr.DataFrame()
            search_button = gr.Button("Search")
            search_button.click(
                search_responses,
                inputs=[query_input, model_dropdown],
                outputs=responses_table,
            )

        # --- Tab 3: aggregate section-level results ----------------------
        with gr.TabItem("Section Results"):
            gr.Plot(plot_section_results)
            gr.DataFrame(section_results_data)

        # --- Tab 4: model submission form --------------------------------
        with gr.TabItem("Submit Model"):
            gr.Markdown("### Submit Your Model for Evaluation")
            model_name = gr.Textbox(label="Model Name")
            base_model = gr.Textbox(label="Base Model")
            revision = gr.Textbox(label="Revision", placeholder="main")
            precision = gr.Dropdown(
                choices=["float16", "int8", "bfloat16", "float32"],
                label="Precision",
                value="float16",
            )
            weight_type = gr.Dropdown(
                choices=["Original", "Delta", "Adapter"],
                label="Weight Type",
                value="Original",
            )
            model_type = gr.Dropdown(
                choices=["Transformer", "RNN", "GPT", "Other"],
                label="Model Type",
                value="Transformer",
            )
            submit_button = gr.Button("Submit")
            submission_output = gr.Markdown()
            submit_button.click(
                add_new_model,
                inputs=[model_name, base_model, revision, precision, weight_type, model_type],
                outputs=submission_output,
            )
|
|
|
|
|
def _refresh_dataset_cache():
    """Background job: re-download the dataset snapshot into ./cache."""
    # NOTE(review): repo_id "alibayram" looks like a user namespace rather
    # than a full "user/dataset" repo id — confirm this is the intended
    # snapshot target.
    snapshot_download(repo_id="alibayram", repo_type="dataset", local_dir="cache")


# Refresh the local dataset cache every 30 minutes.
scheduler = BackgroundScheduler()
scheduler.add_job(_refresh_dataset_cache, "interval", seconds=1800)
scheduler.start()

app.queue(default_concurrency_limit=40).launch()