Upload 2 files
- app.py +63 -0
- evaluation.py +39 -0
app.py
ADDED
@@ -0,0 +1,63 @@
import gradio as gr
import pandas as pd
import os
import shutil
from evaluation import evaluate_model  # Import your evaluation function

# Define the path where you want to save the leaderboard data
leaderboard_file = "leaderboard.csv"

# Check if leaderboard file exists, otherwise create an empty DataFrame
if os.path.exists(leaderboard_file):
    leaderboard = pd.read_csv(leaderboard_file)
else:
    leaderboard = pd.DataFrame(columns=["Model Name", "Score"])

# Submit the evaluation and update the leaderboard
def submit_evaluation(model_name, model_file):
    """
    Handles the model submission, evaluates it, and updates the leaderboard.
    """
    global leaderboard

    # Save the uploaded model to a local folder. gr.File passes either a
    # temp-file object (with a .name path) or a plain path string, so handle both.
    uploaded_path = model_file.name if hasattr(model_file, "name") else model_file
    os.makedirs("models", exist_ok=True)
    model_path = os.path.join("models", os.path.basename(uploaded_path))
    shutil.copy(uploaded_path, model_path)

    # Example test data (replace with your actual test dataset)
    test_data = [
        ("Example text 1", 0),  # (text, label)
        ("Example text 2", 1),
        # Add more test data here
    ]

    # Evaluate the model using your custom evaluation code
    score = evaluate_model(model_path, test_data)

    # Update the leaderboard (DataFrame.append was removed in pandas 2.0, so use concat)
    new_entry = {"Model Name": model_name, "Score": score}
    leaderboard = pd.concat([leaderboard, pd.DataFrame([new_entry])], ignore_index=True)
    leaderboard_sorted = leaderboard.sort_values(by="Score", ascending=False)

    # Save the updated leaderboard
    leaderboard_sorted.to_csv(leaderboard_file, index=False)

    # Return the sorted leaderboard as output
    return leaderboard_sorted

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Model Evaluation Leaderboard")

    # Model submission interface
    with gr.Row():
        model_name_input = gr.Textbox(label="Model Name", placeholder="Enter the model name")
        model_file_input = gr.File(label="Upload Model (Hugging Face Model Format)", file_types=[".pt", ".bin", ".h5", ".zip"])

    submit_button = gr.Button("Submit Evaluation")

    # Leaderboard display area
    leaderboard_display = gr.Dataframe(leaderboard)

    submit_button.click(submit_evaluation, inputs=[model_name_input, model_file_input], outputs=[leaderboard_display])

# Launch the interface
demo.launch()
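For a quick sanity check of the leaderboard bookkeeping without launching the Gradio UI, a minimal sketch along these lines can be run locally; the "baseline-bert" entry and its score are illustrative placeholders, not values produced by the app:

# Sketch: exercise the same CSV round-trip that submit_evaluation relies on.
import pandas as pd

leaderboard = pd.DataFrame(columns=["Model Name", "Score"])
new_entry = {"Model Name": "baseline-bert", "Score": 0.50}  # hypothetical entry
leaderboard = pd.concat([leaderboard, pd.DataFrame([new_entry])], ignore_index=True)
leaderboard.sort_values(by="Score", ascending=False).to_csv("leaderboard.csv", index=False)
print(pd.read_csv("leaderboard.csv"))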
evaluation.py
ADDED
@@ -0,0 +1,39 @@
# evaluation.py

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import accuracy_score

def evaluate_model(model_path, test_data):
    """
    Evaluates the model based on test data and returns the score.
    This function assumes a classification task and requires the test data in a specific format.

    Args:
    - model_path: Path or identifier of the model (Hugging Face format)
    - test_data: A list of tuples (text, label) for evaluation

    Returns:
    - score: Evaluation score (e.g., accuracy)
    """

    # Load model and tokenizer (from_pretrained expects a Hugging Face-format
    # model directory or Hub id, not a single weights file)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    model.eval()
    all_preds = []
    all_labels = []

    # Run inference without gradient tracking
    with torch.no_grad():
        for text, label in test_data:
            inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
            outputs = model(**inputs)
            logits = outputs.logits
            prediction = torch.argmax(logits, dim=-1).item()

            all_preds.append(prediction)
            all_labels.append(label)

    # Calculate accuracy (or any other metric)
    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy
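As a rough usage sketch for the evaluator on its own (the checkpoint path and example sentences below are placeholders; from_pretrained needs a Hugging Face-format model directory or Hub id saved locally):

# Sketch: call evaluate_model directly against a locally saved checkpoint.
from evaluation import evaluate_model

test_data = [
    ("The movie was fantastic.", 1),   # hypothetical (text, label) pairs
    ("I did not enjoy this at all.", 0),
]

score = evaluate_model("models/my-checkpoint", test_data)  # placeholder path
print(f"Accuracy: {score:.3f}")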