Upload 2 files
- app.py +63 -0
- evaluation.py +39 -0
app.py
ADDED
@@ -0,0 +1,63 @@
import gradio as gr
import pandas as pd
import os
import shutil
from evaluation import evaluate_model  # Import your evaluation function

# Define the path where you want to save the leaderboard data
leaderboard_file = "leaderboard.csv"

# Check if leaderboard file exists, otherwise create an empty DataFrame
if os.path.exists(leaderboard_file):
    leaderboard = pd.read_csv(leaderboard_file)
else:
    leaderboard = pd.DataFrame(columns=["Model Name", "Score"])

# Submit the evaluation and update the leaderboard
def submit_evaluation(model_name, model_file):
    """
    Handles the model submission, evaluates it, and updates the leaderboard.
    """
    global leaderboard

    # Save the uploaded model to a local folder. gr.File passes either a
    # temp-file object (with a .name path) or a plain path string, so handle both.
    uploaded_path = model_file.name if hasattr(model_file, "name") else model_file
    os.makedirs("models", exist_ok=True)
    model_path = os.path.join("models", os.path.basename(uploaded_path))
    shutil.copy(uploaded_path, model_path)

    # Example test data (replace with your actual test dataset)
    test_data = [
        ("Example text 1", 0),  # (text, label)
        ("Example text 2", 1),
        # Add more test data here
    ]

    # Evaluate the model using your custom evaluation code
    score = evaluate_model(model_path, test_data)

    # Update the leaderboard (DataFrame.append was removed in pandas 2.0, so use concat)
    new_entry = {"Model Name": model_name, "Score": score}
    leaderboard = pd.concat([leaderboard, pd.DataFrame([new_entry])], ignore_index=True)
    leaderboard_sorted = leaderboard.sort_values(by="Score", ascending=False)

    # Save the updated leaderboard
    leaderboard_sorted.to_csv(leaderboard_file, index=False)

    # Return the sorted leaderboard as output
    return leaderboard_sorted

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Model Evaluation Leaderboard")

    # Model submission interface
    with gr.Row():
        model_name_input = gr.Textbox(label="Model Name", placeholder="Enter the model name")
        model_file_input = gr.File(label="Upload Model (Hugging Face Model Format)", file_types=[".pt", ".bin", ".h5", ".zip"])

    submit_button = gr.Button("Submit Evaluation")

    # Leaderboard display area
    leaderboard_display = gr.Dataframe(leaderboard)

    submit_button.click(submit_evaluation, inputs=[model_name_input, model_file_input], outputs=[leaderboard_display])

# Launch the interface
demo.launch()
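For a quick sanity check of the leaderboard bookkeeping without launching the Gradio UI, a minimal sketch along these lines can be run locally; the "baseline-bert" entry and its score are illustrative placeholders, not values produced by the app:

# Sketch: exercise the same CSV round-trip that submit_evaluation relies on.
import pandas as pd

leaderboard = pd.DataFrame(columns=["Model Name", "Score"])
new_entry = {"Model Name": "baseline-bert", "Score": 0.50}  # hypothetical entry
leaderboard = pd.concat([leaderboard, pd.DataFrame([new_entry])], ignore_index=True)
leaderboard.sort_values(by="Score", ascending=False).to_csv("leaderboard.csv", index=False)
print(pd.read_csv("leaderboard.csv"))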
evaluation.py
ADDED
@@ -0,0 +1,39 @@
# evaluation.py

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import accuracy_score

def evaluate_model(model_path, test_data):
    """
    Evaluates the model based on test data and returns the score.
    This function assumes a classification task and requires the test data in a specific format.

    Args:
    - model_path: Path or identifier of the model (Hugging Face format)
    - test_data: A list of tuples (text, label) for evaluation

    Returns:
    - score: Evaluation score (e.g., accuracy)
    """

    # Load model and tokenizer (from_pretrained expects a Hugging Face-format
    # model directory or Hub id, not a single weights file)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    model.eval()
    all_preds = []
    all_labels = []

    # Run inference without gradient tracking
    with torch.no_grad():
        for text, label in test_data:
            inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
            outputs = model(**inputs)
            logits = outputs.logits
            prediction = torch.argmax(logits, dim=-1).item()

            all_preds.append(prediction)
            all_labels.append(label)

    # Calculate accuracy (or any other metric)
    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy
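As a rough usage sketch for the evaluator on its own (the checkpoint path and example sentences below are placeholders; from_pretrained needs a Hugging Face-format model directory or Hub id saved locally):

# Sketch: call evaluate_model directly against a locally saved checkpoint.
from evaluation import evaluate_model

test_data = [
    ("The movie was fantastic.", 1),   # hypothetical (text, label) pairs
    ("I did not enjoy this at all.", 0),
]

score = evaluate_model("models/my-checkpoint", test_data)  # placeholder path
print(f"Accuracy: {score:.3f}")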