Peiyan committed
Commit 085ecee · verified · 1 Parent(s): 39583d7

Upload 2 files

Files changed (2)
  1. app.py +68 -0
  2. evaluation.py +42 -0
app.py ADDED
@@ -0,0 +1,68 @@
+ import gradio as gr
+ import pandas as pd
+ import os
+ import shutil
+ from evaluation import evaluate_model  # Import your evaluation function
+
+ # Define the path where you want to save the leaderboard data
+ leaderboard_file = "leaderboard.csv"
+
+ # Check if the leaderboard file exists, otherwise create an empty DataFrame
+ if os.path.exists(leaderboard_file):
+     leaderboard = pd.read_csv(leaderboard_file)
+ else:
+     leaderboard = pd.DataFrame(columns=["Model Name", "Score"])
+
+ # Submit the evaluation and update the leaderboard
+ def submit_evaluation(model_name, model_file):
+     """
+     Handles the model submission, evaluates it, and updates the leaderboard.
+     """
+     global leaderboard
+
+     # Copy the uploaded model into the "models" folder.
+     # gr.File passes a tempfile-like object or a plain path, depending on the Gradio version.
+     uploaded_path = model_file.name if hasattr(model_file, "name") else model_file
+     os.makedirs("models", exist_ok=True)
+     model_path = os.path.join("models", os.path.basename(uploaded_path))
+     shutil.copy(uploaded_path, model_path)
+
+     # Example test data (replace with your actual test dataset)
+     test_data = [
+         ("Example text 1", 0),  # (text, label)
+         ("Example text 2", 1),
+         # Add more test data here
+     ]
+
+     # Evaluate the model using your custom evaluation code
+     score = evaluate_model(model_path, test_data)
+
+     # Update the leaderboard (DataFrame.append was removed in pandas 2.0, so use pd.concat)
+     new_entry = pd.DataFrame([{"Model Name": model_name, "Score": score}])
+     leaderboard = pd.concat([leaderboard, new_entry], ignore_index=True)
+     leaderboard_sorted = leaderboard.sort_values(by="Score", ascending=False)
+
+     # Save the updated leaderboard
+     leaderboard_sorted.to_csv(leaderboard_file, index=False)
+
+     # Return the sorted leaderboard as output
+     return leaderboard_sorted
+
+ # Create the Gradio interface
+ with gr.Blocks() as demo:
+     gr.Markdown("# Model Evaluation Leaderboard")
+
+     # Model submission interface
+     with gr.Row():
+         model_name_input = gr.Textbox(label="Model Name", placeholder="Enter the model name")
+         model_file_input = gr.File(label="Upload Model (Hugging Face Model Format)", file_types=[".pt", ".bin", ".h5", ".zip"])
+
+     submit_button = gr.Button("Submit Evaluation")
+
+     # Leaderboard display area
+     leaderboard_display = gr.Dataframe(leaderboard)
+
+     submit_button.click(submit_evaluation, inputs=[model_name_input, model_file_input], outputs=[leaderboard_display])
+
+ # Launch the interface
+ demo.launch()
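
Note: when this runs as a Gradio Space, the dependencies imported above (and in evaluation.py below) need to be listed in a requirements.txt, which this commit does not include. A minimal sketch, with versions left unpinned since none are specified here:

    gradio
    pandas
    torch
    transformers
    scikit-learn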
evaluation.py ADDED
@@ -0,0 +1,42 @@
+ # evaluation.py
+
+ import torch
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+ from sklearn.metrics import accuracy_score
+
+ def evaluate_model(model_path, test_data):
+     """
+     Evaluates the model on the test data and returns a score.
+     Assumes a classification task; model_path must be loadable by
+     from_pretrained, i.e. a Hugging Face model directory or Hub model ID.
+
+     Args:
+     - model_path: Path to the model (directory or Hub ID)
+     - test_data: A list of (text, label) tuples for evaluation
+
+     Returns:
+     - score: Evaluation score (accuracy)
+     """
+
+     # Load model and tokenizer
+     model = AutoModelForSequenceClassification.from_pretrained(model_path)
+     tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+     model.eval()
+     all_preds = []
+     all_labels = []
+
+     # Run inference without tracking gradients
+     with torch.no_grad():
+         for text, label in test_data:
+             inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+             outputs = model(**inputs)
+             logits = outputs.logits
+             prediction = torch.argmax(logits, dim=-1).item()
+
+             all_preds.append(prediction)
+             all_labels.append(label)
+
+     # Calculate accuracy (or any other metric)
+     accuracy = accuracy_score(all_labels, all_preds)
+     return accuracy
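
As a quick sanity check, evaluate_model can also be called outside the Gradio app. A minimal sketch, assuming some sequence-classification checkpoint in Hugging Face format is reachable; the Hub model ID and example sentences below are illustrative placeholders, not part of this commit:

    from evaluation import evaluate_model

    # Tiny illustrative test set of (text, label) pairs: 1 = positive, 0 = negative
    test_data = [
        ("This movie was great!", 1),
        ("This movie was terrible.", 0),
    ]

    # Any checkpoint loadable by from_pretrained works here (placeholder model ID)
    score = evaluate_model("distilbert-base-uncased-finetuned-sst-2-english", test_data)
    print(f"Accuracy: {score:.2f}")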