# Spaces: Build error
import json

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
import torch
from datasets import load_dataset
from sklearn.metrics import confusion_matrix
from transformers import AutoModelForSequenceClassification, AutoTokenizer
def load_model(endpoint: str):
    """Load the tokenizer and sequence-classification model for *endpoint*.

    Args:
        endpoint: Identifier passed unchanged to both ``from_pretrained``
            calls (the script below uses a Hugging Face model name).

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # The tokenizer is fetched first, then the model, from the same endpoint.
    tokenizer = AutoTokenizer.from_pretrained(endpoint)
    return tokenizer, AutoModelForSequenceClassification.from_pretrained(endpoint)
def test_model(tokenizer, model, test_data: list, label_map: dict):
    """Classify every text in *test_data* and pair it with its true label.

    Args:
        tokenizer: Callable that turns one text into a mapping of model
            inputs; it is called with ``return_tensors="pt"``,
            ``truncation=True`` and ``padding=True``.
        model: Callable that accepts those inputs as keyword arguments and
            returns an object exposing a ``logits`` tensor.
        test_data: Iterable of ``(text, true_label)`` pairs.
        label_map: Maps the predicted class index (``int``) to a label name.

    Returns:
        A list of ``(text, true_label, pred_label)`` tuples in input order.

    Raises:
        KeyError: If a predicted class index is missing from *label_map*.
    """
    results = []
    # This is evaluation only: without no_grad() every forward pass records
    # an autograd graph that is never used, wasting memory and time.
    with torch.no_grad():
        for text, true_label in test_data:
            inputs = tokenizer(text, return_tensors="pt",
                               truncation=True, padding=True)
            outputs = model(**inputs)
            # One text per call, so argmax yields a single-element tensor.
            pred_label = label_map[int(outputs.logits.argmax(dim=-1))]
            results.append((text, true_label, pred_label))
    return results


# The "test_" prefix would make pytest collect this helper as a test case
# whenever it is imported into a test module; opt out explicitly.
test_model.__test__ = False
def generate_report_card(results, label_map):
    """Show a confusion-matrix heatmap for a set of classification results.

    Args:
        results: Sequence of records whose index 1 is the true label and
            index 2 is the predicted label (the tuples built by
            ``test_model`` have this layout).
        label_map: Mapping whose values, in iteration order, define the
            label order used for the matrix and for both heatmap axes.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # One label list drives the matrix ordering and both axes.
    class_names = list(label_map.values())

    actual = [record[1] for record in results]
    predicted = [record[2] for record in results]
    matrix = confusion_matrix(actual, predicted, labels=class_names)

    heatmap = go.Heatmap(
        z=matrix,
        x=class_names,
        y=class_names,
        colorscale='Viridis',
        colorbar={'title': 'Number of Samples'},
    )
    layout = go.Layout(
        title='Confusion Matrix',
        xaxis={'title': 'Predicted Labels'},
        # Reversed y-axis puts the first label's row at the top.
        yaxis={'title': 'True Labels', 'autorange': 'reversed'},
    )

    fig = go.Figure(data=heatmap, layout=layout)
    fig.show()
def load_sst2_data(split="validation"):
    """Load GLUE SST-2 as ``(sentence, label_name)`` pairs.

    The GLUE ``test`` split is published without gold labels (every label
    is -1), so treating "not 1" as negative would silently mark the whole
    split "negative". The default is therefore the labelled ``validation``
    split, and unknown label ids are rejected instead of being guessed.

    Args:
        split: Split expression passed to ``load_dataset``.

    Returns:
        A list of ``(sentence, "positive" | "negative")`` tuples.

    Raises:
        ValueError: If an example's label is neither 0 nor 1 (for example
            the -1 placeholder of an unlabelled split).
    """
    label_names = {0: "negative", 1: "positive"}
    dataset = load_dataset("glue", "sst2", split=split)

    data = []
    for item in dataset:
        label = item["label"]
        if label not in label_names:
            raise ValueError(
                f"SST-2 split {split!r} contains label {label!r}; "
                "this split has no usable gold labels "
                "(use 'train' or 'validation')."
            )
        data.append((item["sentence"], label_names[label]))
    return data
# Define the model endpoint and its label map.
# To evaluate a different model, replace both values, e.g.:
#   model_endpoint = "your-model-endpoint"
#   label_map = {0: "label0", 1: "label1"}  # must match the model's class indices
model_endpoint = "distilbert-base-uncased-finetuned-sst-2-english"
label_map = {0: "negative", 1: "positive"}

# Load the model and tokenizer.
tokenizer, model = load_model(model_endpoint)

# Test data is a list of (text, true_label) tuples. To use your own samples:
#   test_data = [
#       ("Sample text 1", "label0"),
#       ("Sample text 2", "label1"),
#   ]
# Evaluate on the SST-2 validation split: the GLUE "test" split ships
# without gold labels (all -1), so it cannot be used to score predictions.
test_data = load_sst2_data(split="validation")

# Use a smaller subset of test_data for a quicker demonstration (optional).
test_data = test_data[:100]

# Test the model and collect (text, true_label, pred_label) results.
results = test_model(tokenizer, model, test_data, label_map)

# Generate the visual report card.
generate_report_card(results, label_map)