ashutoshtitoria's picture
Update app.py
1ab056b verified
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
# Both the tokenizer and the sequence-classification model are loaded from
# the same pre-trained checkpoint, so the identifier is spelled out once.
_CHECKPOINT = "pparasurama/raceBERT-ethnicity"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CHECKPOINT)
# Lookup table from model output ID to ethnicity label. The labels are
# listed in ID order, so each entry's position in the tuple is its ID.
id2label = dict(
    enumerate(
        (
            "GreaterEuropean,British",
            "GreaterEuropean,WestEuropean,French",
            "GreaterEuropean,WestEuropean,Italian",
            "GreaterEuropean,WestEuropean,Hispanic",
            "GreaterEuropean,Jewish",
            "GreaterEuropean,EastEuropean",
            "Asian,IndianSubContinent",
            "Asian,GreaterEastAsian,Japanese",
            "GreaterAfrican,Muslim",
            "Asian,GreaterEastAsian,EastAsian",
            "GreaterEuropean,WestEuropean,Nordic",
            "GreaterEuropean,WestEuropean,Germanic",
            "GreaterAfrican,Africans",
        )
    )
)
# Function to make predictions based on the input name
def predict_ethnicity(name):
    """Return the most probable ethnicity labels for ``name`` as text.

    Args:
        name: The name to classify, as entered by the user. ``None`` and
            empty or whitespace-only strings are answered with a prompt
            instead of being sent to the model.

    Returns:
        A newline-separated string with one ``"<label>: <percent>%"`` line
        per prediction (up to five), most probable first; or the message
        ``"Please enter a name."`` when the input is blank.
    """
    # A blank name carries no signal, yet the classifier would still return
    # confident-looking percentages for it, so short-circuit here.
    if name is None or (isinstance(name, str) and not name.strip()):
        return "Please enter a name."

    # truncation=True asks the tokenizer to cut over-long input down to its
    # configured maximum length, so a pasted paragraph cannot overflow the
    # model's sequence limit.
    inputs = tokenizer(name, return_tensors="pt", truncation=True)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax over the class dimension, then take the single input's row.
    probabilities = torch.softmax(logits, dim=1)[0]

    # Get the top 5 predictions; the clamp keeps topk valid even if the
    # classifier exposes fewer than five classes.
    top_k = min(5, probabilities.numel())
    top_preds = torch.topk(probabilities, top_k)

    # Prepare the output as a sorted human-friendly list (topk returns the
    # values in descending order).
    return "\n".join(
        f"{id2label[idx]}: {prob * 100:.2f}%"
        for idx, prob in zip(
            top_preds.indices.tolist(), top_preds.values.tolist()
        )
    )
# Gradio Interface: a one-line text box collects the name, which is passed
# to predict_ethnicity; the returned string is displayed as plain text.
name_box = gr.Textbox(placeholder="Enter a name", lines=1)
interface = gr.Interface(
    title="TOPS Infosolutions Ethnicity Predictor - Kaleida",
    description="Enter a person's name and get the predicted ethnicity breakdown.",
    fn=predict_ethnicity,
    inputs=name_box,
    outputs="text",
)

# Launch the Gradio app (executed as soon as this module runs).
interface.launch()