tunisian-arabiz / app.py
RamiIbrahim's picture
Update app.py
0c8ac4b verified
raw
history blame
2.88 kB
import gradio as gr
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer
# Locations of the serialized artifacts (relative paths, as given to joblib).
MODEL_PATH = 'tunisian_arabiz_sentiment_analysis_model.pkl'
VECTORIZER_PATH = 'tfidf_vectorizer.pkl'

# Deserialize the classifier and its text vectorizer once, at import time, so
# every prediction request reuses the same in-memory objects.
# NOTE: joblib.load unpickles arbitrary objects; only load files you trust.
model = joblib.load(MODEL_PATH)
vectorizer = joblib.load(VECTORIZER_PATH)
def predict_sentiment(text):
    """Classify a piece of Tunisian Arabiz text as positive or negative.

    Args:
        text: The raw text to analyse.

    Returns:
        A 3-tuple of strings ``(sentiment, confidence, explanation)``:
        ``sentiment`` is "Positive" or "Negative", ``confidence`` is the
        highest class probability formatted with two decimals, and
        ``explanation`` is a sentence combining both (confidence shown as a
        percentage). When ``text`` is missing or blank, no prediction is
        made and ``("N/A", "N/A", <prompt to enter text>)`` is returned.
    """
    # A missing or whitespace-only input has nothing to score; skip the model
    # instead of reporting a sentiment and confidence for it.
    if not isinstance(text, str) or not text.strip():
        return (
            "N/A",
            "N/A",
            "Please enter some text to analyze.",
        )

    text_vectorized = vectorizer.transform([text])
    prediction = model.predict(text_vectorized)[0]
    probabilities = model.predict_proba(text_vectorized)[0]
    # Confidence is the probability of the most likely class.
    confidence = max(probabilities)
    # Label 1 is reported as positive; any other label as negative.
    sentiment = "Positive" if prediction == 1 else "Negative"
    return (
        sentiment,
        f"{confidence:.2f}",
        f"The model predicts this text is {sentiment.lower()} with {confidence:.2%} confidence."
    )
# Function to get predictions for examples
def get_example_predictions(examples):
    """Run the sentiment predictor over every example row.

    Args:
        examples: An iterable of rows whose first element is the text to
            classify.

    Returns:
        A list holding one ``predict_sentiment`` result per row, in the same
        order as ``examples``.
    """
    predictions = []
    for row in examples:
        sample_text = row[0]
        predictions.append(predict_sentiment(sample_text))
    return predictions
# Sample inputs offered in the UI; each row holds a single text.
examples = [
    ["3ajbetni barcha el film hedhi"],
    ["ma7abitch el akla mte3 el restaurant"],
    ["el jaw fi tounes a7la 7aja"],
    ["ennes el kol za3nin w ma3andhomch flous"],
]

# Score every sample once at start-up.
example_predictions = get_example_predictions(examples)

# Pair each sample text with a "<label> (Confidence: <score>)" summary built
# from the first two fields of its prediction.
formatted_examples = [
    [sample_text, f"{label} (Confidence: {score})"]
    for (sample_text,), (label, score, _explanation) in zip(
        examples, example_predictions
    )
]
# Long-form UI copy, kept apart from the layout so the wiring below stays short.
APP_TITLE = "Tunisian Arabiz Sentiment Analysis"

APP_DESCRIPTION = """
This model predicts the sentiment of Tunisian Arabiz text as either Positive or Negative.
Tunisian Arabiz is a form of writing Arabic (specifically Tunisian dialect) using Latin characters and numbers.
Example:
- "3ajbetni" means "I liked it"
- "7aja" means "thing"
Try the examples below or enter your own text!
"""

APP_ARTICLE = """
<div style="text-align: center;">
<img src="https://upload.wikimedia.org/wikipedia/commons/c/ce/Flag_of_Tunisia.svg" alt="Tunisian Flag" style="width:150px;"/>
</div>
<h3>About the Model</h3>
<p>This sentiment analysis model was trained on a dataset combining TuniziDataset and the Tunisian Dialect Corpus.
It uses TF-IDF vectorization for feature extraction and Logistic Regression for classification.</p>
<h3>Limitations</h3>
<p>The model may not perform well on very colloquial expressions or new slang terms not present in the training data.
It's also important to note that sentiment can be nuanced and context-dependent, which may not always be captured by this model.</p>
"""

# Single free-text input for the message to classify.
text_input = gr.Textbox(lines=3, placeholder="Enter Tunisian Arabiz text here...")

# One output component per element of the tuple returned by predict_sentiment.
result_outputs = [
    gr.Label(label="Predicted Sentiment"),
    gr.Label(label="Confidence Score"),
    gr.Textbox(label="Explanation"),
]

# Wire the predictor, its components and the descriptive copy together.
# NOTE(review): each row of formatted_examples carries two values while the
# interface declares a single input; confirm the extra prediction column is
# handled as intended by the installed Gradio version.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=text_input,
    outputs=result_outputs,
    examples=formatted_examples,
    title=APP_TITLE,
    description=APP_DESCRIPTION,
    article=APP_ARTICLE,
)

# Start serving the app.
iface.launch()