"""Gradio demo app for Tunisian Arabiz sentiment analysis.

Loads a pickled classifier and its TF-IDF vectorizer from the working
directory and exposes them through a single-textbox Gradio interface.
"""

import gradio as gr
import joblib
# Not referenced directly in this file; kept because file-level imports
# must not be dropped during review.
from sklearn.feature_extraction.text import TfidfVectorizer  # noqa: F401

# Load the saved model and vectorizer.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so these
# files must come from a trusted source.
model = joblib.load('tunisian_arabiz_sentiment_analysis_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')


def predict_sentiment(text):
    """Classify one piece of text and describe the result.

    Args:
        text: The raw input string; it is vectorized with the loaded
            ``vectorizer`` before being passed to ``model``.

    Returns:
        A 3-tuple of strings, one per output component of the interface:
        the sentiment label (``"Positive"`` or ``"Negative"``), the highest
        class probability formatted to two decimals, and a one-sentence
        explanation combining both.
    """
    text_vectorized = vectorizer.transform([text])
    prediction = model.predict(text_vectorized)[0]
    probabilities = model.predict_proba(text_vectorized)[0]
    # The largest class probability is reported as the confidence.
    confidence = max(probabilities)
    # Any predicted label other than 1 is reported as "Negative".
    sentiment = "Positive" if prediction == 1 else "Negative"

    return (
        sentiment,
        f"{confidence:.2f}",
        f"The model predicts this text is {sentiment.lower()} with {confidence:.2%} confidence."
    )


# Function to get predictions for examples
def get_example_predictions(examples):
    """Run ``predict_sentiment`` on the first element of each example row.

    Args:
        examples: An iterable of rows whose first item is the text to score.

    Returns:
        A list with one ``predict_sentiment`` result tuple per row, in order.
    """
    return [predict_sentiment(ex[0]) for ex in examples]


# Example texts
examples = [
    ["3ajbetni barcha el film hedhi"],
    ["ma7abitch el akla mte3 el restaurant"],
    ["el jaw fi tounes a7la 7aja"],
    ["ennes el kol za3nin w ma3andhomch flous"]
]

# Create Gradio interface
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=3, placeholder="Enter Tunisian Arabiz text here..."),
    outputs=[
        gr.Label(label="Predicted Sentiment"),
        gr.Label(label="Confidence Score"),
        gr.Textbox(label="Explanation")
    ],
    # The interface has exactly one input component, so each example row
    # carries exactly one value. The previous rows had a second
    # "prediction" column with no matching input component, so it never
    # appeared in the UI while still costing one model run per example at
    # start-up.
    examples=examples,
    title="Tunisian Arabiz Sentiment Analysis",
    description="""
This model predicts the sentiment of Tunisian Arabiz text as either Positive or Negative.
Tunisian Arabiz is a form of writing Arabic (specifically Tunisian dialect) using Latin characters and numbers.

Example:
- "3ajbetni" means "I liked it"
- "7aja" means "thing"

Try the examples below or enter your own text!
""",
    article="""
Tunisian Flag

About the Model

This sentiment analysis model was trained on a dataset combining TuniziDataset and the Tunisian Dialect Corpus. It uses TF-IDF vectorization for feature extraction and Logistic Regression for classification.

Limitations

The model may not perform well on very colloquial expressions or new slang terms not present in the training data. It's also important to note that sentiment can be nuanced and context-dependent, which may not always be captured by this model.

"""
)

# Launch the interface only when executed as a script, so importing this
# module (e.g. to reuse predict_sentiment) does not start a web server.
if __name__ == "__main__":
    iface.launch()