import gradio as gr
import joblib
from sklearn.feature_extraction.text import TfidfVectorizer

# Load the persisted classifier and vectorizer artifacts from the working directory.
# NOTE: joblib.load is pickle-based; only load artifact files from a trusted source.
MODEL_PATH = 'tunisian_arabiz_sentiment_analysis_model.pkl'
VECTORIZER_PATH = 'tfidf_vectorizer.pkl'

model = joblib.load(MODEL_PATH)
vectorizer = joblib.load(VECTORIZER_PATH)

def predict_sentiment(text):
    """Predict the sentiment of a piece of text.

    Args:
        text: Raw user input. ``None`` and empty or whitespace-only strings
            are treated as "no input".

    Returns:
        A 3-tuple of strings ``(sentiment, confidence, explanation)``:
        the predicted label ("Positive" or "Negative"), the highest class
        probability formatted with two decimals, and a sentence summarising
        the prediction. When no input is given, the tuple contains
        placeholder messages instead.
    """
    # `not text` also covers None, which would otherwise crash on .strip()
    # with an AttributeError instead of returning the friendly message.
    if not text or not text.strip():
        return (
            "No input provided",
            "N/A",
            "Please enter some text to get a sentiment prediction."
        )

    # The vectorizer expects an iterable of documents, hence the one-item list.
    text_vectorized = vectorizer.transform([text])
    prediction = model.predict(text_vectorized)[0]
    probabilities = model.predict_proba(text_vectorized)[0]
    # Confidence is the probability of the most likely class.
    confidence = max(probabilities)

    # Label 1 is reported as positive; any other label is reported as negative.
    sentiment = "Positive" if prediction == 1 else "Negative"

    return (
        sentiment,
        f"{confidence:.2f}",
        f"The model predicts this text is {sentiment.lower()} with {confidence:.2%} confidence."
    )

# Run the sentiment predictor over every example row
def get_example_predictions(examples):
    """Return one ``predict_sentiment`` result per example row.

    Only the first element of each row is used as the text to classify.
    The results are returned in the same order as the rows.
    """
    predictions = []
    for row in examples:
        predictions.append(predict_sentiment(row[0]))
    return predictions

# Sample inputs offered in the UI, one single-column row per sample:
# four written in Latin characters and digits, two in Arabic script.
examples = [
    ["3jebni barcha el film hedha"],
    ["ma7abitch el mekla mte3 el restaurant"],
    ["el jaw fi tounes a7la 7aja"],
    ["ennes el kol te3ba w ma3andhomch flous"],
    ["كان جات الدنيا دنيا راني ساهرة في دار حماتي"],
    ["مبابي مانستعرف بيه مدريدي كان مانشوفو مركى هاتريك بمريول الريال"],
]

# Score every sample once, when the module is loaded.
example_predictions = get_example_predictions(examples)

# Pair each sample text with a "<sentiment> (Confidence: <score>)" summary.
# NOTE(review): each row has two columns while the interface declares a single
# input component; confirm the second column is handled as intended by Gradio.
formatted_examples = [
    [sample, f"{label} (Confidence: {score})"]
    for (sample, *_), (label, score, *_) in zip(examples, example_predictions)
]

# Build the Gradio interface: one text input feeding predict_sentiment, whose
# three return values map, in order, onto the three output components below.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=3, placeholder="أدخل النص هنا... / Enter your text here..."),
    outputs=[
        gr.Label(label="Predicted Sentiment"),
        gr.Label(label="Confidence Score"),
        gr.Textbox(label="Explanation")
    ],
    examples=formatted_examples,
    title="Tunisian Arabiz Sentiment Analysis",
    # HTML shown above the input; the stray closing quotes in the list items
    # were removed so each example renders with balanced quotation marks.
    description="""
    <p>This model predicts the sentiment of Tunisian text as either Positive or Negative. It works with both Tunisian Arabiz and standard Arabic script.</p>
        
    <h4>What is Tunisian Arabiz? / ما هي العربيزية التونسية؟</h4>
    <p>Tunisian Arabiz is a way of writing the Tunisian dialect using Latin characters and numbers. For example:</p>
    <ul>
        <li>"3ajbetni" means "I liked it"</li>
        <li>"7aja" means "thing"</li>
        <li>"a3tini 9ahwa" means "give me a coffee"</li>
    </ul>
    
    <p>Try the examples below or enter your own text!</p>
    <p>!جرب الأمثلة أو أدخل نصك الخاص</p>
    """,
    # HTML shown below the interface: model background and known limitations.
    article="""
    <h3>About the Model</h3>
    <p>This sentiment analysis model was trained on a combined dataset from TuniziDataset and the Tunisian Dialect Corpus. 
    It uses TF-IDF vectorization for feature extraction and Logistic Regression for classification.</p>
    
    <p>The model accepts Tunisian Arabiz written with Latin and Arabic script.</p>
    
    <h3>Limitations</h3>
    <p>Due to dataset limitations, neutral sentiment data was removed to achieve maximum performance. </p>
    <p>The model may not perform well on very colloquial expressions or new slang terms not present in the training data. 
    Sentiment can be nuanced and context-dependent, which may not always be captured accurately by this model.</p>
    <a href="https://github.com/RamiIbrahim2002/Tunisian-Arabiz/tree/main">Github</a>
    <center>
    <h2>This model is open-source, and contributions of additional datasets are welcome to improve its capabilities.</h2>
    
    <h2>هذا النموذج مفتوح المصدر، ونرحب بمساهمات مجموعات البيانات الإضافية لتحسين قدراته.</h2>
    </center>
    """
)

# Launch the interface.
# NOTE: there is no `if __name__ == "__main__":` guard, so this call runs
# whenever the module is executed or imported.
iface.launch()