RamiIbrahim
committed on
Commit
•
e45c97d
1
Parent(s):
cdd58a5
Update app.py
Browse files
app.py
CHANGED
@@ -1,78 +1,31 @@
|
|
1 |
-
import
|
2 |
-
import numpy as np
|
3 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
-
from sklearn.model_selection import train_test_split
|
5 |
from sklearn.linear_model import LogisticRegression
|
6 |
-
from sklearn.metrics import accuracy_score
|
7 |
-
import nltk
|
8 |
import gradio as gr
|
9 |
-
import joblib
|
10 |
-
import os
|
11 |
-
|
12 |
-
nltk.download('stopwords', quiet=True)
|
13 |
-
|
14 |
-
MODEL_PATH = 'sentiment_model.joblib'
|
15 |
-
VECTORIZER_PATH = 'vectorizer.joblib'
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
X = clean_dataset['InputText'].values
|
22 |
-
Y = clean_dataset['SentimentLabel'].values
|
23 |
-
|
24 |
-
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)
|
25 |
-
|
26 |
-
vectorizer = TfidfVectorizer()
|
27 |
-
X_train = vectorizer.fit_transform(X_train)
|
28 |
-
X_test = vectorizer.transform(X_test)
|
29 |
-
|
30 |
-
model = LogisticRegression(max_iter=1000)
|
31 |
-
model.fit(X_train, Y_train)
|
32 |
-
|
33 |
-
# Save the model and vectorizer
|
34 |
-
joblib.dump(model, MODEL_PATH)
|
35 |
-
joblib.dump(vectorizer, VECTORIZER_PATH)
|
36 |
-
|
37 |
-
# Calculate accuracies
|
38 |
-
train_accuracy = accuracy_score(Y_train, model.predict(X_train))
|
39 |
-
test_accuracy = accuracy_score(Y_test, model.predict(X_test))
|
40 |
-
|
41 |
-
return model, vectorizer, train_accuracy, test_accuracy
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
vectorizer = joblib.load(VECTORIZER_PATH)
|
47 |
-
return model, vectorizer
|
48 |
-
return None, None
|
49 |
-
|
50 |
-
def predict_sentiment(input_text):
|
51 |
-
model, vectorizer = load_model()
|
52 |
-
|
53 |
-
if model is None or vectorizer is None:
|
54 |
-
model, vectorizer, train_accuracy, test_accuracy = train_model()
|
55 |
-
print(f"Model trained. Train accuracy: {train_accuracy:.4f}, Test accuracy: {test_accuracy:.4f}")
|
56 |
-
|
57 |
-
# Transform input text
|
58 |
-
input_vector = vectorizer.transform([input_text])
|
59 |
-
|
60 |
-
# Predict
|
61 |
prediction = model.predict(input_vector)[0]
|
62 |
probabilities = model.predict_proba(input_vector)[0]
|
63 |
-
|
64 |
sentiment = "Positive" if prediction == 1 else "Negative"
|
65 |
confidence = probabilities[1] if prediction == 1 else probabilities[0]
|
66 |
-
|
67 |
-
return f"Sentiment: {sentiment}\nConfidence: {confidence:.4f}"
|
68 |
|
69 |
-
|
|
|
|
|
70 |
iface = gr.Interface(
|
71 |
fn=predict_sentiment,
|
72 |
-
inputs="text",
|
73 |
-
outputs="
|
74 |
-
title="Sentiment Analysis
|
75 |
-
description="
|
76 |
)
|
77 |
|
78 |
-
|
|
|
|
1 |
+
import joblib
|
|
|
2 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
|
3 |
from sklearn.linear_model import LogisticRegression
|
|
|
|
|
4 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
+
# Load the trained model and its TF-IDF vectorizer
|
7 |
+
model = joblib.load('tunisian_arabiz_sentiment_analysis_model.pkl')
|
8 |
+
vectorizer = joblib.load('tfidf_vectorizer.pkl')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
+
# Function to predict sentiment
|
11 |
+
def predict_sentiment(text):
|
12 |
+
input_vector = vectorizer.transform([text])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
prediction = model.predict(input_vector)[0]
|
14 |
probabilities = model.predict_proba(input_vector)[0]
|
15 |
+
|
16 |
sentiment = "Positive" if prediction == 1 else "Negative"
|
17 |
confidence = probabilities[1] if prediction == 1 else probabilities[0]
|
|
|
|
|
18 |
|
19 |
+
return f"Sentiment: {sentiment}, Confidence: {confidence:.4f}"
|
20 |
+
|
21 |
+
# Define Gradio interface
|
22 |
iface = gr.Interface(
|
23 |
fn=predict_sentiment,
|
24 |
+
inputs=gr.inputs.Textbox(lines=5, label="Enter text in Tunisian Arabizi:"),
|
25 |
+
outputs=gr.outputs.Textbox(label="Sentiment Prediction"),
|
26 |
+
title="Tunisian Arabizi Sentiment Analysis",
|
27 |
+
description="Predict sentiment (Positive/Negative) of Tunisian Arabizi text."
|
28 |
)
|
29 |
|
30 |
+
# Launch the Gradio interface
|
31 |
+
iface.launch()
|