RamiIbrahim committed on
Commit
e45c97d
1 Parent(s): cdd58a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -64
app.py CHANGED
@@ -1,78 +1,31 @@
1
- import pandas as pd
2
- import numpy as np
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
- from sklearn.model_selection import train_test_split
5
  from sklearn.linear_model import LogisticRegression
6
- from sklearn.metrics import accuracy_score
7
- import nltk
8
  import gradio as gr
9
- import joblib
10
- import os
11
-
12
- nltk.download('stopwords', quiet=True)
13
-
14
- MODEL_PATH = 'sentiment_model.joblib'
15
- VECTORIZER_PATH = 'vectorizer.joblib'
16
 
17
- def train_model():
18
- df = pd.read_csv("TuniziDataset.csv")
19
- clean_dataset = df.drop_duplicates()
20
-
21
- X = clean_dataset['InputText'].values
22
- Y = clean_dataset['SentimentLabel'].values
23
-
24
- X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)
25
-
26
- vectorizer = TfidfVectorizer()
27
- X_train = vectorizer.fit_transform(X_train)
28
- X_test = vectorizer.transform(X_test)
29
-
30
- model = LogisticRegression(max_iter=1000)
31
- model.fit(X_train, Y_train)
32
-
33
- # Save the model and vectorizer
34
- joblib.dump(model, MODEL_PATH)
35
- joblib.dump(vectorizer, VECTORIZER_PATH)
36
-
37
- # Calculate accuracies
38
- train_accuracy = accuracy_score(Y_train, model.predict(X_train))
39
- test_accuracy = accuracy_score(Y_test, model.predict(X_test))
40
-
41
- return model, vectorizer, train_accuracy, test_accuracy
42
 
43
- def load_model():
44
- if os.path.exists(MODEL_PATH) and os.path.exists(VECTORIZER_PATH):
45
- model = joblib.load(MODEL_PATH)
46
- vectorizer = joblib.load(VECTORIZER_PATH)
47
- return model, vectorizer
48
- return None, None
49
-
50
- def predict_sentiment(input_text):
51
- model, vectorizer = load_model()
52
-
53
- if model is None or vectorizer is None:
54
- model, vectorizer, train_accuracy, test_accuracy = train_model()
55
- print(f"Model trained. Train accuracy: {train_accuracy:.4f}, Test accuracy: {test_accuracy:.4f}")
56
-
57
- # Transform input text
58
- input_vector = vectorizer.transform([input_text])
59
-
60
- # Predict
61
  prediction = model.predict(input_vector)[0]
62
  probabilities = model.predict_proba(input_vector)[0]
63
-
64
  sentiment = "Positive" if prediction == 1 else "Negative"
65
  confidence = probabilities[1] if prediction == 1 else probabilities[0]
66
-
67
- return f"Sentiment: {sentiment}\nConfidence: {confidence:.4f}"
68
 
69
- # Gradio Interface
 
 
70
  iface = gr.Interface(
71
  fn=predict_sentiment,
72
- inputs="text",
73
- outputs="text",
74
- title="Sentiment Analysis Predictor",
75
- description="Enter a text to predict its sentiment."
76
  )
77
 
78
- iface.launch()
 
 
1
+ import joblib
 
2
  from sklearn.feature_extraction.text import TfidfVectorizer
 
3
  from sklearn.linear_model import LogisticRegression
 
 
4
  import gradio as gr
 
 
 
 
 
 
 
5
 
6
+ # Load the model
7
+ model = joblib.load('tunisian_arabiz_sentiment_analysis_model.pkl')
8
+ vectorizer = joblib.load('tfidf_vectorizer.pkl')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
+ # Function to predict sentiment
11
+ def predict_sentiment(text):
12
+ input_vector = vectorizer.transform([text])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  prediction = model.predict(input_vector)[0]
14
  probabilities = model.predict_proba(input_vector)[0]
15
+
16
  sentiment = "Positive" if prediction == 1 else "Negative"
17
  confidence = probabilities[1] if prediction == 1 else probabilities[0]
 
 
18
 
19
+ return f"Sentiment: {sentiment}, Confidence: {confidence:.4f}"
20
+
21
+ # Define Gradio interface
22
  iface = gr.Interface(
23
  fn=predict_sentiment,
24
+ inputs=gr.inputs.Textbox(lines=5, label="Enter text in Tunisian Arabizi:"),
25
+ outputs=gr.outputs.Textbox(label="Sentiment Prediction"),
26
+ title="Tunisian Arabizi Sentiment Analysis",
27
+ description="Predict sentiment (Positive/Negative) of Tunisian Arabizi text."
28
  )
29
 
30
+ # Launch the Gradio interface
31
+ iface.launch()