Artemis-IA committed
Commit 3daec5b
1 Parent(s): 8569b65

Create app.py

Files changed (1)
  1. app.py +127 -0
app.py ADDED
@@ -0,0 +1,127 @@
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import spacy
from wordcloud import WordCloud
from io import StringIO, BytesIO
import mimetypes
from transformers import CamembertForSequenceClassification, CamembertTokenizer
import torch

# Model Loading
model = joblib.load('model.pkl')
vectorizer = joblib.load('vectorizer.pkl')

camembert_model = CamembertForSequenceClassification.from_pretrained("camembert-base", num_labels=2)
state_dict = torch.load('camembertperso.pth', map_location='cpu')
camembert_model.load_state_dict(state_dict, strict=False)
camembert_model.eval()  # inference mode: disable dropout so predictions are deterministic
tokenizer = CamembertTokenizer.from_pretrained('camembert-base', do_lower_case=True)

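# Note: model.pkl, vectorizer.pkl and camembertperso.pth are loaded by relative path
# (the working directory when the app starts), and strict=False lets load_state_dict
# ignore missing or unexpected keys in the checkpoint instead of raising.
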
nlp = spacy.load("fr_core_news_sm")

# Text Processing Functions
def clean_text(text):
    return text.strip().lower()

def lemmatize_text(text):
    doc = nlp(text)
    lemmatized_text = " ".join([token.lemma_ for token in doc])
    return lemmatized_text

# Prediction Functions
def predict_label(text):
    cleaned_text = clean_text(text)
    lemmatized_text = lemmatize_text(cleaned_text)

    vectorized_text = vectorizer.transform([lemmatized_text])
    label = model.predict(vectorized_text)[0]

    probability_score = model.decision_function(vectorized_text)[0]
    probability = 1 / (1 + np.exp(-probability_score))

    return label, probability

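# Note: LinearSVC has no predict_proba, so predict_label squashes the decision_function
# margin through a logistic sigmoid; treat the result as a convenience score rather than
# a calibrated probability.
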
def predict_camembert(text):
    tokens = tokenizer.encode_plus(text, truncation=True, return_tensors="pt")

    with torch.no_grad():
        outputs = camembert_model(**tokens)

    # The sequence-classification output exposes the class scores as `logits`
    logits = outputs.logits

    predictions = torch.argmax(logits, dim=1).item()
    probabilities = torch.softmax(logits, dim=1)[:, 1].item()

    return predictions, probabilities

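# predict_camembert returns the argmax class plus the softmax probability of class 1,
# which the interface below displays as the "positive" sentiment.
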
# App Interface
st.title('Analyse de sentiments')

st.write('Cet outil permet de prédire si une review est positive ou négative.')

review_text = st.text_area('Saisir la review ou charger un fichier :')

if st.button('Prédire et générer le nuage de mots'):
    # LinearSVC Prediction
    label_linear_svc, probability_linear_svc = predict_label(review_text)

    # Display LinearSVC Results
    st.write('Résultats de LinearSVC :')
    if label_linear_svc == 0:
        st.write('La review est négative.')
    else:
        st.write('La review est positive.')

    # Display LinearSVC Predicted Label
    st.write('Label prédit (LinearSVC) :', f'**{label_linear_svc}**')

    # Display LinearSVC Probability
    st.write('Probabilité (LinearSVC) :', f'**{probability_linear_svc:.2%}**')

    # CamemBERT Prediction
    label_camembert, probability_camembert = predict_camembert(review_text)

    # Display CamemBERT Results
    st.write('Résultats de CamemBERT :')
    if label_camembert == 0:
        st.write('La review est négative.')
    else:
        st.write('La review est positive.')

    # Display CamemBERT Predicted Label
    st.write('Label prédit (CamemBERT) :', f'**{label_camembert}**')

    # Display CamemBERT Probability
    st.write('Probabilité (CamemBERT) :', f'**{probability_camembert:.2%}**')

    # Lemmatize and Exclude Stop Words
    doc = nlp(review_text)
    lemmatized_text_no_stopwords = " ".join([token.lemma_ for token in doc if not token.is_stop])

    # Generate the word cloud
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(lemmatized_text_no_stopwords)
    st.image(wordcloud.to_image())

# File upload widget
uploaded_file = st.file_uploader("Charger un fichier texte", type=["txt", "csv"])
if uploaded_file is not None:
    content_type, _ = mimetypes.guess_type(uploaded_file.name)
    if content_type == 'text/plain':
        file_contents = uploaded_file.read().decode("utf-8")
        st.text(file_contents)

        # Lemmatize the text and exclude stop words
        doc = nlp(file_contents)
        lemmatized_text_no_stopwords = " ".join([token.lemma_ for token in doc if not token.is_stop])

        # Generate the word cloud from the uploaded file
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(lemmatized_text_no_stopwords)
        st.image(wordcloud.to_image())
    elif content_type == 'text/csv':
        df = pd.read_csv(uploaded_file)
        st.write(df)
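
As noted above, predict_label maps the LinearSVC margin onto (0, 1) with a logistic sigmoid. A minimal, self-contained sketch of that mapping on a made-up toy corpus (standing in for the real model.pkl and vectorizer.pkl, which are not part of this commit):

import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Toy corpus purely for illustration (1 = positive, 0 = negative)
texts = ["très bon produit", "service excellent et rapide",
         "qualité décevante", "très mauvaise expérience"]
labels = [1, 1, 0, 0]

vec = TfidfVectorizer()
clf = LinearSVC().fit(vec.fit_transform(texts), labels)

# Same margin-to-probability mapping as predict_label in app.py
margin = clf.decision_function(vec.transform(["produit excellent"]))[0]
pseudo_proba = 1.0 / (1.0 + np.exp(-margin))
print(f"margin={margin:.3f}, pseudo-probability={pseudo_proba:.2%}")

With the three model artifacts in place and the French spaCy model installed (python -m spacy download fr_core_news_sm), the app itself starts with streamlit run app.py.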