Spaces:

Artemis-IA
/

3-stars-sentiment-analysis

Runtime error

App Files Files Community

Artemis-IA committed on Dec 2, 2023

Commit

3daec5b

•

1 Parent(s): 8569b65

Create app.py

Browse files

Files changed (1) hide show

app.py +127 -0

app.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+import spacy
+from wordcloud import WordCloud
+from io import StringIO, BytesIO
+import mimetypes
+from transformers import CamembertForSequenceClassification, CamembertTokenizer
+import torch
+# Model loading
+# NOTE(review): these loads execute at module import, i.e. each time Streamlit
+# re-runs the script; a cached loader would avoid repeating the cost.
+# NOTE(review): joblib.load unpickles arbitrary objects -- only ship trusted
+# model.pkl / vectorizer.pkl files next to this app.
+model = joblib.load('model.pkl')
+vectorizer = joblib.load('vectorizer.pkl')
+# Start from the pretrained CamemBERT backbone with a 2-label head, then
+# overlay the weights stored in camembertperso.pth.
+camembert_model = CamembertForSequenceClassification.from_pretrained("camembert-base", num_labels=2)
+state_dict = torch.load('camembertperso.pth', map_location='cpu')
+# strict=False tolerates key mismatches, so inspect the result instead of
+# discarding it: every missing key keeps its initial value, which would make
+# the predictions unreliable without any visible error.
+_load_result = camembert_model.load_state_dict(state_dict, strict=False)
+if _load_result.missing_keys:
+    st.warning(
+        f"Poids CamemBERT incomplets : {len(_load_result.missing_keys)} "
+        "paramètre(s) absent(s) de camembertperso.pth."
+    )
+# Inference only: make sure dropout layers are disabled.
+camembert_model.eval()
+tokenizer = CamembertTokenizer.from_pretrained('camembert-base', do_lower_case=True)
+nlp = spacy.load("fr_core_news_sm")
+# Text Processing Functions
+def clean_text(text):
+    """Return ``text`` without surrounding whitespace, converted to lower case."""
+    stripped = text.strip()
+    return stripped.lower()
+def lemmatize_text(text):
+    """Return the lemma of every token in ``text``, joined by single spaces.
+
+    The text is parsed with the module-level spaCy pipeline ``nlp``.
+    """
+    lemmas = (token.lemma_ for token in nlp(text))
+    return " ".join(lemmas)
+# Prediction Functions
+def predict_label(text):
+    """Classify ``text`` with the module-level ``model`` and ``vectorizer``.
+
+    The text is cleaned, lemmatized and vectorized before prediction.
+    Returns a ``(label, probability)`` tuple: the first entry of
+    ``model.predict`` and the logistic sigmoid of the first entry of
+    ``model.decision_function``.
+    """
+    features = vectorizer.transform([lemmatize_text(clean_text(text))])
+    label = model.predict(features)[0]
+    margin = model.decision_function(features)[0]
+    # Squash the decision score into (0, 1) with a logistic sigmoid.
+    # NOTE(review): presumably a confidence proxy rather than a calibrated
+    # probability -- confirm against how the model was trained.
+    return label, 1 / (1 + np.exp(-margin))
+def predict_camembert(text):
+    """Classify ``text`` with the module-level CamemBERT model.
+
+    Returns a ``(prediction, probability)`` tuple: the index of the highest
+    logit and the softmax probability assigned to class 1.
+    """
+    # Truncate long reviews: without truncation, inputs longer than the
+    # model's 512-token window fail inside the forward pass.
+    tokens = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = camembert_model(**tokens)
+    # No labels are passed, so the output holds no loss and the logits are
+    # always its first element, whatever optional outputs follow.
+    logits = outputs[0]
+    predictions = torch.argmax(logits, dim=1).item()
+    probabilities = torch.softmax(logits, dim=1)[:, 1].item()
+    return predictions, probabilities
+# App Interface
+def _show_prediction(model_name, label, probability):
+    """Render the verdict, the raw label and the probability for one model."""
+    st.write(f'Résultats de {model_name}:')
+    if label == 0:
+        st.write('La review est négative.')
+    else:
+        st.write('La review est positive.')
+    st.write(f'Score de prédiction ({model_name}) :', f'**{label}**', unsafe_allow_html=True)
+    st.write(f'Probabilité ({model_name}) :', f'**{probability:.2%}**', unsafe_allow_html=True)
+st.title('Analyse de sentiments')
+st.write('Cet outil permet de prédire si une review est positive ou négative.')
+review_text = st.text_area('Saisir la review ou charger un fichier :')
+if st.button('Prédire et générer le nuage de mots'):
+    if not review_text.strip():
+        # Nothing to classify: avoid a meaningless prediction and the
+        # ValueError WordCloud raises when it is given no words.
+        st.warning('Veuillez saisir une review avant de lancer la prédiction.')
+    else:
+        # LinearSVC prediction and results
+        label_linear_svc, probability_linear_svc = predict_label(review_text)
+        _show_prediction('LinearSVC', label_linear_svc, probability_linear_svc)
+        # CamemBERT prediction and results
+        label_camembert, probability_camembert = predict_camembert(review_text)
+        _show_prediction('Camembert', label_camembert, probability_camembert)
+        # Lemmatize and exclude stop words
+        doc = nlp(review_text)
+        lemmatized_text_no_stopwords = " ".join(token.lemma_ for token in doc if not token.is_stop)
+        # Generate the word cloud; WordCloud raises ValueError when no word
+        # is left to draw (e.g. a review made only of stop words).
+        try:
+            wordcloud = WordCloud(width=800, height=400, background_color='white').generate(lemmatized_text_no_stopwords)
+        except ValueError:
+            st.info('Pas assez de mots pour générer un nuage de mots.')
+        else:
+            st.image(wordcloud.to_image())
+# File upload: preview a text or CSV file chosen by the user
+uploaded_file = st.file_uploader("Charger un fichier texte", type=["txt", "csv"])
+if uploaded_file is not None:
+    # Dispatch on the file extension: the uploader already restricts uploads
+    # to txt/csv, and mimetypes.guess_type relies on system-specific tables,
+    # so its answer for ".csv" can differ between platforms.
+    file_name = uploaded_file.name.lower()
+    if file_name.endswith('.txt'):
+        raw_bytes = uploaded_file.read()
+        try:
+            file_contents = raw_bytes.decode("utf-8")
+        except UnicodeDecodeError:
+            # latin-1 maps every byte to a character, so this cannot fail.
+            file_contents = raw_bytes.decode("latin-1")
+        st.text(file_contents)
+        # Lemmatize the text and exclude stop words
+        doc = nlp(file_contents)
+        lemmatized_text_no_stopwords = " ".join(token.lemma_ for token in doc if not token.is_stop)
+        # Generate the word cloud from the uploaded file; WordCloud raises
+        # ValueError when no word is left to draw (e.g. an empty file).
+        try:
+            wordcloud = WordCloud(width=800, height=400, background_color='white').generate(lemmatized_text_no_stopwords)
+        except ValueError:
+            st.info('Pas assez de mots pour générer un nuage de mots.')
+        else:
+            st.image(wordcloud.to_image())
+    elif file_name.endswith('.csv'):
+        df = pd.read_csv(uploaded_file)
+        st.write(df)