Spaces:

Artemis-IA
/

3-stars-sentiment-analysis

Runtime error

App Files Files Community

3-stars-sentiment-analysis / appp.py

Artemis-IA

Upload 7 files

8569b65 11 months ago

raw

history blame

4.43 kB

	# Imports
	import streamlit as st
	import pandas as pd
	import numpy as np
	import joblib
	import spacy
	from wordcloud import WordCloud
	from io import StringIO, BytesIO
	import mimetypes
	from transformers import CamembertForSequenceClassification, CamembertTokenizer
	import torch

	# Model Loading
	# NOTE(review): joblib.load unpickles its input, and torch.load may do so as
	# well depending on the installed version; unpickling can execute arbitrary
	# code, so these artifacts must come from a trusted source.
	model = joblib.load('model.pkl')
	vectorizer = joblib.load('vectorizer.pkl')

	camembert_model = CamembertForSequenceClassification.from_pretrained("camembert-base", num_labels=2)
	state_dict = torch.load('camembertperso.pth', map_location='cpu')
	# strict=False tolerates checkpoint/model key mismatches. Inspect the result
	# instead of discarding it: missing keys mean part of the network keeps the
	# weights it had before this call rather than the fine-tuned ones.
	load_result = camembert_model.load_state_dict(state_dict, strict=False)
	if load_result.missing_keys or load_result.unexpected_keys:
	    st.warning(
	        'Poids CamemBERT chargés partiellement : '
	        f'{len(load_result.missing_keys)} clé(s) manquante(s), '
	        f'{len(load_result.unexpected_keys)} clé(s) inattendue(s).'
	    )
	# The model is only used for inference below, so disable dropout explicitly.
	camembert_model.eval()
	# NOTE(review): do_lower_case is kept from the original call; confirm that
	# CamembertTokenizer actually honours it.
	tokenizer = CamembertTokenizer.from_pretrained('camembert-base', do_lower_case=True)

	# French spaCy pipeline used for lemmatization and stop-word detection.
	nlp = spacy.load("fr_core_news_sm")

	# Text Processing Functions
	def clean_text(text: str) -> str:
	    """Return *text* with surrounding whitespace removed and lower-cased."""
	    return text.strip().lower()

	def lemmatize_text(text: str) -> str:
	    """Return the lemmas of *text*, joined by single spaces.

	    The text is run through the module-level spaCy pipeline ``nlp`` and
	    every token (stop words and punctuation included) contributes its
	    ``lemma_``.
	    """
	    doc = nlp(text)
	    return " ".join(token.lemma_ for token in doc)

	# Prediction Functions
	def predict_label(text):
	    """Classify *text* with the joblib-loaded ``model`` / ``vectorizer`` pair.

	    The text is cleaned with ``clean_text``, lemmatized with
	    ``lemmatize_text``, vectorized, and passed to ``model.predict``.

	    Returns:
	        A ``(label, probability)`` tuple. ``label`` is the first element of
	        ``model.predict``. ``probability`` is the logistic sigmoid of the
	        first ``model.decision_function`` value: a score squashed into
	        (0, 1), not a calibrated probability.
	    """
	    lemmatized_text = lemmatize_text(clean_text(text))

	    # The vectorizer expects an iterable of documents, hence the 1-item list.
	    vectorized_text = vectorizer.transform([lemmatized_text])
	    label = model.predict(vectorized_text)[0]

	    # Map the signed decision margin onto (0, 1) with a sigmoid.
	    # NOTE(review): assumes a binary model whose decision_function returns
	    # one scalar per sample - confirm against the trained estimator.
	    decision_score = model.decision_function(vectorized_text)[0]
	    probability = 1 / (1 + np.exp(-decision_score))

	    return label, probability

	def predict_camembert(text, max_length=512):
	    """Classify *text* with the fine-tuned CamemBERT sequence classifier.

	    Args:
	        text: Raw review text.
	        max_length: Maximum number of tokens fed to the model; longer inputs
	            are truncated so they cannot exceed the model's position limit.

	    Returns:
	        A ``(prediction, probability)`` tuple: the arg-max class index as an
	        ``int`` and the softmax probability of class 1 as a ``float``.
	    """
	    tokens = tokenizer(
	        text,
	        return_tensors="pt",
	        truncation=True,
	        max_length=max_length,
	    )

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        outputs = camembert_model(**tokens)

	    # No labels are passed, so no loss is returned and the logits are always
	    # the first output; later positions (if any) hold hidden states or
	    # attentions, never logits. Index access works for tuple and ModelOutput.
	    logits = outputs[0]

	    class_probabilities = torch.softmax(logits, dim=1)
	    predictions = torch.argmax(class_probabilities, dim=1).item()
	    probabilities = class_probabilities[:, 1].item()

	    return predictions, probabilities

	# App Interface
	def _show_prediction(model_name, label, probability):
	    """Render the verdict, raw label and probability for one model."""
	    st.write(f'Résultats de {model_name}:')
	    if label == 0:
	        st.write('La review est négative.')
	    else:
	        st.write('La review est positive.')
	    st.write(f'Score de prédiction ({model_name}) :', f'{label}')
	    st.write(f'Probabilité ({model_name}) :', f'{probability:.2%}')


	def _show_wordcloud(text):
	    """Render a word cloud of the non-stop-word lemmas found in *text*."""
	    doc = nlp(text)
	    lemmas_without_stopwords = " ".join(
	        token.lemma_ for token in doc if not token.is_stop
	    )
	    try:
	        wordcloud = WordCloud(
	            width=800, height=400, background_color='white'
	        ).generate(lemmas_without_stopwords)
	    except ValueError:
	        # WordCloud raises ValueError when the text contains no usable word
	        # (empty input, or only stop words / punctuation).
	        st.info('Aucun mot exploitable pour générer le nuage de mots.')
	        return
	    st.image(wordcloud.to_image())


	st.title('Analyse de sentiments')

	st.write('Cet outil permet de prédire si une review est positive ou négative.')

	review_text = st.text_area('Saisir la review ou charger un fichier :')

	if st.button('Prédire et générer le nuage de mots'):
	    if not review_text.strip():
	        # Nothing to classify: skip both models and the word cloud.
	        st.warning('Veuillez saisir une review avant de lancer la prédiction.')
	    else:
	        label_linear_svc, probability_linear_svc = predict_label(review_text)
	        _show_prediction('LinearSVC', label_linear_svc, probability_linear_svc)

	        label_camembert, probability_camembert = predict_camembert(review_text)
	        _show_prediction('Camembert', label_camembert, probability_camembert)

	        _show_wordcloud(review_text)

	# File upload: preview the file, plus a word cloud for plain-text files.
	uploaded_file = st.file_uploader("Charger un fichier texte", type=["txt", "csv"])
	if uploaded_file is not None:
	    # Branch on the extension directly: the MIME type guessed from a file
	    # name depends on the host's MIME tables and is not stable across systems.
	    extension = uploaded_file.name.rsplit(".", 1)[-1].lower()
	    if extension == "txt":
	        try:
	            file_contents = uploaded_file.read().decode("utf-8")
	        except UnicodeDecodeError:
	            st.error("Le fichier n'est pas encodé en UTF-8.")
	        else:
	            st.text(file_contents)
	            _show_wordcloud(file_contents)
	    elif extension == "csv":
	        try:
	            df = pd.read_csv(uploaded_file)
	        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
	            st.error('Impossible de lire le fichier CSV.')
	        else:
	            st.write(df)