# Spaces: Build error
import streamlit as st | |
from transformers import pipeline | |
from transformers import AutoTokenizer | |
from transformers import AutoModelForSequenceClassification | |
import warnings | |
warnings.filterwarnings("ignore") | |
import nltk | |
nltk.download('all') | |
import matplotlib.pyplot as plt | |
import helper | |
import preprocessor | |
from mtranslate import translate | |
import pandas as pd | |
import os | |
from gtts import gTTS | |
import base64 | |
import torch | |
import seaborn as sns | |
def _show_figure(fig):
    """Render *fig* on the Streamlit page, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot
    until they are explicitly closed. Streamlit re-runs this script inside
    the same process on every interaction, so figures that are only rendered
    and never closed pile up in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


st.sidebar.title("Whatsapp Chat analyzer")
uploaded_file = st.sidebar.file_uploader("Choose a file")

if uploaded_file is not None:
    # The upload arrives as raw bytes; decode it as UTF-8 text before parsing.
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df_new = preprocessor.preprocess(data)

    # Sorted values of the 'users' column, with the whole-group option first.
    user_list = df_new['users'].unique().tolist()
    user_list.sort()
    user_list.insert(0, "Group analysis")
    selected_user = st.sidebar.selectbox("show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        # Shared styling for the tables shown near the bottom of the report.
        table_style = {"background-color": "black", "color": "lawngreen"}

        # ---- Headline counters -------------------------------------------
        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
        st.title("Top Statistics")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Links Shared")
            st.title(num_links)

        # ---- Timelines ---------------------------------------------------
        st.title("Timeline")
        col1, col2 = st.columns(2)
        with col1:
            st.header("Monthly ")
            timeline = helper.monthly_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(timeline['time'], timeline['message'])
            plt.xticks(rotation='vertical')
            _show_figure(fig)
        with col2:
            st.title("Daily")
            daily_timeline = helper.Daily_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(daily_timeline['Date'], daily_timeline['message'], color='black')
            plt.xticks(rotation='vertical')
            _show_figure(fig)

        # ---- Activity by weekday and by month ----------------------------
        st.title("Activity Map")
        col1, col2 = st.columns(2)
        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(
                busy_day.index,
                busy_day.values,
                color=('violet', 'indigo', 'blue', 'green', 'yellow', 'orange', 'red'),
            )
            plt.xticks(rotation='vertical')
            _show_figure(fig)
        with col2:
            st.header("Most busy Month")
            busy_month = helper.month_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(
                busy_month.index,
                busy_month.values,
                color=('indigo', 'blue', 'green', 'red'),
            )
            plt.xticks(rotation='vertical')
            _show_figure(fig)

        # ---- Heatmap -----------------------------------------------------
        st.title("Weekly Activity HeatMap")
        Activity_heatmap = helper.activity_heatmap(selected_user, df_new)
        fig, ax = plt.subplots()
        # Draw onto the axes created above instead of relying on pyplot's
        # implicit "current axes".
        sns.heatmap(Activity_heatmap, cmap='RdBu', linewidths=1, linecolor='black', ax=ax)
        _show_figure(fig)

        # ---- Per-user ranking (only shown for the whole-group view) ------
        if selected_user == "Group analysis":
            st.title("Most busy user")
            x, new_df = helper.most_busy_users(df_new)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)
            with col1:
                ax.bar(x.index, x.values, color=('blue', 'red', 'pink', 'orange', 'green'))
                plt.xticks(rotation='vertical')
                _show_figure(fig)
            with col2:
                st.dataframe(new_df)

        # ---- Word-level sentiment tables ---------------------------------
        st.title("Chat Sentiment Analysis")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.header("Positive")
            pos_words = helper.pos_words(selected_user, df_new)
            st.dataframe(pos_words)
        with col2:
            st.header("Negative")
            neg_words = helper.neg_words(selected_user, df_new)
            st.dataframe(neg_words)
        with col3:
            st.header("Neutral")
            neu_words = helper.neu_words(selected_user, df_new)
            st.dataframe(neu_words)

        # ---- Word cloud --------------------------------------------------
        st.title("Word cloud")
        df_wc = helper.word_cloud(selected_user, df_new)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        plt.axis('off')
        _show_figure(fig)

        # ---- Most common words -------------------------------------------
        st.title("Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df_new)
        fig, ax = plt.subplots()
        # Column 0 supplies the bar labels and column 1 the bar lengths.
        ax.barh(most_common_df[0], most_common_df[1])
        _show_figure(fig)
        st.dataframe(most_common_df.style.set_properties(**table_style))

        # ---- Emoji usage -------------------------------------------------
        emoji_df = helper.emoji_helper(selected_user, df_new)
        st.title("Emoji Analysis")
        st.dataframe(emoji_df.style.set_properties(**table_style))
st.title("Sentiment Analysis")

# Streamlit re-executes the whole script on every widget interaction. Without
# caching, the checkpoint below would be loaded again on each rerun.
# ``st.cache_resource`` is used where the installed Streamlit provides it;
# older releases fall back to the legacy ``st.cache`` decorator.
_cache_model = (
    getattr(st, "cache_resource", None)
    or st.cache(allow_output_mutation=True)
)


@_cache_model
def get_model():
    """Load the tokenizer and classifier for the sentiment checkpoint.

    Returns:
        A ``(tokenizer, model)`` tuple, both loaded with ``from_pretrained``
        from ``cardiffnlp/twitter-roberta-base-sentiment``.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    return tokenizer, model
tokenizer, model = get_model()
user_input = st.text_area('Enter Text to Analyze')
button = st.button("Analyze")

# Readable names for the generic class ids, following the checkpoint's model
# card (0 = negative, 1 = neutral, 2 = positive). Any label not listed here is
# shown unchanged.
_SENTIMENT_NAMES = {
    "LABEL_0": "NEGATIVE",
    "LABEL_1": "NEUTRAL",
    "LABEL_2": "POSITIVE",
}

if user_input and button:
    # Classify with the tokenizer/model returned by get_model(). Previously
    # that model was run and its output discarded, while the displayed result
    # came from a second pipeline created without a model argument (i.e. the
    # library's default checkpoint), built on every rerun.
    sent_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

    # Truncate to 512 tokens, the same limit the manual tokenizer call used,
    # so long input is cut rather than passed through at full length.
    raw_prediction = sent_pipeline(user_input, truncation=True, max_length=512)

    prediction = [
        {**item, "label": _SENTIMENT_NAMES.get(item["label"], item["label"])}
        for item in raw_prediction
    ]
    st.write("Prediction: ", prediction)

# NOTE(review): as a plain Python variable this assignment has no effect on
# Streamlit. It looks like the `showWarningOnDirectExecution` option from
# Streamlit's `[global]` configuration section, which would belong in
# `.streamlit/config.toml` — confirm and move if so.
showWarningOnDirectExecution = False