Spaces:
Build error
Build error
File size: 5,489 Bytes
7571eef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import warnings
warnings.filterwarnings("ignore")
import nltk
# NOTE(review): this call sits at module top level, so it executes every time
# the script runs, and it requests NLTK's "all" identifier rather than a
# specific resource. The resources that `helper` / `preprocessor` actually
# need are not visible from this file -- presumably only a handful (e.g. a
# sentiment lexicon or stopword list); confirm and narrow the download to
# those names to cut start-up time and disk usage.
nltk.download('all')
import matplotlib.pyplot as plt
import helper
import preprocessor
from mtranslate import translate
import pandas as pd
import os
from gtts import gTTS
import base64
import torch
import seaborn as sns
def _render_figure(fig):
    """Draw ``fig`` in the active Streamlit container, then close it.

    Figures created with ``plt.subplots()`` stay registered with pyplot until
    they are closed explicitly. This script builds several figures on every
    run, so each one is released right after it has been rendered.
    """
    st.pyplot(fig)
    plt.close(fig)


# ---------------------------------------------------------------------------
# WhatsApp chat analysis: upload a chat export in the sidebar, pick a user
# (or the whole group) and render statistics, timelines, activity maps,
# sentiment tables, a word cloud, common words and emoji usage.
# ---------------------------------------------------------------------------
st.sidebar.title("Whatsapp Chat analyzer")
uploaded_file = st.sidebar.file_uploader("Choose a file")

if uploaded_file is not None:
    # The upload is read as bytes, decoded as UTF-8 and handed to the
    # project-local preprocessor, which returns the DataFrame that every
    # helper below receives.
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df_new = preprocessor.preprocess(data)

    # Selector entries: the sorted unique values of the 'users' column, with a
    # "Group analysis" entry placed first.
    user_list = df_new['users'].unique().tolist()
    user_list.sort()
    user_list.insert(0, "Group analysis")
    selected_user = st.sidebar.selectbox("show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        # --- Headline numbers ------------------------------------------------
        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
        st.title("Top Statistics")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Links Shared")
            st.title(num_links)

        # --- Timelines -------------------------------------------------------
        st.title("Timeline")
        col1, col2 = st.columns(2)
        with col1:
            st.header("Monthly ")
            timeline = helper.monthly_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(timeline['time'], timeline['message'])
            # plt.xticks acts on the current axes, i.e. the ones just created.
            plt.xticks(rotation='vertical')
            _render_figure(fig)
        with col2:
            st.title("Daily")
            daily_timeline = helper.Daily_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(daily_timeline['Date'], daily_timeline['message'], color='black')
            plt.xticks(rotation='vertical')
            _render_figure(fig)

        # --- Activity maps ---------------------------------------------------
        st.title("Activity Map")
        col1, col2 = st.columns(2)
        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values,
                   color=('violet', 'indigo', 'blue', 'green', 'yellow', 'orange', 'red'))
            plt.xticks(rotation='vertical')
            _render_figure(fig)
        with col2:
            st.header("Most busy Month")
            busy_day = helper.month_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values,
                   color=('indigo', 'blue', 'green', 'red'))
            plt.xticks(rotation='vertical')
            _render_figure(fig)

        st.title("Weekly Activity HeatMap")
        Activity_heatmap = helper.activity_heatmap(selected_user, df_new)
        fig, ax = plt.subplots()
        # Draw onto the axes created above explicitly rather than relying on
        # pyplot's notion of the current axes.
        ax = sns.heatmap(Activity_heatmap, cmap='RdBu', linewidths=1,
                         linecolor='black', ax=ax)
        _render_figure(fig)

        # --- Per-user ranking (only when the whole group is selected) --------
        if selected_user == "Group analysis":
            st.title("Most busy user")
            x, new_df = helper.most_busy_users(df_new)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)
            with col1:
                ax.bar(x.index, x.values,
                       color=('blue', 'red', 'pink', 'orange', 'green'))
                plt.xticks(rotation='vertical')
                _render_figure(fig)
            with col2:
                st.dataframe(new_df)

        # --- Sentiment tables ------------------------------------------------
        st.title("Chat Sentiment Analysis")
        col1, col2, col3 = st.columns(3)
        with col1:
            st.header("Positive")
            pos_words = helper.pos_words(selected_user, df_new)
            st.dataframe(pos_words)
        with col2:
            st.header("Negative")
            neg_words = helper.neg_words(selected_user, df_new)
            st.dataframe(neg_words)
        with col3:
            st.header("Neutral")
            neu_words = helper.neu_words(selected_user, df_new)
            st.dataframe(neu_words)

        # --- Word cloud ------------------------------------------------------
        st.title("Word cloud")
        df_wc = helper.word_cloud(selected_user, df_new)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        ax.axis('off')
        _render_figure(fig)

        # --- Most common words -----------------------------------------------
        st.title("Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df_new)
        fig, ax = plt.subplots()
        # Column 0 supplies the bar labels and column 1 the bar lengths.
        ax.barh(most_common_df[0], most_common_df[1])
        _render_figure(fig)
        st.dataframe(most_common_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))

        # --- Emoji usage -----------------------------------------------------
        emoji_df = helper.emoji_helper(selected_user, df_new)
        st.title("Emoji Analysis")
        st.dataframe(emoji_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))
# ---------------------------------------------------------------------------
# Free-text sentiment analysis: type a sentence, press "Analyze", and the
# prediction of a Hugging Face sentiment pipeline is written to the page.
# ---------------------------------------------------------------------------
st.title("Sentiment Analysis")


@st.cache(allow_output_mutation=True)
def get_model():
    """Load the tokenizer and classifier for the Cardiff NLP sentiment model.

    Cached with ``st.cache`` so the weights are loaded once instead of on
    every script rerun.

    Returns:
        A ``(tokenizer, model)`` tuple for
        ``cardiffnlp/twitter-roberta-base-sentiment``.
    """
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    return tokenizer, model


@st.cache(allow_output_mutation=True)
def _get_sentiment_pipeline():
    """Build the default ``sentiment-analysis`` pipeline once and reuse it.

    Constructing the pipeline loads a model, so it is cached the same way as
    ``get_model`` rather than rebuilt on every rerun.
    """
    return pipeline("sentiment-analysis")


tokenizer, model = get_model()
user_input = st.text_area('Enter Text to Analyze')
button = st.button("Analyze")
sent_pipeline = _get_sentiment_pipeline()

if user_input and button:
    test_sample = tokenizer([user_input], padding=True, truncation=True, max_length=512, return_tensors='pt')
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**test_sample)
    # NOTE(review): `output` is never read below. The prediction shown comes
    # from the default pipeline, not from the model returned by get_model().
    # Confirm which model is meant to serve predictions.
    st.write("Prediction: ", sent_pipeline(user_input))

# NOTE(review): nothing in the visible code reads this plain module variable.
# It looks like it was meant to be Streamlit's
# `global.showWarningOnDirectExecution` config option, which is set through
# Streamlit configuration and not through a Python assignment -- confirm.
showWarningOnDirectExecution = False
|