Upload app.py
app.py ADDED
@@ -0,0 +1,159 @@
import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import warnings
warnings.filterwarnings("ignore")
import nltk
nltk.download('all')  # downloads every NLTK corpus; the helper module relies on NLTK resources
import matplotlib.pyplot as plt
import helper
import preprocessor
from mtranslate import translate
import pandas as pd
import os
from gtts import gTTS
import base64
import torch
import seaborn as sns

st.sidebar.title("WhatsApp Chat Analyzer")

uploaded_file = st.sidebar.file_uploader("Choose a file")

if uploaded_file is not None:

    # Read the exported chat and turn it into a DataFrame
    bytes_data = uploaded_file.getvalue()
    data = bytes_data.decode("utf-8")
    df_new = preprocessor.preprocess(data)

    # Build the user dropdown, with an extra option for whole-group analysis
    user_list = df_new['users'].unique().tolist()
    user_list.sort()
    user_list.insert(0, "Group analysis")
    selected_user = st.sidebar.selectbox("Show analysis wrt", user_list)

    if st.sidebar.button("Show Analysis"):
        # Top-line statistics
        num_messages, words, num_links = helper.fetch_stats(selected_user, df_new)
        st.title("Top Statistics")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.header("Total Messages")
            st.title(num_messages)
        with col2:
            st.header("Total Words")
            st.title(words)
        with col3:
            st.header("Links Shared")
            st.title(num_links)

        # Monthly and daily timelines
        st.title("Timeline")
        col1, col2 = st.columns(2)

        with col1:
            st.header("Monthly")
            timeline = helper.monthly_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(timeline['time'], timeline['message'])
            plt.xticks(rotation='vertical')
            st.pyplot(fig)
        with col2:
            st.header("Daily")
            daily_timeline = helper.Daily_timeline(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.plot(daily_timeline['Date'], daily_timeline['message'], color='black')
            plt.xticks(rotation='vertical')
            st.pyplot(fig)

        # Busiest day of the week and busiest month
        st.title("Activity Map")
        col1, col2 = st.columns(2)

        with col1:
            st.header("Most busy day")
            busy_day = helper.week_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(busy_day.index, busy_day.values,
                   color=('violet', 'indigo', 'blue', 'green', 'yellow', 'orange', 'red'))
            plt.xticks(rotation='vertical')
            st.pyplot(fig)
        with col2:
            st.header("Most busy month")
            busy_month = helper.month_activity_map(selected_user, df_new)
            fig, ax = plt.subplots()
            ax.bar(busy_month.index, busy_month.values, color=('indigo', 'blue', 'green', 'red'))
            plt.xticks(rotation='vertical')
            st.pyplot(fig)

        # Day-of-week vs. time-of-day heatmap
        st.title("Weekly Activity Heatmap")
        activity_heatmap = helper.activity_heatmap(selected_user, df_new)
        fig, ax = plt.subplots()
        ax = sns.heatmap(activity_heatmap, cmap='RdBu', linewidths=1, linecolor='black')
        st.pyplot(fig)

        # Per-user activity is only meaningful for the whole group
        if selected_user == "Group analysis":
            st.title("Most busy user")
            x, new_df = helper.most_busy_users(df_new)
            fig, ax = plt.subplots()
            col1, col2 = st.columns(2)

            with col1:
                ax.bar(x.index, x.values, color=('blue', 'red', 'pink', 'orange', 'green'))
                plt.xticks(rotation='vertical')
                st.pyplot(fig)
            with col2:
                st.dataframe(new_df)

        # Message-level sentiment buckets
        st.title("Chat Sentiment Analysis")
        col1, col2, col3 = st.columns(3)

        with col1:
            st.header("Positive")
            pos_words = helper.pos_words(selected_user, df_new)
            st.dataframe(pos_words)
        with col2:
            st.header("Negative")
            neg_words = helper.neg_words(selected_user, df_new)
            st.dataframe(neg_words)
        with col3:
            st.header("Neutral")
            neu_words = helper.neu_words(selected_user, df_new)
            st.dataframe(neu_words)

        # Word cloud of the selected user's messages
        st.title("Word Cloud")
        df_wc = helper.word_cloud(selected_user, df_new)
        fig, ax = plt.subplots()
        ax.imshow(df_wc)
        plt.axis('off')
        st.pyplot(fig)

        # Most common words as a horizontal bar chart plus the raw table
        st.title("Most Common Words")
        most_common_df = helper.most_common_words(selected_user, df_new)
        fig, ax = plt.subplots()
        ax.barh(most_common_df[0], most_common_df[1])
        st.pyplot(fig)
        st.dataframe(most_common_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))

        # Emoji usage
        emoji_df = helper.emoji_helper(selected_user, df_new)
        st.title("Emoji Analysis")
        st.dataframe(emoji_df.style.set_properties(**{"background-color": "black", "color": "lawngreen"}))


# Standalone sentiment analysis of free text entered by the user
st.title("Sentiment Analysis")


@st.cache(allow_output_mutation=True)
def get_model():
    # Cache the tokenizer and model so they are loaded only once
    MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    return tokenizer, model


tokenizer, model = get_model()

user_input = st.text_area('Enter Text to Analyze')
button = st.button("Analyze")

sent_pipeline = pipeline("sentiment-analysis")  # default checkpoint, separate from the model loaded above
if user_input and button:
    test_sample = tokenizer([user_input], padding=True, truncation=True, max_length=512, return_tensors='pt')
    output = model(**test_sample)  # raw logits from the RoBERTa model (currently not shown to the user)
    st.write("Prediction: ", sent_pipeline(user_input))

# Note: this bare assignment has no effect; Streamlit options are set via
# .streamlit/config.toml or st.set_option(), not module-level variables.
showWarningOnDirectExecution = False
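
A side note on the last section: the RoBERTa model returned by get_model() is run on the user's text, but its output is discarded and the prediction shown to the user comes from the default pipeline("sentiment-analysis") checkpoint. Below is a minimal sketch, not part of the committed file, of how the cardiffnlp model could back the displayed prediction instead; the LABEL_0/1/2 to negative/neutral/positive mapping is an assumption based on that checkpoint's usual convention.

from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

MODEL = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Reuse the already-downloaded weights instead of letting the pipeline
# pull its own default checkpoint.
sent_pipeline = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Assumed label convention for this checkpoint: LABEL_0/1/2 = negative/neutral/positive.
label_names = {"LABEL_0": "Negative", "LABEL_1": "Neutral", "LABEL_2": "Positive"}

result = sent_pipeline("I love this group!")[0]
print(label_names.get(result["label"], result["label"]), round(result["score"], 3))

In the Streamlit app this would replace the bare pipeline("sentiment-analysis") call, so only one model is downloaded and the prediction shown matches the model that get_model() loads.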