Committed by AroojImtiaz · verified
Commit 520faf7 · 1 Parent(s): 917736f

Create app.py

Files changed (1):
  1. app.py (+198, -0)

app.py ADDED
import streamlit as st
import pandas as pd
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier

# One-time NLTK data downloads: the 'punkt' tokenizer models and the stopword list
nltk.download('punkt')
nltk.download('stopwords')
sw = stopwords.words("english")

rad = st.sidebar.radio(
    "Navigation",
    ["Home", "Sarcasm Detection", "Sentiment Analysis", "Spam or Ham Detection",
     "Stress Detection", "Hate and Offensive Content Detection"],
)

# Home Page
if rad == "Home":
    st.title("Complete Text Analysis App")
    st.image("SEO-articles-V2_Text-Analysis.png")
    st.text(" ")
    st.text("The following text analysis options are available:")
    st.text(" ")
    st.text("1. Spam or Ham Detection")
    st.text("2. Sentiment Analysis")
    st.text("3. Stress Detection")
    st.text("4. Hate and Offensive Content Detection")
    st.text("5. Sarcasm Detection")

# Clean and normalise raw user input: lowercase, tokenize, keep only
# alphanumeric tokens, drop stopwords and punctuation, then Porter-stem
def transform_text(text):
    text = text.lower()
    text = word_tokenize(text)
    y = []
    for i in text:
        if i.isalnum():
            y.append(i)
    text = y[:]
    y.clear()
    for i in text:
        # reuse the precomputed stopword list rather than rebuilding it per token
        if i not in sw and i not in string.punctuation:
            y.append(i)
    text = y[:]
    y.clear()
    ps = PorterStemmer()
    for i in text:
        y.append(ps.stem(i))
    return " ".join(y)

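# Illustrative example (not in the original commit) of what the cleaning
# pipeline above produces, e.g.:
#   transform_text("Running dogs are running!")  ->  "run dog run"
# ("!" fails isalnum, "are" is a stopword, and the Porter stemmer maps
# "running" -> "run" and "dogs" -> "dog")
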
# Spam Detection Prediction
# Each transformN helper calls fit_transform at import time, so the matching
# tfidfN vectorizer is already fitted when the page code below calls
# tfidfN.transform on user input
tfidf1 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform1(txt1):
    txt2 = tfidf1.fit_transform(txt1)
    return txt2.toarray()

df1 = pd.read_csv("Spam Detection.csv")
df1.columns = ["Label", "Text"]
x = transform1(df1["Text"])
y = df1["Label"]
x_train1, x_test1, y_train1, y_test1 = train_test_split(x, y, test_size=0.1, random_state=0)
model1 = LogisticRegression()
model1.fit(x_train1, y_train1)

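# Optional sanity check (a sketch, not in the original commit): report
# hold-out accuracy on the 10% test split; LogisticRegression.score returns
# mean accuracy. Uncomment to print it during development.
# print("Spam model hold-out accuracy:", model1.score(x_test1, y_test1))
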
# Spam Detection Analysis Page
if rad == "Spam or Ham Detection":
    st.header("Detect Whether A Text Is Spam Or Ham?")
    sent1 = st.text_area("Enter The Text")
    transformed_sent1 = transform_text(sent1)
    vector_sent1 = tfidf1.transform([transformed_sent1])
    prediction1 = model1.predict(vector_sent1)[0]

    if st.button("Predict"):
        if prediction1 == "spam":
            st.warning("Spam Text!!")
        elif prediction1 == "ham":
            st.success("Ham Text!!")

# Sentiment Analysis Prediction
tfidf2 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform2(txt1):
    txt2 = tfidf2.fit_transform(txt1)
    return txt2.toarray()

df2 = pd.read_csv("Sentiment Analysis.csv")
df2.columns = ["Text", "Label"]
x = transform2(df2["Text"])
y = df2["Label"]
x_train2, x_test2, y_train2, y_test2 = train_test_split(x, y, test_size=0.1, random_state=0)
model2 = LogisticRegression()
model2.fit(x_train2, y_train2)

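# Optional sketch (not in the original commit): a confusion matrix gives a
# fuller picture than raw accuracy on the sentiment test split.
# from sklearn.metrics import confusion_matrix
# print(confusion_matrix(y_test2, model2.predict(x_test2)))
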
# Sentiment Analysis Page
if rad == "Sentiment Analysis":
    st.header("Detect The Sentiment Of The Text!!")
    sent2 = st.text_area("Enter The Text")
    transformed_sent2 = transform_text(sent2)
    vector_sent2 = tfidf2.transform([transformed_sent2])
    prediction2 = model2.predict(vector_sent2)[0]

    if st.button("Predict"):
        if prediction2 == 0:
            st.warning("Negative Text!!")
        elif prediction2 == 1:
            st.success("Positive Text!!")

# Stress Detection Prediction
tfidf3 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform3(txt1):
    txt2 = tfidf3.fit_transform(txt1)
    return txt2.toarray()

df3 = pd.read_csv("Stress Detection.csv")
df3 = df3.drop(["subreddit", "post_id", "sentence_range", "syntax_fk_grade"], axis=1)
df3.columns = ["Text", "Sentiment", "Stress Level"]
x = transform3(df3["Text"])
y = df3["Stress Level"].to_numpy()
x_train3, x_test3, y_train3, y_test3 = train_test_split(x, y, test_size=0.1, random_state=0)
model3 = DecisionTreeRegressor(max_leaf_nodes=2000)
model3.fit(x_train3, y_train3)

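# Optional sketch (not in the original commit): for a regressor, .score
# returns R^2 on the held-out split, a rough indicator of fit quality.
# print("Stress model R^2:", model3.score(x_test3, y_test3))
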
# Stress Detection Page
if rad == "Stress Detection":
    st.header("Detect The Amount Of Stress In The Text!!")
    sent3 = st.text_area("Enter The Text")
    transformed_sent3 = transform_text(sent3)
    vector_sent3 = tfidf3.transform([transformed_sent3])
    prediction3 = model3.predict(vector_sent3)[0]

    if st.button("Predict"):
        # Threshold the regressor's output at the 0.5 midpoint (assuming
        # binary 0/1 stress labels); the original >=0 / <0 split could never
        # take the "not stressful" branch, since predictions over
        # non-negative labels are never negative
        if prediction3 >= 0.5:
            st.warning("Stressful Text!!")
        else:
            st.success("Not A Stressful Text!!")

# Hate & Offensive Content Prediction
tfidf4 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform4(txt1):
    txt2 = tfidf4.fit_transform(txt1)
    return txt2.toarray()

df4 = pd.read_csv("Hate Content Detection.csv")
df4 = df4.drop(["Unnamed: 0", "count", "neither"], axis=1)
df4.columns = ["Hate Level", "Offensive Level", "Class Level", "Text"]
x = transform4(df4["Text"])
y = df4["Class Level"]
x_train4, x_test4, y_train4, y_test4 = train_test_split(x, y, test_size=0.1, random_state=0)
model4 = RandomForestClassifier()
model4.fit(x_train4, y_train4)

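# Optional sanity check (not in the original commit): hate/offensive corpora
# are typically heavily imbalanced across classes, which accuracy alone hides.
# print(df4["Class Level"].value_counts())
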
# Hate & Offensive Content Page
if rad == "Hate and Offensive Content Detection":
    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
    sent4 = st.text_area("Enter The Text")
    transformed_sent4 = transform_text(sent4)
    vector_sent4 = tfidf4.transform([transformed_sent4])
    prediction4 = model4.predict(vector_sent4)[0]

    if st.button("Predict"):
        # st.error replaces the original st.exception, which expects an
        # Exception instance rather than a message string
        if prediction4 == 0:
            st.error("Highly Offensive Text!!")
        elif prediction4 == 1:
            st.warning("Offensive Text!!")
        elif prediction4 == 2:
            st.success("Non Offensive Text!!")

# Sarcasm Detection Prediction
tfidf5 = TfidfVectorizer(stop_words=sw, max_features=20)
def transform5(txt1):
    txt2 = tfidf5.fit_transform(txt1)
    return txt2.toarray()

df5 = pd.read_csv("Sarcasm Detection.csv")
df5.columns = ["Text", "Label"]
x = transform5(df5["Text"])
y = df5["Label"]
x_train5, x_test5, y_train5, y_test5 = train_test_split(x, y, test_size=0.1, random_state=0)
model5 = LogisticRegression()
model5.fit(x_train5, y_train5)

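# Optional sketch (not in the original commit): Streamlit reruns this whole
# script on every interaction, so all five models retrain per click. Wrapping
# the fitting code in a cached trainer along these lines avoids that; assumes
# Streamlit >= 1.18 for st.cache_resource.
# @st.cache_resource
# def train_sarcasm_model():
#     m = LogisticRegression()
#     m.fit(x_train5, y_train5)
#     return m
# model5 = train_sarcasm_model()
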
# Sarcasm Detection Page
if rad == "Sarcasm Detection":
    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
    sent5 = st.text_area("Enter The Text")
    transformed_sent5 = transform_text(sent5)
    vector_sent5 = tfidf5.transform([transformed_sent5])
    prediction5 = model5.predict(vector_sent5)[0]

    if st.button("Predict"):
        # st.warning replaces the original st.exception here as well, since
        # st.exception expects an Exception instance, not a message string
        if prediction5 == 1:
            st.warning("Sarcastic Text!!")
        elif prediction5 == 0:
            st.success("Non Sarcastic Text!!")