irfantea committed on
Commit
aa5684e
1 Parent(s): c04418b

Upload 3 files

Browse files
requirements.txt ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ altair==4.2.2
2
+ attrs==23.1.0
3
+ blinker==1.6.2
4
+ cachetools==5.3.0
5
+ certifi==2022.12.7
6
+ charset-normalizer==3.1.0
7
+ click==8.1.3
8
+ colorama==0.4.6
9
+ contourpy==1.0.7
10
+ cycler==0.11.0
11
+ decorator==5.1.1
12
+ entrypoints==0.4
13
+ fonttools==4.39.3
14
+ gitdb==4.0.10
15
+ GitPython==3.1.31
16
+ idna==3.4
17
+ importlib-metadata==6.6.0
18
+ Jinja2==3.1.2
19
+ joblib==1.2.0
20
+ jsonschema==4.17.3
21
+ kiwisolver==1.4.4
22
+ markdown-it-py==2.2.0
23
+ MarkupSafe==2.1.2
24
+ matplotlib==3.7.1
25
+ mdurl==0.1.2
26
+ nltk==3.8.1
27
+ numpy==1.24.3
28
+ packaging==23.1
29
+ pandas==2.0.1
30
+ Pillow==9.5.0
31
+ protobuf==3.20.3
32
+ pyarrow==11.0.0
33
+ pydeck==0.8.1b0
34
+ Pygments==2.15.1
35
+ Pympler==1.0.1
36
+ pyparsing==3.0.9
37
+ pyrsistent==0.19.3
38
+ python-dateutil==2.8.2
39
+ pytz==2023.3
40
+ pytz-deprecation-shim==0.1.0.post0
41
+ regex==2023.3.23
42
+ requests==2.29.0
43
+ rich==13.3.5
44
+ six==1.16.0
45
+ smmap==5.0.0
46
+ streamlit==1.22.0
47
+ tenacity==8.2.2
48
+ textblob==0.17.1
49
+ toml==0.10.2
50
+ toolz==0.12.0
51
+ tornado==6.3.1
52
+ tqdm==4.65.0
53
+ typing_extensions==4.5.0
54
+ tzdata==2023.3
55
+ tzlocal==4.3
56
+ urllib3==1.26.15
57
+ validators==0.20.0
58
+ watchdog==3.0.0
59
+ wordcloud==1.9.1.1
60
+ zipp==3.15.0
smart.farming.sentiment.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import streamlit as st
3
+ from textblob import TextBlob
4
+ import re
5
+ import matplotlib.pyplot as plt
6
+
7
+ import nltk
8
+ from nltk.corpus import stopwords
9
+ from collections import Counter
10
+ from wordcloud import WordCloud
11
# Fetch the NLTK English stopword corpus once at startup
# (no-op if it is already cached locally).
nltk.download('stopwords')

# Configure the Streamlit page chrome (title, icon, wide layout).
# NOTE(review): Streamlit expects set_page_config to be the first st.* call —
# this holds here since nothing above it touches st.
st.set_page_config(
    page_title="Smart Farming Sentiment Analysis",
    page_icon="🌱",
    layout="wide"
)
18
+
19
def set_cleantext(dataframe, url_pattern=None):
    """Clean a sentence corpus and return the filtered DataFrame.

    Keeps only rows whose 'sentences' value contains at least 10
    whitespace-separated words, removes URLs, normalises whitespace,
    and drops duplicate sentences.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a string column named 'sentences'.
    url_pattern : re.Pattern, optional
        Compiled pattern for URLs to strip. Defaults to a simple
        http(s) matcher. (Previously this was read from a module-level
        global defined *after* this function — a hidden dependency.)

    Returns
    -------
    pandas.DataFrame
        The cleaned rows with a fresh RangeIndex.
    """
    if url_pattern is None:
        url_pattern = re.compile(r'https?://\S+')

    # Drop sentences shorter than 10 words.
    dataframe = dataframe[dataframe['sentences'].apply(lambda x: len(x.split()) >= 10)]

    # Delete web addresses.
    dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.replace(url_pattern, '', regex=True)

    # Normalise whitespace: newlines become spaces, then trim the ends.
    dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.replace('\n', ' ')
    dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.strip()

    # Delete duplicate sentences and renumber the remaining rows.
    dataframe = dataframe.drop_duplicates(subset=['sentences'])
    dataframe.reset_index(drop=True, inplace=True)

    return dataframe
33
+
34
def set_textblob(dataframe):
    """Attach TextBlob sentiment scores to a single corpus row.

    Intended for ``DataFrame.apply(..., axis=1)``: ``dataframe`` is one
    row (a Series) whose 'sentences' field holds the text. Adds
    'polarity' and 'subjectivity' fields from TextBlob's sentiment and
    returns the augmented row.
    """
    # Score the row's text once and reuse the sentiment tuple.
    sentiment = TextBlob(dataframe['sentences']).sentiment
    dataframe['polarity'] = sentiment.polarity
    dataframe['subjectivity'] = sentiment.subjectivity
    return dataframe
41
+
42
def delete_stopwords(dataframe):
    """Remove English stopwords from every entry of the 'sentences' column.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a string column named 'sentences' (assumed already
        lowercased by the caller — NLTK stopwords are lowercase).

    Returns
    -------
    pandas.DataFrame
        The same DataFrame, mutated in place, for call chaining.
    """
    # Build the stopword set once; set membership is O(1) per word,
    # whereas the raw NLTK list would be scanned linearly for every token.
    stop = set(stopwords.words('english'))
    dataframe['sentences'] = dataframe['sentences'].apply(
        lambda x: ' '.join(word for word in x.split() if word not in stop)
    )
    return dataframe
47
+
48
# Load the raw sentence corpus and remember its original size
# so the cleaning loss can be reported below.
df = pd.read_csv("smartfarmingsentences.csv")
num_ori = df.shape[0]

st.title("Smart Farming Sentiment Analysis")
st.subheader("Sentiment Analysis of Smart Farming Knowledge Graph")

# Module-level URL matcher consumed by set_cleantext().
url_pattern = re.compile(r'https?://\S+')
df = set_cleantext(df)

num_clean = df.shape[0]

# Show original / kept / deleted row counts side by side.
kolom_num1, kolom_num2, kolom_num3 = st.columns(3)

with kolom_num1:
    st.text("Original Sentences: " + str(num_ori))
with kolom_num2:
    st.text("Sentences Count: " + str(num_clean))
with kolom_num3:
    st.text("Deleted Sentences: " + str(num_ori - num_clean))
67
+
68
# --- Sentiment analysis ------------------------------------------------------
# Score every row with TextBlob (adds 'polarity' and 'subjectivity' columns)
# and show the full table.
df = df.apply(set_textblob, axis=1)
st.dataframe(df, use_container_width=True)

# Separate polarity into Positive / Neutral / Negative by sign.
df_pos = df[df['polarity'] > 0]
df_neu = df[df['polarity'] == 0]
df_neg = df[df['polarity'] < 0]

# Separate subjectivity: <= 0.3 is treated as Objective, above as Subjective.
df_obj = df[df['subjectivity'] <= 0.3]
df_sub = df[df['subjectivity'] > 0.3]

# Bar chart of sentiment counts. Each figure gets its own axes object
# (the original reused one `ax` name for both figures and kept unused
# `bars` locals).
figp, axp = plt.subplots()
axp.bar(['Positive', 'Neutral', 'Negative'],
        [len(df_pos), len(df_neu), len(df_neg)],
        color=['green', 'gray', 'red'])
axp.set_xlabel('Sentiment')
axp.set_ylabel('Count')
axp.set_title('Sentiment Analysis')

# Bar chart of objectivity vs subjectivity counts.
figs, axs = plt.subplots()
axs.bar(['Objective', 'Subjective'],
        [len(df_obj), len(df_sub)],
        color=['green', 'red'])
axs.set_xlabel('Subjectivity')
axs.set_ylabel('Count')
axs.set_title('Subjectivity Analysis')

kolom_polar, kolom_subject = st.columns(2)

with kolom_polar:
    # Show Sentiment Analysis counts and chart.
    st.subheader("Sentiment Analysis")
    st.text("Positive: " + str(df_pos.shape[0]))
    st.text("Neutral: " + str(df_neu.shape[0]))
    st.text("Negative: " + str(df_neg.shape[0]))
    st.pyplot(figp)
with kolom_subject:
    # Show Subjectivity Analysis counts and chart.
    st.subheader("Subjectivity Analysis")
    st.text("Objective: " + str(df_obj.shape[0]))
    st.text("Subjective: " + str(df_sub.shape[0]))
    st.text("---")
    st.pyplot(figs)
111
+
112
# --- Word frequency / word cloud ---------------------------------------------
# Normalise for counting: lowercase everything.
df['sentences'] = df['sentences'].str.lower()

# Strip punctuation. BUG FIX: regex=True is mandatory here — since pandas 2.0
# (pinned as pandas==2.0.1) str.replace defaults to a *literal* substitution,
# so the original character-class string was searched verbatim and no
# punctuation was ever removed.
df['sentences'] = df['sentences'].str.replace(
    r'[.,!?;:"\'()\[\]{}<>\\/|`~@#$%^&*\-_+=]', '', regex=True)

# Remove English stopwords.
df = delete_stopwords(df)

# Drop one- and two-letter tokens.
df['sentences'] = df['sentences'].apply(
    lambda x: ' '.join(word for word in x.split() if len(word) > 2))

# Drop citation boilerplate tokens that leak in from scraped references.
custom_words = ["s", "al", 'view', 'article', 'google', 'scholar', "scopus", "crossref"]
df['sentences'] = df['sentences'].apply(
    lambda x: ' '.join(word for word in x.split() if word not in custom_words))

# Build the word cloud over the whole cleaned corpus.
all_words = ' '.join(text for text in df['sentences'])
wordcloud = WordCloud(width=1024, height=1024, random_state=21,
                      max_font_size=110).generate(all_words)
st.subheader("Wordcloud")
# BUG FIX: report the word count, not len(all_words) (character count).
st.text("Total Words: " + str(len(all_words.split())))
plt.figure(figsize=(10, 7))
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis('off')
st.pyplot(plt)

# Frequency table of the most common tokens.
# NOTE(review): the subheader says "Top 10" but up to 1000 rows are computed
# and shown; kept as-is to preserve behavior — confirm the intended count.
word_freq = Counter(all_words.split()).most_common(1000)
df_word_freq = pd.DataFrame(word_freq, columns=['Word', 'Frequency'])
st.subheader("Top 10 Words")
st.dataframe(df_word_freq.head(1000), use_container_width=True)
smartfarmingsentences.csv ADDED
The diff for this file is too large to render. See raw diff