Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- requirements.txt +60 -0
- smart.farming.sentiment.py +142 -0
- smartfarmingsentences.csv +0 -0
requirements.txt
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
altair==4.2.2
attrs==23.1.0
blinker==1.6.2
cachetools==5.3.0
certifi==2022.12.7
charset-normalizer==3.1.0
click==8.1.3
colorama==0.4.6
contourpy==1.0.7
cycler==0.11.0
decorator==5.1.1
entrypoints==0.4
fonttools==4.39.3
gitdb==4.0.10
GitPython==3.1.31
idna==3.4
importlib-metadata==6.6.0
Jinja2==3.1.2
joblib==1.2.0
jsonschema==4.17.3
kiwisolver==1.4.4
markdown-it-py==2.2.0
MarkupSafe==2.1.2
matplotlib==3.7.1
mdurl==0.1.2
nltk==3.8.1
numpy==1.24.3
packaging==23.1
pandas==2.0.1
Pillow==9.5.0
protobuf==3.20.3
pyarrow==11.0.0
pydeck==0.8.1b0
Pygments==2.15.1
Pympler==1.0.1
pyparsing==3.0.9
pyrsistent==0.19.3
python-dateutil==2.8.2
pytz==2023.3
pytz-deprecation-shim==0.1.0.post0
regex==2023.3.23
requests==2.29.0
rich==13.3.5
six==1.16.0
smmap==5.0.0
streamlit==1.22.0
tenacity==8.2.2
textblob==0.17.1
toml==0.10.2
toolz==0.12.0
tornado==6.3.1
tqdm==4.65.0
typing_extensions==4.5.0
tzdata==2023.3
tzlocal==4.3
urllib3==1.26.15
validators==0.20.0
watchdog==3.0.0
wordcloud==1.9.1.1
zipp==3.15.0
smart.farming.sentiment.py
ADDED
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
from textblob import TextBlob
|
4 |
+
import re
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
|
7 |
+
import nltk
|
8 |
+
from nltk.corpus import stopwords
|
9 |
+
from collections import Counter
|
10 |
+
from wordcloud import WordCloud
|
11 |
+
nltk.download('stopwords')
|
12 |
+
|
13 |
+
st.set_page_config(
|
14 |
+
page_title="Smart Farming Sentiment Analysis",
|
15 |
+
page_icon="🌱",
|
16 |
+
layout="wide"
|
17 |
+
)
|
18 |
+
|
19 |
+
def set_cleantext(dataframe):
|
20 |
+
#Sentence less than 10 words
|
21 |
+
dataframe = dataframe[dataframe['sentences'].apply(lambda x: len(x.split()) >= 10)]
|
22 |
+
#Delete web address
|
23 |
+
dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.replace(url_pattern, '', regex=True)
|
24 |
+
#Removing empty spaces
|
25 |
+
dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.replace('\n', ' ')
|
26 |
+
dataframe.loc[:, 'sentences'] = dataframe['sentences'].str.strip()
|
27 |
+
|
28 |
+
#Delete duplicate sentences
|
29 |
+
dataframe = dataframe.drop_duplicates(subset=['sentences'])
|
30 |
+
dataframe.reset_index(drop=True, inplace=True)
|
31 |
+
|
32 |
+
return dataframe
|
33 |
+
|
34 |
+
def set_textblob(dataframe):
|
35 |
+
# apply TextBlob to the value in the column
|
36 |
+
text_blob = TextBlob(dataframe['sentences'])
|
37 |
+
# add new columns for polarity and subjectivity
|
38 |
+
dataframe['polarity'] = text_blob.sentiment.polarity
|
39 |
+
dataframe['subjectivity'] = text_blob.sentiment.subjectivity
|
40 |
+
return dataframe
|
41 |
+
|
42 |
+
def delete_stopwords(dataframe):
|
43 |
+
#Delete stopwords
|
44 |
+
stop = stopwords.words('english')
|
45 |
+
dataframe['sentences'] = dataframe['sentences'].apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)]))
|
46 |
+
return dataframe
|
47 |
+
|
48 |
+
df = pd.read_csv("smartfarmingsentences.csv")
|
49 |
+
num_ori = df.shape[0]
|
50 |
+
|
51 |
+
st.title("Smart Farming Sentiment Analysis")
|
52 |
+
st.subheader("Sentiment Analysis of Smart Farming Knowledge Graph")
|
53 |
+
|
54 |
+
url_pattern = re.compile(r'https?://\S+')
|
55 |
+
df = set_cleantext(df)
|
56 |
+
|
57 |
+
num_clean = df.shape[0]
|
58 |
+
|
59 |
+
kolom_num1, kolom_num2, kolom_num3 = st.columns(3)
|
60 |
+
|
61 |
+
with kolom_num1:
|
62 |
+
st.text("Original Sentences: " + str(num_ori))
|
63 |
+
with kolom_num2:
|
64 |
+
st.text("Sentences Count: " + str(num_clean))
|
65 |
+
with kolom_num3:
|
66 |
+
st.text("Deleted Sentences: " + str(num_ori - num_clean))
|
67 |
+
|
68 |
+
#Sentiment Analysis
|
69 |
+
df = df.apply(set_textblob, axis=1)
|
70 |
+
st.dataframe(df, use_container_width=True)
|
71 |
+
|
72 |
+
#Separate polarity by Positive, Neutral, Negative
|
73 |
+
df_pos = df[df['polarity'] > 0]
|
74 |
+
df_neu = df[df['polarity'] == 0]
|
75 |
+
df_neg = df[df['polarity'] < 0]
|
76 |
+
|
77 |
+
#Separate subjectivity by Objective, Subjective
|
78 |
+
df_obj = df[df['subjectivity'] <= 0.3]
|
79 |
+
df_sub = df[df['subjectivity'] > 0.3]
|
80 |
+
|
81 |
+
figp, ax = plt.subplots()
|
82 |
+
bars = ax.bar(['Positive', 'Neutral', 'Negative'], [len(df_pos), len(df_neu), len(df_neg)], color=['green', 'gray', 'red'])
|
83 |
+
ax.set_xlabel('Sentiment')
|
84 |
+
ax.set_ylabel('Count')
|
85 |
+
ax.set_title('Sentiment Analysis')
|
86 |
+
|
87 |
+
figs, ax = plt.subplots()
|
88 |
+
bars = ax.bar(['Objective', 'Subjective'], [len(df_obj), len(df_sub)], color=['green', 'red'])
|
89 |
+
ax.set_xlabel('Subjectivity')
|
90 |
+
ax.set_ylabel('Count')
|
91 |
+
ax.set_title('Subjectivity Analysis')
|
92 |
+
|
93 |
+
kolom_polar, kolom_subject = st.columns(2)
|
94 |
+
|
95 |
+
with kolom_polar:
|
96 |
+
#Show Sentiment Analysis
|
97 |
+
st.subheader("Sentiment Analysis")
|
98 |
+
st.text("Positive: " + str(df_pos.shape[0]))
|
99 |
+
st.text("Neutral: " + str(df_neu.shape[0]))
|
100 |
+
st.text("Negative: " + str(df_neg.shape[0]))
|
101 |
+
# Create a bar chart
|
102 |
+
st.pyplot(figp)
|
103 |
+
with kolom_subject:
|
104 |
+
#Show Subjectivity Analysis
|
105 |
+
st.subheader("Subjectivity Analysis")
|
106 |
+
st.text("Objective: " + str(df_obj.shape[0]))
|
107 |
+
st.text("Subjective: " + str(df_sub.shape[0]))
|
108 |
+
st.text("---")
|
109 |
+
# Create a bar chart
|
110 |
+
st.pyplot(figs)
|
111 |
+
|
112 |
+
#Make Lowercase
|
113 |
+
df['sentences'] = df['sentences'].str.lower()
|
114 |
+
|
115 |
+
#remove punctuation . , ! ? : ; " ' ( ) [ ] { } < > / \ | ` ~ @ # $ % ^ & * - _ = +
|
116 |
+
df['sentences'] = df['sentences'].str.replace('[.,!?;:"\'()\[\]{}<>\\/|`~@#$%^&*\-_+=]', '')
|
117 |
+
|
118 |
+
#Stopwords
|
119 |
+
df = delete_stopwords(df)
|
120 |
+
|
121 |
+
#Delete one or two words
|
122 |
+
df['sentences'] = df['sentences'].apply(lambda x: ' '.join([word for word in x.split() if len(word) > 2]))
|
123 |
+
|
124 |
+
#Remove custom words
|
125 |
+
custom_words = ["s", "al", 'view', 'article', 'google', 'scholar', "scopus", "crossref"]
|
126 |
+
df['sentences'] = df['sentences'].apply(lambda x: ' '.join([word for word in x.split() if word not in (custom_words)]))
|
127 |
+
|
128 |
+
#Make Wordcloud
|
129 |
+
all_words = ' '.join([text for text in df['sentences']])
|
130 |
+
wordcloud = WordCloud(width=1024, height=1024, random_state=21, max_font_size=110).generate(all_words)
|
131 |
+
st.subheader("Wordcloud")
|
132 |
+
st.text("Total Words: " + str(len(all_words)))
|
133 |
+
plt.figure(figsize=(10, 7))
|
134 |
+
plt.imshow(wordcloud, interpolation="bilinear")
|
135 |
+
plt.axis('off')
|
136 |
+
st.pyplot(plt)
|
137 |
+
|
138 |
+
#Show Top 10 Words
|
139 |
+
word_freq = Counter(all_words.split()).most_common(1000)
|
140 |
+
df_word_freq = pd.DataFrame(word_freq, columns=['Word', 'Frequency'])
|
141 |
+
st.subheader("Top 10 Words")
|
142 |
+
st.dataframe(df_word_freq.head(1000), use_container_width=True)
|
smartfarmingsentences.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|