BulatF committed on
Commit
0ac9264
1 Parent(s): a19509b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +146 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ import torch.nn.functional as F
5
+ import torch
6
+ import io
7
+ import base64
8
+ from stqdm import stqdm
9
+
10
# Page configuration comes first: Streamlit expects set_page_config to be the
# first Streamlit command of a run, and the cached loader below may render a
# spinner element while it executes.
st.set_page_config(layout="wide")

# Define the model and tokenizer
model_name = 'nlptown/bert-base-multilingual-uncased-sentiment'


@st.cache_resource
def load_model_and_tokenizer(name):
    """Load the sequence-classification model and its tokenizer.

    Streamlit re-executes this script on every widget interaction. Caching
    the loaded objects as a shared resource means the weights are read once
    instead of on each rerun.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        AutoModelForSequenceClassification.from_pretrained(name),
        AutoTokenizer.from_pretrained(name),
    )


model, tokenizer = load_model_and_tokenizer(model_name)
15
+
16
+ #defs
17
def classify_reviews(reviews):
    """Return class probabilities for a batch of review texts.

    Args:
        reviews: Batch of review strings, tokenized and classified in a
            single forward pass (inputs are padded and truncated to 512
            tokens).

    Returns:
        A list with one entry per review; each entry is the list of softmax
        probabilities computed over the model's logits.
    """
    inputs = tokenizer(reviews, return_tensors='pt', truncation=True, padding=True, max_length=512)
    # Inference only: without no_grad the forward pass records an autograd
    # graph for every batch, which costs memory and time for no benefit.
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = F.softmax(outputs.logits, dim=1).tolist()
    return probabilities
22
+
23
def top_rating(scores):
    """Return the 1-based position of the largest score (first one on ties)."""
    best = max(scores)
    position = scores.index(best)
    return position + 1
25
+
26
def top_prob(scores):
    """Return the largest value found in ``scores``."""
    highest = max(scores)
    return highest
28
+
29
def get_table_download_link(df):
    """Build an HTML anchor that embeds ``df`` as a base64-encoded CSV.

    The frame is serialized without its index, and the whole CSV payload is
    inlined into the link's ``data:`` URI.
    """
    csv_text = df.to_csv(index=False)
    encoded_bytes = base64.b64encode(csv_text.encode())
    b64 = encoded_bytes.decode()
    return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download csv file</a>'
33
+
34
def main():
    """Render the page: upload a workbook, pick a column, run the analysis.

    The "Start Analysis" button is always rendered; the analysis itself only
    runs once the button is clicked and a workbook was read successfully.
    """
    st.title('Sentiment Analysis')
    st.markdown('Upload an Excel file and select a column to get sentiment analysis.')

    uploaded = st.file_uploader("Upload an excel file", type=['xlsx'])
    df = None
    review_column = None

    if uploaded is not None:
        try:
            df = pd.read_excel(uploaded)
            review_column = st.selectbox('Select the reviews column', df.columns)
            # The selected column is converted to strings before tokenizing.
            df[review_column] = df[review_column].astype(str)
        except Exception:
            st.write("An error occurred while reading the uploaded file. Please make sure it's a valid Excel file.")
            return

    clicked = st.button('Start Analysis')

    # Nothing to do until the user clicks and a frame is available.
    if not clicked or df is None:
        return

    if review_column not in df.columns:
        st.write(f'No column named "{review_column}" found in the uploaded file.')
        return

    with st.spinner('Performing sentiment analysis...'):
        df, df_display = process_reviews(df, review_column)

    display_ratings(df)
    display_dataframe(df, df_display)
62
+
63
+
64
def process_reviews(df, review_column):
    """Classify every review in ``df`` and return numeric and display frames.

    Reviews are classified in batches of 50 while a progress bar is updated.
    The input frame is not modified; both results are built from a copy.

    Args:
        df: Frame holding the reviews.
        review_column: Name of the column containing the review text.

    Returns:
        ``(df_new, df_display)``: the frame with numeric score columns, and a
        copy whose star/probability columns are formatted as percent strings.
        Both are ordered as review column, generated columns, then the rest.
    """
    chunk_size = 50
    generated = ['Weighted Rating', 'Rating', 'Probability', '1 Star', '2 Star', '3 Star', '4 Star', '5 Star']

    with st.spinner('Classifying reviews...'):
        bar = st.progress(0)
        texts = df[review_column].tolist()
        n_total = len(texts)
        n_done = 0
        all_scores = []

        for start in range(0, n_total, chunk_size):
            chunk = texts[start:start + chunk_size]
            all_scores.extend(classify_reviews(chunk))
            n_done += len(chunk)
            bar.progress(n_done / n_total)

    df_new = df.copy()
    df_new['raw_scores'] = all_scores
    scores_to_df(df_new)
    df_display = scores_to_percent(df_new.copy())

    # Get all columns excluding the created ones and the review_column
    excluded = [review_column, 'raw_scores'] + generated
    remaining_columns = [col for col in df.columns if col not in excluded]

    # Selected column first, created columns next, then the remaining columns
    ordered = [review_column] + generated + remaining_columns
    df_new = df_new[ordered]
    df_display = df_display[ordered]

    return df_new, df_display
95
+
96
+
97
def scores_to_df(df):
    """Expand the ``raw_scores`` column of ``df`` into rating columns, in place.

    Adds ``'1 Star'`` .. ``'5 Star'`` (probabilities rounded to 2 decimals),
    ``'Rating'`` (1-based position of the highest probability, first one on
    ties), ``'Probability'`` (highest probability, rounded to 2 decimals) and
    ``'Weighted Rating'`` (probability-weighted star value, rounded to 2
    decimals), then drops ``'raw_scores'``.

    Args:
        df: Frame with a ``'raw_scores'`` column holding five probabilities
            per row. Modified in place.

    Returns:
        None.
    """
    star_labels = [f'{i} Star' for i in range(1, 6)]
    # One numeric column per star class, aligned with df's own index.
    probs = pd.DataFrame(df['raw_scores'].tolist(), index=df.index, columns=star_labels, dtype=float)

    for label in star_labels:
        df[label] = probs[label].round(2)

    # argmax returns the first maximum, matching list.index(max(...)).
    df['Rating'] = probs.to_numpy().argmax(axis=1) + 1
    df['Probability'] = probs.max(axis=1).round(2)
    # Compute the Weighted Rating from the unrounded probabilities: weighting
    # the already-rounded star columns compounds their rounding error.
    df['Weighted Rating'] = probs.mul([1, 2, 3, 4, 5], axis='columns').sum(axis=1).round(2)

    df.drop(columns=['raw_scores'], inplace=True)
107
+
108
def scores_to_percent(df):
    """Format the star and probability columns of ``df`` as percent strings.

    Each value in ``'1 Star'`` .. ``'5 Star'`` and ``'Probability'`` is
    replaced by a whole-number percentage such as ``'50%'``. The frame is
    modified in place and also returned.
    """
    def as_percent(value):
        return f'{value*100:.0f}%'

    percent_columns = [f'{i} Star' for i in range(1, 6)]
    percent_columns.append('Probability')

    for column in percent_columns:
        df[column] = df[column].apply(as_percent)

    return df
115
+
116
def convert_df_to_csv(df):
    """Serialize ``df`` to CSV without its index and return UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
118
+
119
def display_dataframe(df, df_display):
    """Show a CSV download button for ``df`` and render ``df_display``.

    The page is split into nine columns and the button is placed in the
    first one; the table itself is rendered below at full width.
    """
    csv_bytes = convert_df_to_csv(df)

    # Only the first of the nine layout columns is used.
    button_slot, *_ = st.columns(9)

    with button_slot:
        st.download_button(
            label="Download CSV",
            data=csv_bytes,
            file_name="data.csv",
            mime="text/csv",
            key='download-csv',
        )

    st.dataframe(df_display)
134
+
135
def display_ratings(df):
    """Render, in five side-by-side columns, how many rows got each rating.

    Column ``k`` shows the number of rows whose ``'Rating'`` equals ``k`` as
    a heading, followed by ``k`` star emoji.
    """
    for stars, slot in enumerate(st.columns(5), start=1):
        matching_rows = df[df['Rating'] == stars]
        rating_counts = matching_rows.shape[0]
        slot.markdown(f"### {rating_counts}")
        slot.markdown(f"{'⭐' * stars}")
142
+
143
+
144
+
145
# Script entry point: build the page when this file is executed directly.
if "__main__" == __name__:
    main()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ transformers
4
+ torch
5
+ stqdm
6
+ openpyxl