Cloudy1225 committed on
Commit
23ddf94
·
verified ·
1 Parent(s): 5ac8a66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -200
app.py CHANGED
@@ -1,200 +1,200 @@
1
- import csv
2
- import gradio as gr
3
- import pandas as pd
4
- from sentiment_analyser import RandomAnalyser, RoBERTaAnalyser, ChatGPTAnalyser
5
- import matplotlib.pyplot as plt
6
- from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
7
-
8
-
9
- def plot_bar(value_counts):
10
- fig, ax = plt.subplots(figsize=(6, 6))
11
- value_counts.plot.barh(ax=ax)
12
- ax.bar_label(ax.containers[0])
13
- plt.title('Frequency of Predictions')
14
- return fig
15
-
16
-
17
- def plot_confusion_matrix(y_pred, y_true):
18
- cm = confusion_matrix(y_true, y_pred, normalize='true')
19
- fig, ax = plt.subplots(figsize=(6, 6))
20
- labels = []
21
- for label in SENTI_MAPPING.keys():
22
- if (label in y_pred.values) or (label in y_true.values):
23
- labels.append(label)
24
- disp = ConfusionMatrixDisplay(confusion_matrix=cm,
25
- display_labels=labels)
26
- disp.plot(cmap="Blues", values_format=".2f", ax=ax, colorbar=False)
27
- plt.title("Normalized Confusion Matrix")
28
- return fig
29
-
30
-
31
- def classify(num: int):
32
- samples_df = df.sample(num)
33
- X = samples_df['Text'].tolist()
34
- y = samples_df['Label']
35
- roberta = MODEL_MAPPING[OUR_MODEL]
36
- y_pred = pd.Series(roberta.predict(X), index=samples_df.index)
37
- samples_df['Predict'] = y_pred
38
- bar = plot_bar(y_pred.value_counts())
39
- cm = plot_confusion_matrix(y_pred, y)
40
- plt.close()
41
- return samples_df, bar, cm
42
-
43
-
44
- def analysis(Text):
45
- keys = []
46
- values = []
47
- for name, model in MODEL_MAPPING.items():
48
- keys.append(name)
49
- values.append(SENTI_MAPPING[model.predict([Text])[0]])
50
- return pd.DataFrame([values], columns=keys)
51
-
52
-
53
- def analyse_file(file):
54
- output_name = 'output.csv'
55
- with open(output_name, mode='w', newline='') as output:
56
- writer = csv.writer(output)
57
- header = ['Text', 'Label']
58
- writer.writerow(header)
59
- model = MODEL_MAPPING[OUR_MODEL]
60
- with open(file.name) as f:
61
- for line in f:
62
- text = line[:-1]
63
- sentiment = model.predict([text])
64
- writer.writerow([text, sentiment[0]])
65
- return output_name
66
-
67
-
68
- MODEL_MAPPING = {
69
- 'Random': RandomAnalyser(),
70
- 'RoBERTa': RoBERTaAnalyser(),
71
- 'ChatGPT': RandomAnalyser(),
72
- }
73
-
74
- OUR_MODEL = 'RoBERTa'
75
-
76
- SENTI_MAPPING = {
77
- 'negative': '😭',
78
- 'neutral': '😶',
79
- 'positive': '🥰'
80
- }
81
-
82
- TITLE = "Sentiment Analysis on Software Engineer Texts"
83
-
84
- DESCRIPTION = {
85
- 'en': (
86
- "This is the demo page for our model: "
87
- "[Cloudy1225/stackoverflow-roberta-base-sentiment]"
88
- "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
89
- ),
90
- 'zh': (
91
- "这里是第16组“睿王和他的五个小跟班”软工三迭代三模型演示页面。"
92
- "模型链接:[Cloudy1225/stackoverflow-roberta-base-sentiment]"
93
- "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
94
- )
95
- }
96
-
97
- PROMPT1 = {
98
- 'en': (
99
- "Enter text in the left text box and press Enter, and the sentiment analysis results will be output on the right. "
100
- "Here, we present three types of results, which come from random, our model, and ChatGPT."
101
- ),
102
- 'zh': (
103
- "在左侧文本框中输入文本并按回车键,右侧将输出情感分析结果。"
104
- "这里我们展示了三种结果,分别是随机结果、模型结果和 ChatGPT 结果。"
105
- )
106
- }
107
-
108
- PROMPT2 = {
109
- 'en': (
110
- "Upload a txt/csv file in the left file box, and the model will perform sentiment analysis on each line of the input text. "
111
- "You can download the output file on the right. "
112
- "The output file will be in CSV format with two columns: the original text, and the classification results."
113
- ),
114
- 'zh': (
115
- "在左侧文件框中上传 txt/csv 文件,模型会对输入文本的每一行当作一个文本进行情感分析。"
116
- "可以在右侧下载输出文件,输出文件为两列 csv 格式,第一列为原始文本,第二列为分类结果。"
117
- )
118
- }
119
-
120
- PROMPT3 = {
121
- 'en': (
122
- "Here we evaluate our model on the StackOverflow4423 dataset. "
123
- "Sliding the slider will sample a specified number of samples from the StackOverflow4423 dataset and predict their sentiment labels. "
124
- "Based on the prediction results, a label distribution chart and a confusion matrix will be plotted."
125
- ),
126
- 'zh': (
127
- "这里是在 StackOverflow4423 数据集上评估我们的模型。"
128
- "滑动 Slider,将会从 StackOverflow4423 数据集中抽样出指定数量的样本,预测其情感标签。"
129
- "并根据预测结果绘制标签分布图和混淆矩阵。"
130
- )
131
- }
132
-
133
- DEFAULT_LANG = 'en'
134
-
135
- MAX_SAMPLES = 64
136
-
137
- df = pd.read_csv('./SOF4423.csv')
138
-
139
-
140
- def set_language(lang):
141
- return DESCRIPTION[lang], PROMPT1[lang], PROMPT2[lang], PROMPT3[lang]
142
-
143
-
144
- with gr.Blocks(title=TITLE) as demo:
145
- with gr.Row():
146
- with gr.Column():
147
- gr.HTML(f"<H1>{TITLE}</H1>")
148
- with gr.Column():
149
- language_selector = gr.Radio(
150
- ['en', 'zh'], label="Select Language", value=DEFAULT_LANG,
151
- interactive=True, show_label=False, container=False
152
- )
153
-
154
- description = gr.Markdown(DESCRIPTION[DEFAULT_LANG])
155
- gr.HTML("<H2>Model Inference</H2>")
156
- prompt1 = gr.Markdown(PROMPT1[DEFAULT_LANG])
157
- with gr.Row():
158
- with gr.Column():
159
- text_input = gr.Textbox(label='Input',
160
- placeholder="Enter a positive or negative sentence here...")
161
- with gr.Column():
162
- senti_output = gr.Dataframe(type="pandas", value=[['😋', '😋', '😋']],
163
- headers=list(MODEL_MAPPING.keys()), interactive=False)
164
- text_input.submit(analysis, inputs=text_input, outputs=senti_output, show_progress='full')
165
-
166
- prompt2 = gr.Markdown(PROMPT2[DEFAULT_LANG])
167
- with gr.Row():
168
- with gr.Column():
169
- file_input = gr.File(label='File',
170
- file_types=['.txt', '.csv'])
171
- with gr.Column():
172
- file_output = gr.File(label='Output')
173
- file_input.upload(analyse_file, inputs=file_input, outputs=file_output)
174
-
175
- gr.HTML("<H2>Model Evaluation</H2>")
176
- prompt3 = gr.Markdown(PROMPT3[DEFAULT_LANG])
177
- input_models = list(MODEL_MAPPING)
178
- input_n_samples = gr.Slider(
179
- minimum=4,
180
- maximum=MAX_SAMPLES,
181
- value=8,
182
- step=4,
183
- label='Number of samples'
184
- )
185
-
186
- with gr.Row():
187
- with gr.Column():
188
- bar_plot = gr.Plot(label='Predictions Frequency')
189
- with gr.Column():
190
- cm_plot = gr.Plot(label='Confusion Matrix')
191
-
192
- with gr.Row():
193
- dataframe = gr.Dataframe(type="pandas", wrap=True, headers=['Text', 'Label', 'Predict'])
194
-
195
- input_n_samples.change(fn=classify, inputs=input_n_samples, outputs=[dataframe, bar_plot, cm_plot])
196
-
197
- language_selector.change(fn=set_language, inputs=language_selector,
198
- outputs=[description, prompt1, prompt2, prompt3])
199
-
200
- demo.launch()
 
1
+ import csv
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from sentiment_analyser import RandomAnalyser, RoBERTaAnalyser, ChatGPTAnalyser
5
+ import matplotlib.pyplot as plt
6
+ from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
7
+
8
+
9
+ def plot_bar(value_counts):
10
+ fig, ax = plt.subplots(figsize=(6, 6))
11
+ value_counts.plot.barh(ax=ax)
12
+ ax.bar_label(ax.containers[0])
13
+ plt.title('Frequency of Predictions')
14
+ return fig
15
+
16
+
17
+ def plot_confusion_matrix(y_pred, y_true):
18
+ cm = confusion_matrix(y_true, y_pred, normalize='true')
19
+ fig, ax = plt.subplots(figsize=(6, 6))
20
+ labels = []
21
+ for label in SENTI_MAPPING.keys():
22
+ if (label in y_pred.values) or (label in y_true.values):
23
+ labels.append(label)
24
+ disp = ConfusionMatrixDisplay(confusion_matrix=cm,
25
+ display_labels=labels)
26
+ disp.plot(cmap="Blues", values_format=".2f", ax=ax, colorbar=False)
27
+ plt.title("Normalized Confusion Matrix")
28
+ return fig
29
+
30
+
31
+ def classify(num: int):
32
+ samples_df = df.sample(num)
33
+ X = samples_df['Text'].tolist()
34
+ y = samples_df['Label']
35
+ roberta = MODEL_MAPPING[OUR_MODEL]
36
+ y_pred = pd.Series(roberta.predict(X), index=samples_df.index)
37
+ samples_df['Predict'] = y_pred
38
+ bar = plot_bar(y_pred.value_counts())
39
+ cm = plot_confusion_matrix(y_pred, y)
40
+ plt.close()
41
+ return samples_df, bar, cm
42
+
43
+
44
+ def analysis(Text):
45
+ keys = []
46
+ values = []
47
+ for name, model in MODEL_MAPPING.items():
48
+ keys.append(name)
49
+ values.append(SENTI_MAPPING[model.predict([Text])[0]])
50
+ return pd.DataFrame([values], columns=keys)
51
+
52
+
53
+ def analyse_file(file):
54
+ output_name = 'output.csv'
55
+ with open(output_name, mode='w', newline='') as output:
56
+ writer = csv.writer(output)
57
+ header = ['Text', 'Label']
58
+ writer.writerow(header)
59
+ model = MODEL_MAPPING[OUR_MODEL]
60
+ with open(file.name) as f:
61
+ for line in f:
62
+ text = line[:-1]
63
+ sentiment = model.predict([text])
64
+ writer.writerow([text, sentiment[0]])
65
+ return output_name
66
+
67
+
68
+ MODEL_MAPPING = {
69
+ 'Random': RandomAnalyser(),
70
+ 'RoBERTa': RoBERTaAnalyser(),
71
+ 'ChatGPT': RandomAnalyser(),
72
+ }
73
+
74
+ OUR_MODEL = 'RoBERTa'
75
+
76
+ SENTI_MAPPING = {
77
+ 'negative': '😭',
78
+ 'neutral': '😶',
79
+ 'positive': '🥰'
80
+ }
81
+
82
+ TITLE = "Sentiment Analysis on Software Engineer Texts"
83
+
84
+ DESCRIPTION = {
85
+ 'en': (
86
+ "This is the demo page for our model: "
87
+ "[Cloudy1225/stackoverflow-roberta-base-sentiment]"
88
+ "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
89
+ ),
90
+ 'zh': (
91
+ "这里是第16组“睿王和他的五个小跟班”软工三迭代三模型演示页面。"
92
+ "模型链接:[Cloudy1225/stackoverflow-roberta-base-sentiment]"
93
+ "(https://huggingface.co/Cloudy1225/stackoverflow-roberta-base-sentiment)."
94
+ )
95
+ }
96
+
97
+ PROMPT1 = {
98
+ 'en': (
99
+ "Enter text in the left text box and press Enter, and the sentiment analysis results will be output on the right. "
100
+ "Here, we present three types of results, which come from random, our model, and ChatGPT."
101
+ ),
102
+ 'zh': (
103
+ "在左侧文本框中输入文本并按回车键,右侧将输出情感分析结果。"
104
+ "这里我们展示了三种结果,分别是随机结果、模型结果和 ChatGPT 结果。"
105
+ )
106
+ }
107
+
108
+ PROMPT2 = {
109
+ 'en': (
110
+ "Upload a txt/csv file in the left file box, and the model will perform sentiment analysis on each line of the input text. "
111
+ "You can download the output file on the right. "
112
+ "The output file will be in CSV format with two columns: the original text, and the classification results."
113
+ ),
114
+ 'zh': (
115
+ "在左侧文件框中上传 txt/csv 文件,模型会对输入文本的每一行当作一个文本进行情感分析。"
116
+ "可以在右侧下载输出文件,输出文件为两列 csv 格式,第一列为原始文本,第二列为分类结果。"
117
+ )
118
+ }
119
+
120
+ PROMPT3 = {
121
+ 'en': (
122
+ "Here we evaluate our model on the StackOverflow4423 dataset. "
123
+ "Sliding the slider will sample a specified number of samples from the StackOverflow4423 dataset and predict their sentiment labels. "
124
+ "Based on the prediction results, a label distribution chart and a confusion matrix will be plotted."
125
+ ),
126
+ 'zh': (
127
+ "这里是在 StackOverflow4423 数据集上评估我们的模型。"
128
+ "滑动 Slider,将会从 StackOverflow4423 数据集中抽样出指定数量的样本,预测其情感标签。"
129
+ "并根据预测结果绘制标签分布图和混淆矩阵。"
130
+ )
131
+ }
132
+
133
+ DEFAULT_LANG = 'en'
134
+
135
+ MAX_SAMPLES = 64
136
+
137
+ df = pd.read_csv('./SOF4423.csv')
138
+
139
+
140
+ def set_language(lang):
141
+ return DESCRIPTION[lang], PROMPT1[lang], PROMPT2[lang], PROMPT3[lang]
142
+
143
+
144
+ with gr.Blocks(title=TITLE) as demo:
145
+ with gr.Row():
146
+ with gr.Column():
147
+ gr.HTML(f"<H1>{TITLE}</H1>")
148
+ with gr.Column(min_width=160):
149
+ language_selector = gr.Radio(
150
+ ['en', 'zh'], label="Select Language", value=DEFAULT_LANG,
151
+ interactive=True, show_label=False, container=False
152
+ )
153
+
154
+ description = gr.Markdown(DESCRIPTION[DEFAULT_LANG])
155
+ gr.HTML("<H2>Model Inference</H2>")
156
+ prompt1 = gr.Markdown(PROMPT1[DEFAULT_LANG])
157
+ with gr.Row():
158
+ with gr.Column():
159
+ text_input = gr.Textbox(label='Input',
160
+ placeholder="Enter a positive or negative sentence here...")
161
+ with gr.Column():
162
+ senti_output = gr.Dataframe(type="pandas", value=[['😋', '😋', '😋']],
163
+ headers=list(MODEL_MAPPING.keys()), interactive=False)
164
+ text_input.submit(analysis, inputs=text_input, outputs=senti_output, show_progress='full')
165
+
166
+ prompt2 = gr.Markdown(PROMPT2[DEFAULT_LANG])
167
+ with gr.Row():
168
+ with gr.Column():
169
+ file_input = gr.File(label='File',
170
+ file_types=['.txt', '.csv'])
171
+ with gr.Column():
172
+ file_output = gr.File(label='Output')
173
+ file_input.upload(analyse_file, inputs=file_input, outputs=file_output)
174
+
175
+ gr.HTML("<H2>Model Evaluation</H2>")
176
+ prompt3 = gr.Markdown(PROMPT3[DEFAULT_LANG])
177
+ input_models = list(MODEL_MAPPING)
178
+ input_n_samples = gr.Slider(
179
+ minimum=4,
180
+ maximum=MAX_SAMPLES,
181
+ value=8,
182
+ step=4,
183
+ label='Number of samples'
184
+ )
185
+
186
+ with gr.Row():
187
+ with gr.Column():
188
+ bar_plot = gr.Plot(label='Predictions Frequency')
189
+ with gr.Column():
190
+ cm_plot = gr.Plot(label='Confusion Matrix')
191
+
192
+ with gr.Row():
193
+ dataframe = gr.Dataframe(type="pandas", wrap=True, headers=['Text', 'Label', 'Predict'])
194
+
195
+ input_n_samples.change(fn=classify, inputs=input_n_samples, outputs=[dataframe, bar_plot, cm_plot])
196
+
197
+ language_selector.change(fn=set_language, inputs=language_selector,
198
+ outputs=[description, prompt1, prompt2, prompt3])
199
+
200
+ demo.launch()