ghost-love-you committed on
Commit
a66c985
1 Parent(s): eb4ec66

chore: Update app.py with new translation and sentiment analysis models

Browse files
Files changed (2) hide show
  1. Dockerfile +21 -0
  2. app.py +20 -2
Dockerfile ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Streamlit news-sentiment app (app.py).
# Adapted from the streamlit-example template; the template's
# placeholder entrypoint has been corrected to this project's app.
FROM python:3.9-slim

WORKDIR /app

# build-essential/git: needed when pip builds source distributions.
# curl: required by the HEALTHCHECK below.
# --no-install-recommends + list cleanup keep the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    software-properties-common \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies before copying the source tree so the (slow)
# pip layer is cached across code-only changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 8501

# Streamlit's built-in liveness endpoint; curl --fail makes a non-2xx
# response mark the container unhealthy.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

# FIX: the template referenced streamlit_app.py, but this project's
# Streamlit app is app.py (the file changed in this same commit) —
# with the template name the container would exit at startup.
ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
3
  from GoogleNews import GoogleNews
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
@@ -172,14 +172,32 @@ def display_sentiments(translate_comments, batch_size=16):
172
  cur_date = datetime.datetime.now().strftime("%Y-%m-%d")
173
  selected_date = st.date_input("选择日期", value=pd.to_datetime(cur_date))
174
 
 
 
 
 
 
 
 
 
 
175
  settings = {
176
  "max_comments": 99999,
177
  "translate_batch_size": 16,
178
  "sentiment_batch_size": 16,
 
 
179
  }
180
 
181
  with st.sidebar:
182
  st.title("设置")
 
 
 
 
 
 
 
183
  st.header("最大获取帖子数")
184
  settings["max_comments"] = st.number_input("Max Comments", 1, 99999, 99999)
185
 
@@ -192,7 +210,7 @@ with st.sidebar:
192
 
193
  if st.button("统计"):
194
  with st.spinner("正在加载模型 ..."):
195
- classifier = pipeline(task="text-classification", model="SamLowe/roberta-base-go_emotions", top_k=None)
196
  with st.spinner("正在获取当天的帖子 ..."):
197
  comments = get_comments(selected_date, settings["max_comments"])
198
  st.dataframe(comments)
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification
3
  from GoogleNews import GoogleNews
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
 
172
  cur_date = datetime.datetime.now().strftime("%Y-%m-%d")
173
  selected_date = st.date_input("选择日期", value=pd.to_datetime(cur_date))
174
 
175
+ model_translate = [
176
+ "Helsinki-NLP/opus-mt-zh-en"
177
+ ]
178
+
179
+ model_emo_analysis = [
180
+ "orlco/google-bert-base-cased-fine-tune",
181
+ "SamLowe/roberta-base-go_emotions"
182
+ ]
183
+
184
  settings = {
185
  "max_comments": 99999,
186
  "translate_batch_size": 16,
187
  "sentiment_batch_size": 16,
188
+ "model_translate": model_translate[0],
189
+ "model_emo_analysis": model_emo_analysis[0]
190
  }
191
 
192
  with st.sidebar:
193
  st.title("设置")
194
+
195
+ st.header("翻译模型")
196
+ settings["model_translate"] = st.selectbox("Model", model_translate)
197
+
198
+ st.header("情感分析模型")
199
+ settings["model_emo_analysis"] = st.selectbox("Model", model_emo_analysis)
200
+
201
  st.header("最大获取帖子数")
202
  settings["max_comments"] = st.number_input("Max Comments", 1, 99999, 99999)
203
 
 
210
 
211
  if st.button("统计"):
212
  with st.spinner("正在加载模型 ..."):
213
+ classifier = pipeline(task="text-classification", model=settings["model_emo_analysis"], top_k=None)
214
  with st.spinner("正在获取当天的帖子 ..."):
215
  comments = get_comments(selected_date, settings["max_comments"])
216
  st.dataframe(comments)