David Yeung committed on
Commit f23a967
1 Parent(s): cd15210

updated UI, added translation to English

Files changed (2)
  1. app.py +97 -37
  2. gradio_cached_examples/10/log.csv +4 -6
app.py CHANGED
@@ -2,6 +2,7 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
+from peft import AutoPeftModelForCausalLM
 
 import gradio as gr
 import hanzidentifier
@@ -21,6 +22,11 @@ model = AutoModelForCausalLM.from_pretrained(
     llm_model_name
 )
 
+#model = AutoPeftModelForCausalLM.from_pretrained(
+#    "Qwen1.5_0.5B_Chat_sft_full/checkpoint-300",
+#    low_cpu_mem_usage=True,
+#)
+
 tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
 
 # %%
@@ -50,6 +56,8 @@ text_output_label=["谜底","謎底","Answer"]
 clear_label = ["清除","清除","Clear"]
 submit_label = ["提交","提交","Submit"]
 
+threshold = 0.7
+
 # %%
 # helper functions for prompt processing for this LLM
 
@@ -66,7 +74,7 @@ def answer(input_text,context=None):
     if context:
         tips = "提示:\n"
         tips += "\n".join([x[0] for x in context])
-        print (f"{input_text}\n{context[0][0]} {context[0][1]}")
+        print (f"====\n{input_text}\n{context[0][0]} {context[0][1]}")
         if context[0][1] >=0.9:
             ans = re.search(r"谜底:(\w+)", context[0][0])
             if ans:
@@ -77,7 +85,7 @@
     prompt = f"{input_text}\n{tips}\n谜底是什么?"
     prompt = prompt.strip()
 
-    print(prompt)
+    print(f"===\n{prompt}")
 
     messages = [
         {"role": "system", "content": "You are a helpful assistant."},
@@ -112,7 +120,7 @@ def helper_rag(text):
     #docs_out = vectordb.max_marginal_relevance_search(text,k=5,fetch_k = 20, lambda_mult = 0.5)
     context = []
     for doc in docs_out:
-        if doc[1] > 0.5:
+        if doc[1] > threshold:
             context.append((f"{doc[0].page_content}{doc[0].metadata['answer']}", doc[1]))
 
     return context
@@ -127,11 +135,11 @@ def helper_text(text_input,radio=None):
 
     text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
 
-    if not any(c in text_input for c in ["猜", "打"]):
-        warning = "请给一个提示,提示格式,例子:猜一水果,打一字。"
-        if chinese_type == "traditional" or radio == "繁體中文":
-            warning = chinese_converter.to_traditional(warning)
-        return warning
+    #if not any(c in text_input for c in ["猜", "打"]):
+    #    warning = "请给一个提示,提示格式,例子:猜一水果,打一字。"
+    #    if chinese_type == "traditional" or radio == "繁體中文":
+    #        warning = chinese_converter.to_traditional(warning)
+    #    return warning
 
     text=f"""猜谜语:\n谜面:{text_input}
     """
@@ -151,46 +159,84 @@ def helper_text(text_input,radio=None):
 
     #return output[0]
 
+# get answer from LLM with prompt input
+def translate(input_text):
+    '''Use LLM for translation'''
+
+    prompt = f"""翻译以下內容成英语:
+
+    {input_text}
+    """
+    print(prompt)
+
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(device="cpu")
+
+    generated_ids = model.generate(
+        model_inputs.input_ids,
+        max_new_tokens=128,
+        do_sample=False,
+        top_p=0.0
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    #return out_text[0]["generated_text"]
+    return response
+    #return postprocess(out_text[0]["generated_text"])
+
 
 
 # Gradio function for configure the language of UI
 def change_language(radio,text_input,text_output,markdown,
-                    markdown_msg1, markdown_msg2):
+                    markdown_msg1, markdown_msg2,translate_btn):
     if radio == "简体中文":
         index = 0
-        text_input_update=gr.Textbox(value = chinese_converter.to_simplified(text_input), label = text_input_label[index])
-        text_output_update=gr.Textbox(value = chinese_converter.to_simplified(text_output),label = text_output_label[index])
-        markdown_update=chinese_converter.to_simplified(markdown)
-        markdown_msg1_update=chinese_converter.to_simplified(markdown_msg1)
-        markdown_msg2_update=chinese_converter.to_simplified(markdown_msg2)
+        text_input=gr.Textbox(value = chinese_converter.to_simplified(text_input), label = text_input_label[index])
+        text_output=gr.Textbox(value = chinese_converter.to_simplified(text_output),label = text_output_label[index])
+        markdown=chinese_converter.to_simplified(markdown)
+        markdown_msg1=chinese_converter.to_simplified(markdown_msg1)
+        markdown_msg2=chinese_converter.to_simplified(markdown_msg2)
+        translate_btn=gr.Button(visible=False)
     elif radio == "繁體中文":
         index = 1
-        text_input_update=gr.Textbox(value = chinese_converter.to_traditional(text_input),label = text_input_label[index])
-        text_output_update=gr.Textbox(value = chinese_converter.to_traditional(text_output),label = text_output_label[index])
-        markdown_update=chinese_converter.to_traditional(markdown)
-        markdown_msg1_update=chinese_converter.to_traditional(markdown_msg1)
-        markdown_msg2_update=chinese_converter.to_traditional(markdown_msg2)
+        text_input=gr.Textbox(value = chinese_converter.to_traditional(text_input),label = text_input_label[index])
+        text_output=gr.Textbox(value = chinese_converter.to_traditional(text_output),label = text_output_label[index])
+        markdown=chinese_converter.to_traditional(markdown)
+        markdown_msg1=chinese_converter.to_traditional(markdown_msg1)
+        markdown_msg2=chinese_converter.to_traditional(markdown_msg2)
+        translate_btn=gr.Button(visible=False)
     elif radio == "English":
         index = 2
-        text_input_update=gr.Textbox(label = text_input_label[index])
-        text_output_update=gr.Textbox(label = text_output_label[index])
-        markdown_update=markdown
-        markdown_msg1_update=markdown_msg1
-        markdown_msg2_update=markdown_msg2
+        text_input=gr.Textbox(label = text_input_label[index])
+        text_output=gr.Textbox(label = text_output_label[index])
+        translate_btn=gr.Button(visible=True)
 
     else:
         index = 0
-        text_input_update=gr.Textbox(label = text_input_label[index])
-        text_output_update=gr.Textbox(label = text_output_label[index])
-        markdown_update=chinese_converter.to_simplified(markdown)
-        markdown_msg1_update=chinese_converter.to_simplified(markdown_msg1)
-        markdown_msg2_update=chinese_converter.to_simplified(markdown_msg2)
+        text_input=gr.Textbox(label = text_input_label[index])
+        text_output=gr.Textbox(label = text_output_label[index])
+        markdown=chinese_converter.to_simplified(markdown)
+        markdown_msg1=chinese_converter.to_simplified(markdown_msg1)
+        markdown_msg2=chinese_converter.to_simplified(markdown_msg2)
+        translate_btn=gr.Button(visible=False)
 
-    clear_btn_update = clear_label[index]
-    submit_btn_update = submit_label[index]
+    clear_btn = clear_label[index]
+    submit_btn = submit_label[index]
 
-    return [text_input_update,text_output_update,clear_btn_update,submit_btn_update,markdown_update,
-            markdown_msg1_update ,markdown_msg2_update]
+    return [text_input,text_output,clear_btn,submit_btn,markdown,
+            markdown_msg1 ,markdown_msg2,translate_btn]
 
 
 def clear_text():
@@ -198,6 +244,13 @@ def clear_text():
     text_output_update=""
 
     return [text_input_update,text_output_update]
+
+def translate_text(text_input,text_output):
+
+    text_input = translate(f"{text_input}")
+    text_output = translate(f"{text_output}")
+
+    return text_input,text_output
 
 
 # %%
@@ -207,13 +260,15 @@ def clear_text():
 # }
 # """
 
+
+
 with gr.Blocks() as demo:
     index = 0
     example_list = [
         ["小家伙穿黄袍,花丛中把房造。飞到西来飞到东,人人夸他爱劳动。(猜一动物)"],
         ["一物生来身穿三百多件衣,每天脱一件,年底剩张皮。(猜一物品)"],
         ["A thousand threads, a million strands. Reaching the water, vanishing all at once. (Hint: natural phenomenon)"],
-        ["无底洞(猜成语)"],
+        ["无底洞"],
     ]
     radio = gr.Radio(
         ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
@@ -223,6 +278,7 @@ with gr.Blocks() as demo:
     # Chinese Lantern Riddles Solver with LLM
    ## 用大语言模型来猜灯谜
    """,elem_id="markdown")
+
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label=text_input_label[index],
@@ -230,8 +286,10 @@ with gr.Blocks() as demo:
            with gr.Row():
                clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
                submit_btn = gr.Button(value=submit_label[index], variant = "primary")
-
+
            text_output = gr.Textbox(label=text_output_label[index])
+
+            translate_btn = gr.Button(value="Translate", variant = "primary", scale=0, visible=False)
 
 
    examples = gr.Examples(
@@ -296,11 +354,13 @@ with gr.Blocks() as demo:
 
    submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
 
+    translate_btn.click(fn=translate_text, inputs=[text_input,text_output], outputs=[text_input,text_output])
+
    clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
    radio.change(fn=change_language,inputs=[radio,text_input,text_output,
-                                            markdown, markdown_msg1,markdown_msg2],
+                                            markdown, markdown_msg1,markdown_msg2,translate_btn],
                 outputs=[text_input,text_output,clear_btn,submit_btn,
-                          markdown, markdown_msg1,markdown_msg2])
+                          markdown, markdown_msg1,markdown_msg2,translate_btn])
 
 #demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
 #                    flagging_options=["Inappropriate"],allow_flagging="never",
gradio_cached_examples/10/log.csv CHANGED
@@ -1,7 +1,5 @@
 谜底,flag,username,timestamp
-这个谜面描述了一个小家伙穿着黄色的袍子,在花丛中建造房子。这个小家伙是蜜蜂,因为蜜蜂在花朵上采集花蜜,然后将这些花蜜转化为蜂蜜。所以答案是蜜蜂。,,,2024-02-19 01:00:57.388598
-谜底是日历。因为日历上的日期会随着时间的推移而改变,所以每过一年,就会有一层新的布料覆盖在上面,这就是所谓的“年轮”。因此,当最后一层布料被剥落时,就只剩下了一张没有了皮的日历。,,,2024-02-19 01:01:02.376888
-"这个谜面描述了一个自然现象,即“千条线,万条线,掉到水里看不见”。这个现象通常指的是水流在地面上形成的小水滴或小水泡,这些小水滴或小水泡会随着水流的流动而消失不见。
-
-这个谜底是雨水,因为雨水是由水滴和小水泡组成的,当它们落在地面上时,由于受到重力的作用,就会被冲走。因此,这个谜底就是雨水。",,,2024-02-19 01:01:10.248405
-这个谜面是通过“无底洞”来比喻一个无法预测或理解的、非常深邃的地方,因此可以推断出成语“深不可测”。,,,2024-02-19 01:01:13.125556
+谜底是:蜜蜂,,,2024-02-22 22:14:00.108103
+谜底是:日历,,,2024-02-22 22:14:01.397728
+谜底是:雨,,,2024-02-22 22:14:02.151370
+谜底是:深不可测,,,2024-02-22 22:14:02.200411