updated UI, added translation to English
Browse files- app.py +97 -37
- gradio_cached_examples/10/log.csv +4 -6
app.py
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
3 |
from langchain.vectorstores import Chroma
|
4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
|
|
5 |
|
6 |
import gradio as gr
|
7 |
import hanzidentifier
|
@@ -21,6 +22,11 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
21 |
llm_model_name
|
22 |
)
|
23 |
|
|
|
|
|
|
|
|
|
|
|
24 |
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
|
25 |
|
26 |
# %%
|
@@ -50,6 +56,8 @@ text_output_label=["谜底","謎底","Answer"]
|
|
50 |
clear_label = ["清除","清除","Clear"]
|
51 |
submit_label = ["提交","提交","Submit"]
|
52 |
|
|
|
|
|
53 |
# %%
|
54 |
# helper functions for prompt processing for this LLM
|
55 |
|
@@ -66,7 +74,7 @@ def answer(input_text,context=None):
|
|
66 |
if context:
|
67 |
tips = "提示:\n"
|
68 |
tips += "\n".join([x[0] for x in context])
|
69 |
-
print (f"{input_text}\n{context[0][0]} {context[0][1]}")
|
70 |
if context[0][1] >=0.9:
|
71 |
ans = re.search(r"谜底:(\w+)", context[0][0])
|
72 |
if ans:
|
@@ -77,7 +85,7 @@ def answer(input_text,context=None):
|
|
77 |
prompt = f"{input_text}\n{tips}\n谜底是什么?"
|
78 |
prompt = prompt.strip()
|
79 |
|
80 |
-
print(prompt)
|
81 |
|
82 |
messages = [
|
83 |
{"role": "system", "content": "You are a helpful assistant."},
|
@@ -112,7 +120,7 @@ def helper_rag(text):
|
|
112 |
#docs_out = vectordb.max_marginal_relevance_search(text,k=5,fetch_k = 20, lambda_mult = 0.5)
|
113 |
context = []
|
114 |
for doc in docs_out:
|
115 |
-
if doc[1] >
|
116 |
context.append((f"{doc[0].page_content}{doc[0].metadata['answer']}", doc[1]))
|
117 |
|
118 |
return context
|
@@ -127,11 +135,11 @@ def helper_text(text_input,radio=None):
|
|
127 |
|
128 |
text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
|
129 |
|
130 |
-
if not any(c in text_input for c in ["猜", "打"]):
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
|
136 |
text=f"""猜谜语:\n谜面:{text_input}
|
137 |
"""
|
@@ -151,46 +159,84 @@ def helper_text(text_input,radio=None):
|
|
151 |
|
152 |
#return output[0]
|
153 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
|
155 |
|
156 |
# Gradio callback that configures the UI language
|
157 |
def change_language(radio,text_input,text_output,markdown,
|
158 |
-
markdown_msg1, markdown_msg2):
|
159 |
if radio == "简体中文":
|
160 |
index = 0
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
166 |
elif radio == "繁體中文":
|
167 |
index = 1
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
173 |
elif radio == "English":
|
174 |
index = 2
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
markdown_msg1_update=markdown_msg1
|
179 |
-
markdown_msg2_update=markdown_msg2
|
180 |
|
181 |
else:
|
182 |
index = 0
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
|
|
188 |
|
189 |
-
|
190 |
-
|
191 |
|
192 |
-
return [
|
193 |
-
|
194 |
|
195 |
|
196 |
def clear_text():
|
@@ -198,6 +244,13 @@ def clear_text():
|
|
198 |
text_output_update=""
|
199 |
|
200 |
return [text_input_update,text_output_update]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
|
203 |
# %%
|
@@ -207,13 +260,15 @@ def clear_text():
|
|
207 |
# }
|
208 |
# """
|
209 |
|
|
|
|
|
210 |
with gr.Blocks() as demo:
|
211 |
index = 0
|
212 |
example_list = [
|
213 |
["小家伙穿黄袍,花丛中把房造。飞到西来飞到东,人人夸他爱劳动。(猜一动物)"],
|
214 |
["一物生来身穿三百多件衣,每天脱一件,年底剩张皮。(猜一物品)"],
|
215 |
["A thousand threads, a million strands. Reaching the water, vanishing all at once. (Hint: natural phenomenon)"],
|
216 |
-
["
|
217 |
]
|
218 |
radio = gr.Radio(
|
219 |
["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
|
@@ -223,6 +278,7 @@ with gr.Blocks() as demo:
|
|
223 |
# Chinese Lantern Riddles Solver with LLM
|
224 |
## 用大语言模型来猜灯谜
|
225 |
""",elem_id="markdown")
|
|
|
226 |
with gr.Row():
|
227 |
with gr.Column():
|
228 |
text_input = gr.Textbox(label=text_input_label[index],
|
@@ -230,8 +286,10 @@ with gr.Blocks() as demo:
|
|
230 |
with gr.Row():
|
231 |
clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
|
232 |
submit_btn = gr.Button(value=submit_label[index], variant = "primary")
|
233 |
-
|
234 |
text_output = gr.Textbox(label=text_output_label[index])
|
|
|
|
|
235 |
|
236 |
|
237 |
examples = gr.Examples(
|
@@ -296,11 +354,13 @@ with gr.Blocks() as demo:
|
|
296 |
|
297 |
submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
|
298 |
|
|
|
|
|
299 |
clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
|
300 |
radio.change(fn=change_language,inputs=[radio,text_input,text_output,
|
301 |
-
markdown, markdown_msg1,markdown_msg2],
|
302 |
outputs=[text_input,text_output,clear_btn,submit_btn,
|
303 |
-
markdown, markdown_msg1,markdown_msg2])
|
304 |
|
305 |
#demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
|
306 |
# flagging_options=["Inappropriate"],allow_flagging="never",
|
|
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
3 |
from langchain.vectorstores import Chroma
|
4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
+
from peft import AutoPeftModelForCausalLM
|
6 |
|
7 |
import gradio as gr
|
8 |
import hanzidentifier
|
|
|
22 |
llm_model_name
|
23 |
)
|
24 |
|
25 |
+
#model = AutoPeftModelForCausalLM.from_pretrained(
|
26 |
+
# "Qwen1.5_0.5B_Chat_sft_full/checkpoint-300",
|
27 |
+
# low_cpu_mem_usage=True,
|
28 |
+
#)
|
29 |
+
|
30 |
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
|
31 |
|
32 |
# %%
|
|
|
56 |
clear_label = ["清除","清除","Clear"]
|
57 |
submit_label = ["提交","提交","Submit"]
|
58 |
|
59 |
+
threshold = 0.7
|
60 |
+
|
61 |
# %%
|
62 |
# helper functions for prompt processing for this LLM
|
63 |
|
|
|
74 |
if context:
|
75 |
tips = "提示:\n"
|
76 |
tips += "\n".join([x[0] for x in context])
|
77 |
+
print (f"====\n{input_text}\n{context[0][0]} {context[0][1]}")
|
78 |
if context[0][1] >=0.9:
|
79 |
ans = re.search(r"谜底:(\w+)", context[0][0])
|
80 |
if ans:
|
|
|
85 |
prompt = f"{input_text}\n{tips}\n谜底是什么?"
|
86 |
prompt = prompt.strip()
|
87 |
|
88 |
+
print(f"===\n{prompt}")
|
89 |
|
90 |
messages = [
|
91 |
{"role": "system", "content": "You are a helpful assistant."},
|
|
|
120 |
#docs_out = vectordb.max_marginal_relevance_search(text,k=5,fetch_k = 20, lambda_mult = 0.5)
|
121 |
context = []
|
122 |
for doc in docs_out:
|
123 |
+
if doc[1] > threshold:
|
124 |
context.append((f"{doc[0].page_content}{doc[0].metadata['answer']}", doc[1]))
|
125 |
|
126 |
return context
|
|
|
135 |
|
136 |
text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
|
137 |
|
138 |
+
#if not any(c in text_input for c in ["猜", "打"]):
|
139 |
+
# warning = "请给一个提示,提示格式,例子:猜一水果,打一字。"
|
140 |
+
# if chinese_type == "traditional" or radio == "繁體中文":
|
141 |
+
# warning = chinese_converter.to_traditional(warning)
|
142 |
+
# return warning
|
143 |
|
144 |
text=f"""猜谜语:\n谜面:{text_input}
|
145 |
"""
|
|
|
159 |
|
160 |
#return output[0]
|
161 |
|
162 |
+
# translate text into English with the LLM
|
163 |
+
def translate(input_text):
    """Translate ``input_text`` into English with the chat LLM.

    The text is wrapped in a fixed Chinese instruction prompt, formatted with
    the tokenizer's chat template, and decoded greedily (``do_sample=False``)
    with at most 128 new tokens.

    Args:
        input_text: Text to translate. ``None`` or whitespace-only input is
            answered with an empty string without calling the model.

    Returns:
        The decoded model reply with the prompt tokens and special tokens
        stripped, or ``""`` when there is nothing to translate.
    """
    # An empty prompt would only make the model invent text, so bail out early.
    if input_text is None or not str(input_text).strip():
        return ""

    prompt = f"""翻译以下內容成英语:

{input_text}
"""
    print(prompt)

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device="cpu")

    # Greedy decoding: top_p only affects sampling, so it is not passed here.
    # The attention mask is forwarded explicitly instead of being inferred.
    generated_ids = model.generate(
        model_inputs.input_ids,
        attention_mask=model_inputs.get("attention_mask"),
        max_new_tokens=128,
        do_sample=False,
    )
    # generate() returns prompt + completion; keep only the completion tokens.
    completions = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    return tokenizer.batch_decode(completions, skip_special_tokens=True)[0]
|
198 |
+
|
199 |
|
200 |
|
201 |
# Gradio callback that configures the UI language
|
202 |
def change_language(radio, text_input, text_output, markdown,
                    markdown_msg1, markdown_msg2, translate_btn):
    """Rebuild the UI components for the language selected in ``radio``.

    Args:
        radio: Selected language ("简体中文", "繁體中文" or "English").
        text_input: Current text of the input box.
        text_output: Current text of the output box.
        markdown: Current text of the title markdown.
        markdown_msg1: Current text of the first message markdown.
        markdown_msg2: Current text of the second message markdown.
        translate_btn: Current translate-button value; ignored and replaced.

    Returns:
        A list ``[text_input, text_output, clear_btn, submit_btn, markdown,
        markdown_msg1, markdown_msg2, translate_btn]`` in the order expected
        by the ``radio.change`` outputs.
    """
    # Pick the label index and the script converter for the chosen language.
    if radio == "繁體中文":
        index, convert = 1, chinese_converter.to_traditional
    elif radio == "English":
        index, convert = 2, None
    else:
        # "简体中文" and any unrecognised value both use the Simplified labels.
        index, convert = 0, chinese_converter.to_simplified

    input_kwargs = {"label": text_input_label[index]}
    output_kwargs = {"label": text_output_label[index]}
    if radio in ("简体中文", "繁體中文"):
        # Only the two explicit Chinese choices re-script the box contents;
        # English and the fallback leave the current values untouched.
        input_kwargs["value"] = convert(text_input)
        output_kwargs["value"] = convert(text_output)

    if convert is not None:
        # English passes the markdown texts through unchanged.
        markdown = convert(markdown)
        markdown_msg1 = convert(markdown_msg1)
        markdown_msg2 = convert(markdown_msg2)

    return [
        gr.Textbox(**input_kwargs),
        gr.Textbox(**output_kwargs),
        clear_label[index],
        submit_label[index],
        markdown,
        markdown_msg1,
        markdown_msg2,
        # The translate button is only offered in the English UI.
        gr.Button(visible=(radio == "English")),
    ]
|
240 |
|
241 |
|
242 |
def clear_text():
|
|
|
244 |
text_output_update=""
|
245 |
|
246 |
return [text_input_update,text_output_update]
|
247 |
+
|
248 |
+
def translate_text(text_input, text_output):
    """Translate the contents of the input and output text boxes to English.

    Blank boxes are skipped, so an empty answer box is not sent to the LLM
    (which would otherwise fill it with invented text) and ``None`` is not
    stringified into the literal ``"None"``.

    Args:
        text_input: Current text of the input box.
        text_output: Current text of the output box.

    Returns:
        A tuple ``(text_input, text_output)`` with each non-blank value
        replaced by its translation. ``None`` becomes ``""``; other blank
        strings are returned unchanged.
    """
    def _to_english(text):
        # Skip the model call when there is nothing to translate.
        if text is None:
            return ""
        if not str(text).strip():
            return text
        return translate(f"{text}")

    return _to_english(text_input), _to_english(text_output)
|
254 |
|
255 |
|
256 |
# %%
|
|
|
260 |
# }
|
261 |
# """
|
262 |
|
263 |
+
|
264 |
+
|
265 |
with gr.Blocks() as demo:
|
266 |
index = 0
|
267 |
example_list = [
|
268 |
["小家伙穿黄袍,花丛中把房造。飞到西来飞到东,人人夸他爱劳动。(猜一动物)"],
|
269 |
["一物生来身穿三百多件衣,每天脱一件,年底剩张皮。(猜一物品)"],
|
270 |
["A thousand threads, a million strands. Reaching the water, vanishing all at once. (Hint: natural phenomenon)"],
|
271 |
+
["无底洞"],
|
272 |
]
|
273 |
radio = gr.Radio(
|
274 |
["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
|
|
|
278 |
# Chinese Lantern Riddles Solver with LLM
|
279 |
## 用大语言模型来猜灯谜
|
280 |
""",elem_id="markdown")
|
281 |
+
|
282 |
with gr.Row():
|
283 |
with gr.Column():
|
284 |
text_input = gr.Textbox(label=text_input_label[index],
|
|
|
286 |
with gr.Row():
|
287 |
clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
|
288 |
submit_btn = gr.Button(value=submit_label[index], variant = "primary")
|
289 |
+
|
290 |
text_output = gr.Textbox(label=text_output_label[index])
|
291 |
+
|
292 |
+
translate_btn = gr.Button(value="Translate", variant = "primary", scale=0, visible=False)
|
293 |
|
294 |
|
295 |
examples = gr.Examples(
|
|
|
354 |
|
355 |
submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
|
356 |
|
357 |
+
translate_btn.click(fn=translate_text, inputs=[text_input,text_output], outputs=[text_input,text_output])
|
358 |
+
|
359 |
clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
|
360 |
radio.change(fn=change_language,inputs=[radio,text_input,text_output,
|
361 |
+
markdown, markdown_msg1,markdown_msg2,translate_btn],
|
362 |
outputs=[text_input,text_output,clear_btn,submit_btn,
|
363 |
+
markdown, markdown_msg1,markdown_msg2,translate_btn])
|
364 |
|
365 |
#demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
|
366 |
# flagging_options=["Inappropriate"],allow_flagging="never",
|
gradio_cached_examples/10/log.csv
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
谜底,flag,username,timestamp
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
这个谜底是雨水,因为雨水是由水滴和小水泡组成的,当它们落在地面上时,由于受到重力的作用,就会被冲走。因此,这个谜底就是雨水。",,,2024-02-19 01:01:10.248405
|
7 |
-
这个谜面是通过“无底洞”来比喻一个无法预测或理解的、非常深邃的地方,因此可以推断出成语“深不可测”。,,,2024-02-19 01:01:13.125556
|
|
|
1 |
谜底,flag,username,timestamp
|
2 |
+
谜底是:蜜蜂,,,2024-02-22 22:14:00.108103
|
3 |
+
谜底是:日历,,,2024-02-22 22:14:01.397728
|
4 |
+
谜底是:雨,,,2024-02-22 22:14:02.151370
|
5 |
+
谜底是:深不可测,,,2024-02-22 22:14:02.200411
|
|
|
|