chinese_lantern_riddles_v2_sft

Sleeping

App Files Files Community

3v324v23 committed on Feb 22

Commit

f23a967

•

1 Parent(s): cd15210

updated UI, added translation to English

Browse files

Files changed (2) hide show

app.py +97 -37
gradio_cached_examples/10/log.csv +4 -6

app.py CHANGED Viewed

@@ -2,6 +2,7 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
 import gradio as gr
 import hanzidentifier
@@ -21,6 +22,11 @@ model = AutoModelForCausalLM.from_pretrained(
     llm_model_name
 )
 tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
 # %%
@@ -50,6 +56,8 @@ text_output_label=["谜底","謎底","Answer"]
 clear_label = ["清除","清除","Clear"]
 submit_label = ["提交","提交","Submit"]
 # %%
 # helper functions for prompt processing for this LLM
@@ -66,7 +74,7 @@ def answer(input_text,context=None):
     if context:
         tips = "提示：\n"
         tips += "\n".join([x[0] for x in context])
-        print (f"{input_text}\n{context[0][0]} {context[0][1]}")
         if context[0][1] >=0.9:
             ans = re.search(r"谜底：(\w+)", context[0][0])
             if ans:
@@ -77,7 +85,7 @@ def answer(input_text,context=None):
     prompt = f"{input_text}\n{tips}\n谜底是什么？"
     prompt = prompt.strip()
-    print(prompt)
     messages = [
         {"role": "system", "content": "You are a helpful assistant."},
@@ -112,7 +120,7 @@ def helper_rag(text):
     #docs_out = vectordb.max_marginal_relevance_search(text,k=5,fetch_k = 20, lambda_mult = 0.5)
     context = []
     for doc in docs_out:
-        if doc[1] > 0.5:
             context.append((f"{doc[0].page_content}{doc[0].metadata['answer']}", doc[1]))
     return context
@@ -127,11 +135,11 @@ def helper_text(text_input,radio=None):
     text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
-    if not any(c in text_input for c in ["猜", "打"]):
-        warning = "请给一个提示，提示格式，例子：猜一水果，打一字。"
-        if chinese_type == "traditional" or radio == "繁體中文":
-            warning = chinese_converter.to_traditional(warning)
-        return warning
     text=f"""猜谜语：\n谜面：{text_input}
     """
@@ -151,46 +159,84 @@ def helper_text(text_input,radio=None):
     #return output[0]
 # Gradio function for configure the language of UI
 def change_language(radio,text_input,text_output,markdown,
-                    markdown_msg1, markdown_msg2):
     if radio == "简体中文":
         index = 0
-        text_input_update=gr.Textbox(value = chinese_converter.to_simplified(text_input), label = text_input_label[index])
-        text_output_update=gr.Textbox(value = chinese_converter.to_simplified(text_output),label = text_output_label[index])
-        markdown_update=chinese_converter.to_simplified(markdown)
-        markdown_msg1_update=chinese_converter.to_simplified(markdown_msg1)
-        markdown_msg2_update=chinese_converter.to_simplified(markdown_msg2)
     elif radio == "繁體中文":
         index = 1
-        text_input_update=gr.Textbox(value = chinese_converter.to_traditional(text_input),label = text_input_label[index])
-        text_output_update=gr.Textbox(value = chinese_converter.to_traditional(text_output),label = text_output_label[index])
-        markdown_update=chinese_converter.to_traditional(markdown)
-        markdown_msg1_update=chinese_converter.to_traditional(markdown_msg1)
-        markdown_msg2_update=chinese_converter.to_traditional(markdown_msg2)
     elif radio == "English":
         index = 2
-        text_input_update=gr.Textbox(label = text_input_label[index])
-        text_output_update=gr.Textbox(label = text_output_label[index])
-        markdown_update=markdown
-        markdown_msg1_update=markdown_msg1
-        markdown_msg2_update=markdown_msg2
     else:
         index = 0
-        text_input_update=gr.Textbox(label = text_input_label[index])
-        text_output_update=gr.Textbox(label = text_output_label[index])
-        markdown_update=chinese_converter.to_simplified(markdown)
-        markdown_msg1_update=chinese_converter.to_simplified(markdown_msg1)
-        markdown_msg2_update=chinese_converter.to_simplified(markdown_msg2)
-    clear_btn_update = clear_label[index]
-    submit_btn_update = submit_label[index]
-    return [text_input_update,text_output_update,clear_btn_update,submit_btn_update,markdown_update,
-            markdown_msg1_update ,markdown_msg2_update]
 def clear_text():
@@ -198,6 +244,13 @@ def clear_text():
     text_output_update=""
     return [text_input_update,text_output_update]
 # %%
@@ -207,13 +260,15 @@ def clear_text():
 #           }
 # """
 with gr.Blocks() as demo:
     index = 0
     example_list = [
         ["小家伙穿黄袍，花丛中把房造。飞到西来飞到东，人人夸他爱劳动。（猜一动物）"],
         ["一物生来身穿三百多件衣，每天脱一件，年底剩张皮。（猜一物品）"],
         ["A thousand threads, a million strands. Reaching the water, vanishing all at once. (Hint: natural phenomenon)"],
-        ["无底洞（猜成语）"],
     ]
     radio = gr.Radio(
         ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
@@ -223,6 +278,7 @@ with gr.Blocks() as demo:
             # Chinese Lantern Riddles Solver with LLM
             ## 用大语言模型来猜灯谜
             """,elem_id="markdown")
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(label=text_input_label[index],
@@ -230,8 +286,10 @@ with gr.Blocks() as demo:
             with gr.Row():
                 clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
                 submit_btn = gr.Button(value=submit_label[index], variant = "primary")
             text_output = gr.Textbox(label=text_output_label[index])
             examples = gr.Examples(
@@ -296,11 +354,13 @@ with gr.Blocks() as demo:
     submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
     clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
     radio.change(fn=change_language,inputs=[radio,text_input,text_output,
-                                            markdown, markdown_msg1,markdown_msg2],
                  outputs=[text_input,text_output,clear_btn,submit_btn,
-                          markdown, markdown_msg1,markdown_msg2])
     #demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
     #                      flagging_options=["Inappropriate"],allow_flagging="never",

 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from langchain.vectorstores import Chroma
 from langchain.embeddings import HuggingFaceEmbeddings
+from peft import AutoPeftModelForCausalLM
 import gradio as gr
 import hanzidentifier
     llm_model_name
 )
+#model = AutoPeftModelForCausalLM.from_pretrained(
+#    "Qwen1.5_0.5B_Chat_sft_full/checkpoint-300",
+#    low_cpu_mem_usage=True,
+#)
 tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
 # %%
 clear_label = ["清除","清除","Clear"]
 submit_label = ["提交","提交","Submit"]
+threshold = 0.7
 # %%
 # helper functions for prompt processing for this LLM
     if context:
         tips = "提示：\n"
         tips += "\n".join([x[0] for x in context])
+        print (f"====\n{input_text}\n{context[0][0]} {context[0][1]}")
         if context[0][1] >=0.9:
             ans = re.search(r"谜底：(\w+)", context[0][0])
             if ans:
     prompt = f"{input_text}\n{tips}\n谜底是什么？"
     prompt = prompt.strip()
+    print(f"===\n{prompt}")
     messages = [
         {"role": "system", "content": "You are a helpful assistant."},
     #docs_out = vectordb.max_marginal_relevance_search(text,k=5,fetch_k = 20, lambda_mult = 0.5)
     context = []
     for doc in docs_out:
+        if doc[1] > threshold:
             context.append((f"{doc[0].page_content}{doc[0].metadata['answer']}", doc[1]))
     return context
     text_input = re.sub(r'hint',"猜",text_input,flags=re.I)
+    #if not any(c in text_input for c in ["猜", "打"]):
+    #    warning = "请给一个提示，提示格式，例子：猜一水果，打一字。"
+    #    if chinese_type == "traditional" or radio == "繁體中文":
+    #        warning = chinese_converter.to_traditional(warning)
+    #    return warning
     text=f"""猜谜语：\n谜面：{text_input}
     """
     #return output[0]
+# translate text into English with the LLM
+def translate(input_text):
+    '''Use the chat LLM to translate ``input_text`` into English.
+
+    Wraps the text in a fixed Chinese instruction prompt, applies the
+    tokenizer's chat template and decodes greedily (at most 128 new tokens).
+    Returns only the newly generated text: the prompt tokens at the start of
+    each generated sequence are sliced off before decoding. Uses the
+    module-level ``tokenizer`` and ``model``.
+    '''
+    prompt = f"""翻译以下內容成英语：
+    {input_text}
+    """
+    print(prompt)
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(device="cpu")
+    # Pass the whole encoding so the attention mask reaches generate().
+    # Decoding is greedy (do_sample=False), so no sampling parameters are given.
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=128,
+        do_sample=False
+    )
+    # drop the prompt tokens that precede each continuation
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
 # Gradio callback that switches the UI language
 def change_language(radio,text_input,text_output,markdown,
+                    markdown_msg1, markdown_msg2,translate_btn):
+    '''Rebuild the UI pieces for the language selected in ``radio``.
+
+    ``radio`` is the selected language name; ``text_input``/``text_output``
+    are the current textbox values and ``markdown``/``markdown_msg1``/
+    ``markdown_msg2`` the current markdown texts. The incoming
+    ``translate_btn`` value is not read: it is overwritten in every branch.
+
+    ``index`` picks the entry of ``text_input_label``, ``text_output_label``,
+    ``clear_label`` and ``submit_label`` to use (0 simplified, 1 traditional,
+    2 English; any other ``radio`` value falls back to 0).
+
+    Returns a list of eight items in this order: text_input, text_output,
+    clear_btn, submit_btn, markdown, markdown_msg1, markdown_msg2,
+    translate_btn.
+    '''
     if radio == "简体中文":
         index = 0
+        # convert box contents and markdown to simplified Chinese; hide the translate button
+        text_input=gr.Textbox(value = chinese_converter.to_simplified(text_input), label = text_input_label[index])
+        text_output=gr.Textbox(value = chinese_converter.to_simplified(text_output),label = text_output_label[index])
+        markdown=chinese_converter.to_simplified(markdown)
+        markdown_msg1=chinese_converter.to_simplified(markdown_msg1)
+        markdown_msg2=chinese_converter.to_simplified(markdown_msg2)
+        translate_btn=gr.Button(visible=False)
     elif radio == "繁體中文":
         index = 1
+        # convert box contents and markdown to traditional Chinese; hide the translate button
+        text_input=gr.Textbox(value = chinese_converter.to_traditional(text_input),label = text_input_label[index])
+        text_output=gr.Textbox(value = chinese_converter.to_traditional(text_output),label = text_output_label[index])
+        markdown=chinese_converter.to_traditional(markdown)
+        markdown_msg1=chinese_converter.to_traditional(markdown_msg1)
+        markdown_msg2=chinese_converter.to_traditional(markdown_msg2)
+        translate_btn=gr.Button(visible=False)
     elif radio == "English":
         index = 2
+        # only the labels are set here (no value is passed); the markdown texts
+        # are returned as received and the translate button is made visible
+        text_input=gr.Textbox(label = text_input_label[index])
+        text_output=gr.Textbox(label = text_output_label[index])
+        translate_btn=gr.Button(visible=True)
     else:
         index = 0
+        # unknown selection: simplified labels and markdown, translate button hidden
+        text_input=gr.Textbox(label = text_input_label[index])
+        text_output=gr.Textbox(label = text_output_label[index])
+        markdown=chinese_converter.to_simplified(markdown)
+        markdown_msg1=chinese_converter.to_simplified(markdown_msg1)
+        markdown_msg2=chinese_converter.to_simplified(markdown_msg2)
+        translate_btn=gr.Button(visible=False)
+    # the two buttons are updated with plain label strings
+    clear_btn = clear_label[index]
+    submit_btn = submit_label[index]
+    return [text_input,text_output,clear_btn,submit_btn,markdown,
+            markdown_msg1 ,markdown_msg2,translate_btn]
 def clear_text():
     text_output_update=""
     return [text_input_update,text_output_update]
+def translate_text(text_input,text_output):
+    '''Translate the riddle box and the answer box into English.
+
+    Each value is sent through ``translate`` separately. A value that is
+    missing or blank is returned unchanged, so the LLM is never asked to
+    translate an empty prompt (it would otherwise invent text for the box).
+
+    Returns the pair ``(text_input, text_output)`` in that order.
+    '''
+    def _to_english(text):
+        # nothing to translate: skip the LLM call and keep the box as it is
+        if text is None or not str(text).strip():
+            return text
+        return translate(str(text))
+    return _to_english(text_input), _to_english(text_output)
 # %%
 #           }
 # """
 with gr.Blocks() as demo:
     index = 0
     example_list = [
         ["小家伙穿黄袍，花丛中把房造。飞到西来飞到东，人人夸他爱劳动。（猜一动物）"],
         ["一物生来身穿三百多件衣，每天脱一件，年底剩张皮。（猜一物品）"],
         ["A thousand threads, a million strands. Reaching the water, vanishing all at once. (Hint: natural phenomenon)"],
+        ["无底洞"],
     ]
     radio = gr.Radio(
         ["简体中文","繁體中文", "English"],show_label=False,value="简体中文"
             # Chinese Lantern Riddles Solver with LLM
             ## 用大语言模型来猜灯谜
             """,elem_id="markdown")
     with gr.Row():
         with gr.Column():
             text_input = gr.Textbox(label=text_input_label[index],
             with gr.Row():
                 clear_btn = gr.ClearButton(value=clear_label[index],components=[text_input])
                 submit_btn = gr.Button(value=submit_label[index], variant = "primary")
             text_output = gr.Textbox(label=text_output_label[index])
+            translate_btn = gr.Button(value="Translate", variant = "primary", scale=0, visible=False)
             examples = gr.Examples(
     submit_btn.click(fn=helper_text, inputs=[text_input,radio], outputs=text_output)
+    translate_btn.click(fn=translate_text, inputs=[text_input,text_output], outputs=[text_input,text_output])
     clear_btn.click(fn=clear_text,outputs=[text_input,text_output])
     radio.change(fn=change_language,inputs=[radio,text_input,text_output,
+                                            markdown, markdown_msg1,markdown_msg2,translate_btn],
                  outputs=[text_input,text_output,clear_btn,submit_btn,
+                          markdown, markdown_msg1,markdown_msg2,translate_btn])
     #demo = gr.Interface(fn=helper_text, inputs=text_input, outputs=text_output,
     #                      flagging_options=["Inappropriate"],allow_flagging="never",

gradio_cached_examples/10/log.csv CHANGED Viewed

@@ -1,7 +1,5 @@
 谜底,flag,username,timestamp
-这个谜面描述了一个小家伙穿着黄色的袍子，在花丛中建造房子。这个小家伙是蜜蜂，因为蜜蜂在花朵上采集花蜜，然后将这些花蜜转化为蜂蜜。所以答案是蜜蜂。,,,2024-02-19 01:00:57.388598
-谜底是日历。因为日历上的日期会随着时间的推移而改变，所以每过一年，就会有一层新的布料覆盖在上面，这就是所谓的“年轮”。因此，当最后一层布料被剥落时，就只剩下了一张没有了皮的日历。,,,2024-02-19 01:01:02.376888
-"这个谜面描述了一个自然现象，即“千条线，万条线，掉到水里看不见”。这个现象通常指的是水流在地面上形成的小水滴或小水泡，这些小水滴或小水泡会随着水流的流动而消失不见。
-这个谜底是雨水，因为雨水是由水滴和小水泡组成的，当它们落在地面上时，由于受到重力的作用，就会被冲走。因此，这个谜底就是雨水。",,,2024-02-19 01:01:10.248405
-这个谜面是通过“无底洞”来比喻一个无法预测或理解的、非常深邃的地方，因此可以推断出成语“深不可测”。,,,2024-02-19 01:01:13.125556

 谜底,flag,username,timestamp
+谜底是：蜜蜂,,,2024-02-22 22:14:00.108103
+谜底是：日历,,,2024-02-22 22:14:01.397728
+谜底是：雨,,,2024-02-22 22:14:02.151370
+谜底是：深不可测,,,2024-02-22 22:14:02.200411