YanshekWoo commited on
Commit
d18f655
1 Parent(s): e590807

ADD dialogue chat

Browse files
Files changed (1) hide show
  1. app.py +62 -21
app.py CHANGED
@@ -1,44 +1,85 @@
1
  import gradio as gr
 
 
2
  from transformers import BertTokenizer, BartForConditionalGeneration
3
 
4
-
5
  title = "HIT-TMG/dialogue-bart-large-chinese"
6
  description = """
7
  This is a seq2seq model fine-tuned on several Chinese dialogue datasets, from bart-large-chinese.
8
  See some details of model card at https://huggingface.co/HIT-TMG/dialogue-bart-large-chinese .
9
-
10
- Input example: 可以 认识 一下 吗 ?[SEP]当然 可以 啦 , 你好 。[SEP]嘿嘿 你好 , 请问 你 最近 在 忙 什么 呢 ?[SEP]我 最近 养 了 一只 狗狗 , 我 在 训练 它 呢 。
11
  """
12
 
 
13
 
14
  tokenizer = BertTokenizer.from_pretrained("HIT-TMG/dialogue-bart-large-chinese")
15
  model = BartForConditionalGeneration.from_pretrained("HIT-TMG/dialogue-bart-large-chinese")
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- def chat(history):
19
- history_prefix = "对话历史:"
20
- history = history_prefix + history
 
 
21
 
22
- outputs = tokenizer(history,
23
- return_tensors='pt',
24
- padding=True,
25
- truncation=True,
26
- max_length=512)
 
27
 
28
- input_ids = outputs.input_ids
29
  output_ids = model.generate(input_ids)[0]
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- return tokenizer.decode(output_ids, skip_special_tokens=True)
32
 
33
 
34
- chatbot = gr.Chatbot().style(color_map=("green", "pink"))
35
- demo = gr.Interface(
36
- chat,
37
- inputs=gr.Textbox(lines=8, placeholder="输入你的对话历史(请以'[SEP]'作为每段对话的间隔)\nInput the dialogue history (Please split utterances by '[SEP]')"),
38
- title=title,
39
- description=description,
40
- outputs =["text"]
41
- )
42
 
43
 
44
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import torch
3
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
4
  from transformers import BertTokenizer, BartForConditionalGeneration
5
 
 
6
# Space header text shown by Gradio.
title = "HIT-TMG/dialogue-bart-large-chinese"
description = """
This is a seq2seq model fine-tuned on several Chinese dialogue datasets, from bart-large-chinese.
See some details of model card at https://huggingface.co/HIT-TMG/dialogue-bart-large-chinese .
"""

# Input example (utterances separated by [SEP]):
# 可以 认识 一下 吗 ?[SEP]当然 可以 啦 , 你好 。[SEP]嘿嘿 你好 , 请问 你 最近 在 忙 什么 呢 ?[SEP]我 最近 养 了 一只 狗狗 , 我 在 训练 它 呢 。

# Load tokenizer and model once at import time; both come from the same checkpoint.
checkpoint = "HIT-TMG/dialogue-bart-large-chinese"
tokenizer = BertTokenizer.from_pretrained(checkpoint)
model = BartForConditionalGeneration.from_pretrained(checkpoint)

# Truncate from the left so that, when the dialogue history overflows
# max_length, the most recent turns are the ones kept.
tokenizer.truncation_side = 'left'
max_length = 512
19
+
20
+
21
def chat_func(input_utterance, history: Optional[List[str]] = None):
    """Generate one chatbot reply and update the running dialogue history.

    Args:
        input_utterance: the user's latest message.
        history: flat list of alternating user/bot utterances from earlier
            turns (the gradio "state" value); None on the very first turn.

    Returns:
        A pair ``(display_utterances, history)`` where ``display_utterances``
        is a list of ``(user, bot)`` tuples for the Chatbot component and
        ``history`` is the updated flat utterance list to carry as state.
    """
    if history is None:
        history = []
    history.append(input_utterance)

    # The model expects a "对话历史:" ("dialogue history:") prefix with turns
    # joined by the tokenizer's [SEP] token; truncation_side='left' (set at
    # module level) keeps the most recent context when this overflows.
    history_str = "对话历史:" + tokenizer.sep_token.join(history)

    input_ids = tokenizer(history_str,
                          return_tensors='pt',
                          truncation=True,
                          max_length=max_length).input_ids

    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        output_ids = model.generate(input_ids)[0]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)

    history.append(response)

    # Pair consecutive utterances as (user, bot) tuples for gr.Chatbot.
    display_utterances = [(history[i], history[i + 1])
                          for i in range(0, len(history) - 1, 2)]

    return display_utterances, history
78
 
79
 
80
# Wire the chat function into the UI: text input plus conversation state in,
# Chatbot rendering plus updated state out. `title` and `description` are
# defined at module level but were no longer passed to the Interface — restore
# them so the Space keeps its header text.
demo = gr.Interface(fn=chat_func,
                    title=title,
                    description=description,
                    inputs=["text", "state"],
                    outputs=["chatbot", "state"])
 
 
 
 
 
83
 
84
 
85
  if __name__ == "__main__":