Peter committed on
Commit b4c0306
1 Parent(s): 8d9ed7d

✨ integrate constrained gen


Signed-off-by: Peter <74869040+pszemraj@users.noreply.github.com>

Files changed (3)
  1. app.py +31 -26
  2. constrained_generation.py +3 -5
  3. converse.py +11 -5
app.py CHANGED
@@ -5,6 +5,9 @@ app.py - the main file for the app. This creates the flask app and handles the r
 import argparse
 import logging
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
+
 import os
 import sys
 import time
@@ -16,7 +19,7 @@ import gradio as gr
 import nltk
 import torch
 from cleantext import clean
-from gradio.inputs import Slider, Textbox
+from gradio.inputs import Slider, Textbox, Radio
 from transformers import pipeline
 
 from converse import discussion
@@ -40,13 +43,12 @@ warnings.filterwarnings(action="ignore", message=".*gradient_checkpointing*")
 import transformers
 
 transformers.logging.set_verbosity_error()
-logging.basicConfig()
 cwd = Path.cwd()
 my_cwd = str(cwd.resolve())  # string so it can be passed to os.path() objects
 
 
 def chat(
-    prompt_message, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 20
+    prompt_message, temperature: float = 0.5, top_p: float = 0.95, top_k: int = 20, constrained_generation: str = "False"
 ) -> str:
     """
     chat - the main function for the chatbot. This is the function that is called when the user
@@ -55,6 +57,7 @@ def chat(
     :param float temperature: the temperature value for the model, defaults to 0.6
     :param float top_p: the top_p value for the model, defaults to 0.95
     :param int top_k: the top_k value for the model, defaults to 25
+    :param bool constrained_generation: whether to use constrained generation or not, defaults to False
    :return str: the response from the model
     """
     history = []
@@ -64,6 +67,7 @@ def chat(
         top_p=top_p,
         top_k=top_k,
         temperature=temperature,
+        constrained_generation="true" in constrained_generation.lower(),
     )
     history = [prompt_message, response]
     html = ""
@@ -85,7 +89,8 @@ def ask_gpt(
     top_p=0.95,
     top_k=25,
     temperature=0.5,
-    constrained_generation=True,
+    constrained_generation=False,
+    max_input_length=128,
 ) -> str:
     """
     ask_gpt - helper function that asks the GPT model a question and returns the response
@@ -99,19 +104,20 @@ def ask_gpt(
     :param float top_p: the top_p value for the model, defaults to 0.95
     :param int top_k: the top_k value for the model, defaults to 25
     :param float temperature: the temperature value for the model, defaults to 0.6
+    :param bool constrained_generation: whether to use constrained generation or not, defaults to False
     :return str: the response from the model
     """
     st = time.perf_counter()
     prompt = clean(message)  # clean user input
     prompt = prompt.strip()  # get rid of any extra whitespace
     in_len = len(chat_pipe.tokenizer(prompt).input_ids)
-    if in_len > 512:
-        # truncate to last 512 tokens
+    if in_len > max_input_length:
+        # truncate to last max_input_length tokens
         tokens = chat_pipe.tokenizer(prompt).input_ids
-        trunc_tokens = tokens[-512:]
+        trunc_tokens = tokens[-max_input_length:]
         prompt = chat_pipe.tokenizer.decode(trunc_tokens)
         print(f"truncated prompt to {len(trunc_tokens)} tokens, input length: {in_len}")
-
+    logging.info(f"prompt: {prompt}")
     resp = discussion(
         prompt_text=prompt,
         pipeline=chat_pipe,
@@ -122,7 +128,7 @@ def ask_gpt(
         temperature=temperature,
         max_length=max_length,
         min_length=min_length,
-        constrained_generation=constrained_generation,
+        constrained_beam_search=constrained_generation,
     )
     gpt_et = time.perf_counter()
     gpt_rt = round(gpt_et - st, 2)
@@ -134,10 +140,9 @@ def ask_gpt(
     cln_resp = synthesize_grammar(corrector=grammarbot, message=rawtxt)
     bot_resp_a = corr(remove_repeated_words(cln_resp))
     bot_resp = fix_punct_spacing(bot_resp_a)
-    print(f"the prompt was:\n\t{message}\nand the response was:\n\t{bot_resp}\n")
     corr_rt = round(time.perf_counter() - gpt_et, 4)
     print(
-        f"took {gpt_rt + corr_rt} sec to respond, {gpt_rt} for GPT, {corr_rt} for correction\n"
+        f"{gpt_rt + corr_rt} to respond, {gpt_rt} GPT, {corr_rt} for correction\n"
     )
     return remove_trailing_punctuation(bot_resp)
 
@@ -225,7 +230,7 @@ if __name__ == "__main__":
             Textbox(
                 default="Why is everyone here eating chocolate cake?",
                 label="prompt_message",
-                placeholder="Enter a question",
+                placeholder="Start a conversation with the bot",
                 lines=2,
             ),
             Slider(
@@ -233,20 +238,21 @@ if __name__ == "__main__":
             ),
             Slider(minimum=0.0, maximum=1.0, step=0.01, default=0.95, label="top_p"),
             Slider(minimum=0, maximum=100, step=5, default=20, label="top_k"),
+            Radio(choices=["True", "False"], default="False", label="constrained_generation"),
         ],
         outputs="html",
         examples_per_page=8,
         examples=[
-            ["Point Break or Bad Boys II?", 0.75, 0.95, 50],
-            ["So... you're saying this wasn't an accident?", 0.6, 0.95, 40],
-            ["Hi, my name is Reginald", 0.6, 0.95, 100],
-            ["Happy birthday!", 0.9, 0.95, 50],
-            ["I have a question, can you help me?", 0.6, 0.95, 50],
-            ["Do you know a joke?", 0.8, 0.85, 50],
-            ["Will you marry me?", 0.9, 0.95, 100],
-            ["Are you single?", 0.95, 0.95, 100],
-            ["Do you like people?", 0.7, 0.95, 25],
-            ["You never took a shortcut before?", 0.7, 0.95, 100],
+            ["Point Break or Bad Boys II?", 0.75, 0.95, 50, False],
+            ["So... you're saying this wasn't an accident?", 0.6, 0.95, 40, False],
+            ["Hi, my name is Reginald", 0.6, 0.95, 100, False],
+            ["Happy birthday!", 0.9, 0.95, 50, False],
+            ["I have a question, can you help me?", 0.6, 0.95, 50, False],
+            ["Do you know a joke?", 0.8, 0.85, 50, False],
+            ["Will you marry me?", 0.9, 0.95, 100, False],
+            ["Are you single?", 0.95, 0.95, 100, False],
+            ["Do you like people?", 0.7, 0.95, 25, False],
+            ["You never took a shortcut before?", 0.7, 0.95, 100, False],
         ],
         title=f"GPT Chatbot Demo: {default_model} Model",
         description=f"A Demo of a Chatbot trained for conversation with humans. Size XL= 1.5B parameters.\n\n"
@@ -254,20 +260,19 @@ if __name__ == "__main__":
         "You can find a link to the model card **[here](https://huggingface.co/ethzanalytics/ai-msgbot-gpt2-XL-dialogue)**\n\n"
         "1. responses can take up to 60 seconds to respond sometimes, patience is a virtue.\n"
         "2. the model was trained on several different datasets. fact-check responses instead of regarding as a true statement.\n"
-        "3. Try adjusting the **[generation parameters](https://huggingface.co/blog/how-to-generate)** to get a better understanding of how they work!\n",
+        "3. Try adjusting the **[generation parameters](https://huggingface.co/blog/how-to-generate)** to get a better understanding of how they work!\n"
+        "4. New - try using [constrained beam search](https://huggingface.co/blog/constrained-beam-search) decoding to generate more coherent responses. _(experimental, feedback welcome!)_\n",
         css="""
         .chatbox {display:flex;flex-direction:row}
         .user_msg, .resp_msg {padding:4px;margin-bottom:4px;border-radius:4px;width:80%}
         .user_msg {background-color:cornflowerblue;color:white;align-self:start}
         .resp_msg {background-color:lightgray;align-self:self-end}
         """,
-        allow_screenshot=True,
         allow_flagging="never",
         theme="dark",
     )
 
     # launch the gradio interface and start the server
     iface.launch(
-        # prevent_thread_lock=True,
-        enable_queue=True,  # also allows for dealing with multiple users simultaneously (per newer gradio version)
+        enable_queue=True,
     )
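Note on the new `constrained_generation` control: Gradio's `Radio` component returns its choice as a string, so `chat()` converts `"True"`/`"False"` to a bool with a substring check before forwarding it, and `ask_gpt()` now truncates prompts to the last `max_input_length` tokens instead of a hard-coded 512. A minimal standalone sketch of both pieces (the `gpt2` tokenizer checkpoint is an illustrative stand-in, not the model this Space serves):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative checkpoint


def to_bool(radio_value: str) -> bool:
    # Radio(choices=["True", "False"]) yields a string;
    # mirror the diff's `"true" in value.lower()` conversion
    return "true" in radio_value.lower()


def truncate_prompt(prompt: str, max_input_length: int = 128) -> str:
    # keep only the last max_input_length tokens, as ask_gpt() now does
    tokens = tokenizer(prompt).input_ids
    if len(tokens) > max_input_length:
        tokens = tokens[-max_input_length:]
        prompt = tokenizer.decode(tokens)
    return prompt


assert to_bool("True") is True and to_bool("False") is False
```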
constrained_generation.py CHANGED
@@ -4,6 +4,7 @@
 
 import copy
 import logging
+logging.basicConfig(level=logging.INFO)
 import time
 from pathlib import Path
 
@@ -81,7 +82,7 @@ def create_kw_extractor(
     )
 
 
-def simple_kw(body_text: str, yake_ex=None, max_kw=10, verbose=False):
+def simple_kw(body_text: str, yake_ex=None, max_kw=15, verbose=False):
     """
     simple_kw - extract keywords from a text using yake
 
@@ -96,7 +97,7 @@ def simple_kw(body_text: str, yake_ex=None, max_kw=10, verbose=False):
     """
     yake_ex = yake_ex or create_kw_extractor(
         max_ngram_size=2,
-        ddpt=0.8,
+        ddpt=0.9,
         windowSize=10,
         deduplication_algo="seqm",
         numOfKeywords=max_kw,
@@ -219,7 +220,6 @@ def constrained_generation(
         if force_flexible is not None
         else None
     )
-
     try:
         logging.info("generating text..")
         result = pipeline(
@@ -236,8 +236,6 @@
             length_penalty=length_penalty,
             repetition_penalty=repetition_penalty,
             return_full_text=full_text,
-            remove_invalid_values=True,
-            skip_special_tokens=True,
             clean_up_tokenization_spaces=True,
             early_stopping=True,
             do_sample=False,
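For context, `create_kw_extractor` wraps the `yake` keyword extractor, and this change bumps the deduplication threshold (`ddpt`) from 0.8 to 0.9 and the default keyword count from 10 to 15. A rough sketch of the equivalent direct yake call, assuming the wrapper's `ddpt`, `windowSize`, `deduplication_algo`, and `numOfKeywords` map onto yake's `dedupLim`, `windowsSize`, `dedupFunc`, and `top`:

```python
import yake

# assumed mapping of the wrapper's arguments onto yake's API
extractor = yake.KeywordExtractor(
    lan="en",
    n=2,             # max_ngram_size=2
    dedupLim=0.9,    # ddpt=0.9 (was 0.8): tolerate more similar keywords
    dedupFunc="seqm",
    windowsSize=10,
    top=15,          # max_kw now defaults to 15 (was 10)
)

keywords = extractor.extract_keywords("Why is everyone here eating chocolate cake?")
# yake returns (keyword, score) pairs; a lower score means more relevant
print([kw for kw, score in keywords])
```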
converse.py CHANGED
@@ -4,7 +4,8 @@
 https://huggingface.co/docs/transformers/v4.15.0/en/main_classes/model#transformers.generation_utils.GenerationMixin.generate.no_repeat_ngram_size
 """
 
-
+import logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
 import pprint as pp
 import time
 
@@ -29,7 +30,7 @@ def discussion(
     num_return_sequences=1,
     device=-1,
     verbose=False,
-    constrained_generation=False,
+    constrained_beam_search=False,
 ):
     """
     discussion - a function that takes in a prompt and generates a response. This function is meant to be used in a conversation loop, and is the main function for the bot.
@@ -66,7 +67,8 @@ def discussion(
     pp.pprint(this_prompt, indent=4)
     # call the model
     print("\n... generating...")
-    if constrained_generation:
+    if constrained_beam_search:
+        logging.info("using constrained beam search")
         response = constrained_generation(
             prompt=this_prompt,
             pipeline=pipeline,
@@ -75,7 +77,7 @@ def discussion(
             repetition_penalty=1.0,
             num_beams=4,
             timeout=timeout,
-            verbose=verbose,
+            verbose=False,
             full_text=full_text,
             speaker_name=speaker,
             responder_name=responder,
@@ -83,12 +85,15 @@ def discussion(
 
         bot_dialogue = consolidate_texts(
             name_resp=responder,
-            model_resp=response,
+            model_resp=response.split(
+                "\n"
+            ),
             name_spk=speaker,
             verbose=verbose,
             print_debug=True,
         )
     else:
+        logging.info("using sampling")
         bot_dialogue = gen_response(
             this_prompt,
             pipeline,
@@ -123,6 +128,7 @@ def discussion(
         p_list.append("\n")
 
     print("\nfinished!")
+    logging.info(f"finished generating response:\n\t{bot_resp}")
     # return the bot response and the full conversation
 
     return {"out_text": bot_resp, "full_conv": p_list}