Spaces commit: Reformatted with black
Changed files: app.py, chat.py, prompt.py
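The commit title says the three files below were reformatted with the black code formatter. As a minimal sketch (not part of the commit itself; it assumes black is installed via `pip install black`), the same formatting pass could be reproduced from Python like this:

# Re-run black over the three changed files.
# Assumes the "black" formatter is installed in the current environment.
import subprocess

subprocess.run(["black", "app.py", "chat.py", "prompt.py"], check=True)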
app.py (CHANGED)

New version of the file after this commit:

@@ -1,18 +1,23 @@

#!/usr/bin/env python
# or gradio app.py

import gradio as gr

from chat import iface_chat
from prompt import iface_prompt

with gr.Blocks() as iface:
    gr.Markdown(
        """# Petals playground
**Let's play with prompts and inference settings for BLOOM and BLOOMZ 176B models!**

This space uses websocket API of [chat.petals.ml](http://chat.petals.ml). Health status of Petals network [lives here](http://health.petals.ml).

Do NOT talk to BLOOM as an entity, it's not a chatbot but a webpage/blog/article completion model.
For the best results: MIMIC a few sentences of a webpage similar to the content you want to generate.

BLOOMZ performs better in chat mode and understands the instructions better."""
    )

    gr.TabbedInterface([iface_prompt, iface_chat], ["Prompt mode", "Chat mode"])
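app.py builds a Blocks layout but, as shown, never calls launch(); the "# or gradio app.py" comment suggests the Space is started via the gradio CLI. For local testing, a minimal sketch (not part of the diff) of launching the combined interface directly would be:

# Hypothetical local entry point; the Space itself may rely on "gradio app.py".
if __name__ == "__main__":
    iface.launch()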
chat.py (CHANGED)

New version of the changed hunks (unchanged lines between hunks are omitted, as in the original diff):

@@ -1,26 +1,26 @@

import json
import re
import time
import traceback

import gradio as gr

import chat_client

CHAT_URL = "ws://chat.petals.ml/api/v2/generate"
# CHAT_URL='ws://localhost:8000/api/v2/generate'

EMPTY_STATE = {
    "generate": False,
    "model": None,
    "client": None,
    "history": [],
}


def generate(state, prompt, model, context, output, *args):
    # Save that we're in the generating loop
    state["generate"] = True

    try:
        yield from _generate(state, prompt, model, context, output, *args)

@@ -29,16 +29,28 @@

        # TODO This is a bit fragile because of the recursive call...
        print("Retrying session...")
        context = output
        output = ""
        yield from generate(state, prompt, model, context, output, *args)
    finally:
        state["generate"] = False


def _generate(
    state,
    prompt,
    model,
    context,
    output,
    endseq,
    max_length,
    do_sample,
    top_k,
    top_p,
    temperature,
):

    start = time.time()
    cnt = 0  # Tokens generated

    def stats():
        # Produces inline stats for generation speed

@@ -50,34 +62,37 @@

        sec_per_item = (time.time() - start) / cnt
        return f" | {sec_per_item:.1f} sec/t"

    eos = "</s>\n" if "bloomz" in model else "\n\n"

    if state["model"] != model and output:
        # If the connection is resumed, output is truncated in generate().
        # So this executes when the user changes the model.
        context = output
        output = ""

    # Update the widgets even before we get the first response
    print("prompt", prompt)
    yield state, state["history"] + [[prompt, stats()]], "", output

    if (
        state["model"] != model
        or state["client"] == None
        or state["client"].is_session() == False
    ):

        try:
            state["client"] = chat_client.ModelClient(CHAT_URL)
            state["client"].open_session(f"bigscience/{model}-petals", max_length)
            state["model"] = model
        except Exception:
            print(traceback.format_exc())
            raise gr.Error(traceback.format_exc(limit=3))

    else:
        context = ""

    client = state["client"]
    context += eos

    # Fix a possible eos token mismatch and add the eos token to context and prompt
    if "bloomz" in model:

@@ -87,7 +102,7 @@

    context = context.replace("</s>", eos)
    context = re.sub(r"\n\n+", "\n\n", context)
    prompt2 = prompt.replace("</s>", eos) + "\n\n"

    prompt2 = f"{context}Human: {prompt2}AI:"

    # Translate checkbox items to actual sequences

@@ -119,24 +134,22 @@


    output += prompt2

    orig_history = state["history"]
    new_line = ""
    try:
        for out in client.generate(
            prompt2,
            max_new_tokens=1,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            extra_stop_sequences=seq,
        ):

            if not state["generate"]:
                client.close_session()
                yield state, [], "", ""
                # Stopping generation
                return


@@ -149,64 +162,84 @@

                spl = new_line.split(s)
                new_line = spl[0]
                if len(spl) > 1:
                    state["history"] = orig_history + [[prompt, new_line]]
                    output += new_line
                    yield state, state["history"], "", output
                    # Stopping generation
                    return

            # Keep the original history untouched, as we're adding just
            # one chunk at a time.
            state["history"] = orig_history + [[prompt, new_line + stats()]]
            yield state, state["history"], "", output

        # Final line w/o statistics
        yield state, state["history"], "", output

    except (json.decoder.JSONDecodeError, BrokenPipeError):
        # Session was interrupted
        # Handled in the upstream func
        client.close_session()
        state["client"] = None
        state["model"] = None

        print("Broken session!")
        raise
    except Exception:
        client.close_session()
        state["client"] = None
        state["model"] = None

        print(traceback.format_exc())
        raise gr.Error(traceback.format_exc(limit=3))


def reset(state):
    """Resets the session and clears the chat window."""
    state.update(EMPTY_STATE)
    return state, [], ""


# ---------------------------------------------------------
# Defining the Gradio layout
with gr.Blocks() as iface_chat:
    gr.Markdown("""**Let's talk to Bloom in a chat!**""")

    with gr.Row():
        model = gr.Radio(
            ["bloom", "bloomz", "bloom-7b1"], value="bloomz", label="Use model"
        )

        # Additional ending sequences, at which generation should stop
        endseq = gr.CheckboxGroup(
            ["Human:", "AI:", "\\n", "</s>", "? (question mark)", ". (dot)"],
            value=["Human:", "AI:", "\\n", "</s>"],
            label="Extra end sequences",
        )

        # Maximum length of the inference session
        max_length = gr.Radio(
            [64, 128, 256, 512, 1024, 2048],
            value=1024,
            interactive=True,
            label="Max length",
        )

    with gr.Row():
        with gr.Column():
            # Switch between sampling and greedy generation
            do_sample = gr.Checkbox(value=True, interactive=True, label="do_sample")
            context = gr.Textbox(
                lines=3,
                label="Initial context:",
                interactive=True,
                value="A Human talks to a powerful AI that follows "
                "the Human's instructions.\n"
                "AI is talkative, friendly, positive and provides "
                "detailed answers to any question.</s>\n"
                "Human: Hi!</s>\n"
                "AI: How can I help you?",
            )

            # Only one of top_k and top_p can be set. Requires "do_sample=True" to work.
            top_k = gr.Number(value=0, precision=0, interactive=True, label="top_k")

@@ -214,12 +247,14 @@

            # TODO num_beams

            # Generation temperature
            temperature = gr.Number(
                value=0.75, precision=2, interactive=True, label="Temperature"
            )

    chat = gr.Chatbot(label="Chat window")
    prompt = gr.Textbox(
        show_label=False, label="Prompt", placeholder="Prompt Here and press Enter..."
    ).style(container=False)

    with gr.Row():
        button_generate = gr.Button("Generate")

@@ -231,20 +266,40 @@

    # Chat history
    state = gr.State(EMPTY_STATE)

    # Define the button actions
    inputs = [
        state,
        prompt,
        model,
        context,
        output,
        endseq,
        max_length,
        do_sample,
        top_k,
        top_p,
        temperature,
    ]
    outputs = [state, chat, prompt, output]

    prompt.submit(generate, inputs=inputs, outputs=outputs)
    button_generate.click(generate, inputs=inputs, outputs=outputs)
    button_reset.click(reset, inputs=[state], outputs=[state, chat, output])

    examples = gr.Examples(
        inputs=[context, prompt, model, do_sample, top_k, top_p, temperature],
        examples=[
            [
                "A Human talks to a powerful AI that follows the Human's instructions. "
                "AI is talkative, friendly, positive and provides detailed answers to any question.</s>\n"
                "Human: Hi!</s>\n"
                "AI: Hi! How can I help you?",
                "Could you remind me please who was Neil Armstrong?",
                "bloomz",
                True,
                0,
                0.9,
                0.75,
            ],
        ],
    )
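For reference, chat.py drives the websocket backend through the repo's own chat_client module. Below is a minimal sketch of using that client outside Gradio, based only on the calls visible in the hunks above (ModelClient, open_session, is_session, generate, close_session); the exact signatures and streaming behavior of chat_client are an assumption here, not documented API.

# Sketch: assumes chat_client exposes ModelClient with the methods used in chat.py.
import chat_client

CHAT_URL = "ws://chat.petals.ml/api/v2/generate"

# Open a websocket inference session for a Petals-hosted model.
client = chat_client.ModelClient(CHAT_URL)
client.open_session("bigscience/bloomz-petals", 512)

text = ""
# Like the app, request one new token per step and accumulate the stream.
for out in client.generate(
    "Human: Hi!</s>\nAI:",
    max_new_tokens=1,
    do_sample=True,
    temperature=0.75,
    top_k=0,
    top_p=0.9,
    extra_stop_sequences=["</s>"],
):
    text += out
    if "</s>" in text:
        break

client.close_session()
print(text)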
prompt.py (CHANGED)

New version of the changed hunks (unchanged lines between hunks are omitted, as in the original diff):

@@ -1,33 +1,44 @@

import time
import traceback

import gradio as gr

import chat_client

CHAT_URL = "ws://chat.petals.ml/api/v2/generate"
# CHAT_URL='ws://localhost:8000/api/v2/generate'


def generate(state, *args):
    # Save that we're in the generating loop
    state["generate"] = True

    try:
        yield from _generate(state, *args)
    finally:
        state["generate"] = False


def _generate(
    state,
    prompt,
    model,
    endseq,
    max_length,
    do_sample,
    top_k,
    top_p,
    temperature,
    add_stoptoken,
    copy_output,
):

    start = time.time()
    cnt = 0

    def stats():
        # Produces inline stats for generation speed
        # (sec/t or t/sec depending on the speed)
        if cnt == 0:
            return "\u2026 | ? sec/t"
        if cnt > time.time() - start:

@@ -70,23 +81,24 @@

        temperature = 1.0

    prompt2 = prompt
    output = ""

    # This renders the prompt dialog immediately and
    # doesn't wait for the generator to return the first result
    yield [state, prompt2, stats()]

    try:
        for out in client.generate(
            prompt,
            max_new_tokens=1,
            do_sample=do_sample,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            extra_stop_sequences=seq,
        ):

            if not state["generate"]:
                client.close_session()
                return


@@ -104,31 +116,53 @@

        print(traceback.format_exc())
        raise gr.Error(traceback.format_exc(limit=3))


def stop(state):
    """Stops generating."""
    state.update({"generate": False})
    return state


# ---------------------------------------------------------
# Defining the Gradio layout
with gr.Blocks() as iface_prompt:
    gr.Markdown(
        """**Useful for testing raw prompts with zero,
        one or few-shot prompting.**"""
    )

    with gr.Row():
        model = gr.Radio(
            ["bloom", "bloomz", "bloom-7b1"], value="bloom", label="Use model"
        )

        # Additional ending sequences, at which generation should stop
        endseq = gr.CheckboxGroup(
            ["\\n", "</s>", "? (question mark)", ". (dot)"],
            value=["\\n", "</s>"],
            label="Extra end sequences",
        )

        # Maximum length of the inference session
        max_length = gr.Radio(
            [64, 128, 256, 512, 1024, 2048],
            value=512,
            interactive=True,
            label="Max length",
        )

    with gr.Row():
        with gr.Column():
            # Switch between sampling and greedy generation
            do_sample = gr.Checkbox(value=True, interactive=True, label="do_sample")

            # Should the app append the stop sequence at the end of the prompt,
            # or should it leave the prompt open?
            add_stoptoken = gr.Checkbox(
                value=True,
                interactive=True,
                label="Automatically add eos token to the prompt.",
            )

            # Only one of top_k and top_p can be set. Requires "do_sample=True" to work.
            top_k = gr.Number(value=0, precision=0, interactive=True, label="top_k")

@@ -136,10 +170,12 @@

            # TODO num_beams

            # Generation temperature
            temperature = gr.Number(
                value=0.75, precision=2, interactive=True, label="Temperature"
            )

    prompt = gr.Textbox(lines=3, label="Prompt", placeholder="Prompt Here...")
    state = gr.State({"generate": False})

    with gr.Row():
        button_generate = gr.Button("Generate")

@@ -148,22 +184,62 @@

        # Automatically copy the output to the end of the prompt
        copy_output = gr.Checkbox(label="Output -> Prompt")

    output = gr.Textbox(lines=3, label="Output")

    # Define the button actions
    button_generate.click(
        generate,
        inputs=[
            state,
            prompt,
            model,
            endseq,
            max_length,
            do_sample,
            top_k,
            top_p,
            temperature,
            add_stoptoken,
            copy_output,
        ],
        outputs=[state, prompt, output],
    )
    button_stop.click(stop, inputs=[state], outputs=[state])

    examples = gr.Examples(
        inputs=[prompt, model, do_sample, top_k, top_p, temperature, add_stoptoken],
        examples=[
            [
                "The SQL command to extract all the users whose name starts with A is: ",
                "bloom-7b1",
                False,
                0,
                0,
                1,
                False,
            ],
            [
                "The Spanish translation of thank you for your help is: ",
                "bloom-7b1",
                False,
                0,
                0,
                1,
                False,
            ],
            [
                "A human talks to a powerful AI that follows the Human's instructions.\n"
                "AI is talkative, friendly, positive and provides detailed answers to any question.</s>\n"
                "Human: Hi!</s>\n"
                "AI: Hi! How can I help you?</s>\n"
                "Human: What's the capital of Portugal?</s>\n"
                "AI: ",
                "bloomz",
                True,
                0,
                0.9,
                0.75,
                False,
            ],
        ],
    )