Spaces:
Sleeping
Sleeping
File size: 4,145 Bytes
ba74397 738953f 73a7ca1 738953f 05c76f9 738953f 3387a6c 0d7378a 630a020 738953f ddb27d0 94b81c2 738953f 0326706 738953f 0548d98 738953f 8bf6226 738953f c9719ad 0788fe0 0c9a7d4 94b81c2 ddb27d0 738953f d40212f 738953f 2d0dafc a000d3e d40212f a000d3e 94b81c2 bc126ed 363603b 94b81c2 13d6021 0326706 9358411 363603b a000d3e c4e087d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
from huggingface_hub import InferenceClient, create_branch
import gradio as gr
import time
# Hosted inference endpoint used by generate() for streaming text generation.
# The commented line is an alternative backbone model kept for quick swapping.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
#client = InferenceClient("meta-llama/Llama-2-70b-chat-hf")
def split_list(lst, chunk_size):
    """Partition *lst* into consecutive chunks of length *chunk_size*.

    The last chunk is shorter when ``len(lst)`` is not a multiple of
    ``chunk_size``. An empty list yields an empty result.
    """
    chunks = []
    for start in range(0, len(lst), chunk_size):
        chunks.append(lst[start:start + chunk_size])
    return chunks
def format_prompt(message, history, system_prompt):
    """Assemble a Mixtral/Llama-2 instruct prompt.

    The optional system prompt goes in a leading ``<<SYS>>`` turn, each
    (user, bot) pair from *history* becomes an ``[INST]`` turn plus reply,
    and *message* is appended as the final open instruction.
    """
    if system_prompt:
        parts = [f"<s>[INST] <<SYS>>{system_prompt}<</SYS>> [/INST] </s>"]
    else:
        parts = ["<s>"]
    for past_user, past_bot in history:
        parts.append(f"[INST] {past_user} [/INST]")
        parts.append(f" {past_bot}</s> ")
    parts.append(f"[INST] {message} [/INST]")
    return "".join(parts)
def format_sys_prompt(username, botname, botpersonality, botexamplechat, chatsituation):
    """Compose the roleplay system prompt for a character conversation.

    Instructs the model to play *botname* with the given personality in the
    given situation, to never speak for *username*, and to wrap chat speech
    in ``<"...">`` markers, illustrated by *botexamplechat*.
    """
    return (
        f'You are a fictional character named "{botname}"\n'
        "However, don't say we're roleplaying, as to not break the immersion.\n"
        "\n"
        f"Personality of {botname}:\n"
        f"{botpersonality}\n"
        "\n"
        "Current situation/place/time of this conversation:\n"
        f"{chatsituation}\n"
        "\n"
        f'In this case, you will be talking to "{username}". '
        f'You should not help to talk or act as "{username}". '
        "Leave everything by the user.\n"
        "\n"
        "Chat formatting:\n"
        'Chat: <"Hello, this is a chat text. Chat text is enclosed with '
        '< as the starting point and > as the ending point.">\n'
        "Example:\n"
        f"{botexamplechat}"
    )
def generate(
    prompt, history, username, botname, botpersonality, botexamplechat, chatsituation, shouldoverridehistory, historyoverride, max_new_tokens=1024, temperature=1.2, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a roleplay chat completion from the inference endpoint.

    Yields the cumulative response text token by token (Gradio streaming
    convention: each yield replaces the displayed message).

    Parameters mirror the ChatInterface additional_inputs: the character
    setup fields, an optional flat history override (user/bot/user/bot...),
    and the usual sampling knobs.
    """
    print(history)
    print(historyoverride)
    # Clamp temperature away from zero; near-zero values are rejected or
    # degenerate on the sampling endpoint.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        # Wall-clock seed so a retry of the same prompt produces a new reply.
        seed=round(time.time()),
    )
    # API-style override: the first gr.List row is a flat
    # user/bot/user/bot... sequence, regrouped into (user, bot) pairs.
    # Guard against an empty/None grid (previously historyoverride[0]
    # raised when the checkbox was on but no history row was supplied).
    if shouldoverridehistory and historyoverride and historyoverride[0]:
        history = split_list(historyoverride[0], 2)
        print(history)
    formatted_prompt = format_prompt(
        prompt,
        history,
        format_sys_prompt(username, botname, botpersonality, botexamplechat, chatsituation),
    )
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
# Chat display component shared with the ChatInterface below: copy button and
# like/dislike feedback enabled, bubbles sized to content.
# NOTE(review): both avatar slots point to ./user.png — presumably the bot was
# meant to have its own image; confirm the intended asset.
mychatbot = gr.Chatbot(
avatar_images=["./user.png", "./user.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
# ChatInterface wiring: the additional_inputs below are passed positionally to
# generate() after (prompt, history), so their order must match the function
# signature: username, botname, botpersonality, botexamplechat, chatsituation,
# shouldoverridehistory, historyoverride, max_new_tokens.
# NOTE(review): the slider default (512) differs from generate()'s
# max_new_tokens default (1024); the slider value wins in the UI — confirm
# which default is intended.
demo = gr.ChatInterface(fn=generate,
chatbot=mychatbot,
title="Joystick's Mixtral Chat-optimized interface",
retry_btn="🔁 Regenerate",
undo_btn="↩️ Undo",
additional_inputs=[
gr.Textbox(label="Name of user", lines=1, value="Jake"),
gr.Textbox(label="Name of bot", lines=1, value="Janet"),
gr.Textbox(label="Personality of bot", lines=3, value="Janet's a lovely person. A woman, blue eyed, glasses, smart and looks stunning."),
gr.Textbox(label="Example of bot chat", lines=3, value='<"Oh hey Jake!"> She said to Jake as he hurries to him. <"How are you?">'),
gr.Textbox(label="Current conversation situation", lines=2, value="It was a Friday afternoon, after-school hours, it was outside of school. Jake and Janet met each other at the entrance of the school."),
gr.Checkbox(label="Override history: History should be in the following format: user-bot-user-bot-user-...\nOverride history should be checked in order for it to be effective. Override primarily only used for APIs.", value=False),
gr.List(label="History", value=None, row_count=(1, "fixed"), headers=None),
gr.Slider(label="Max new tokens", maximum=2048, value=512)
]
)
demo.queue().launch(show_api=True) |