Sharing a script to run a local streaming chat interface
#20 · opened by tarruda
This model runs very well on my laptop's RTX 3070 (8GB, running in 4-bit).

Here's a script that spawns a local chat web UI to try Mistral 7B (based on https://huggingface.co/spaces/Sentdex/StableBeluga-7B-Chat/blob/main/app.py):
```python
import gradio as gr
import transformers
import torch
from threading import Thread
from gradio.themes.utils.colors import Color

model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit NF4 quantization so the model fits in 8GB of VRAM.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map='auto',
)
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)

text_color = "#FFFFFF"
app_background = "#0A0A0A"
user_inputs_background = "#193C4C"  # alternatives: "#14303D", "#091820"
widget_bg = "#000100"
button_bg = "#141414"

dark = Color(
    name="dark",
    c50="#F4F3EE",    # not sure
    # all text colors:
    c100=text_color,  # title color, input text color, and all chat text color
    c200=text_color,  # widget name colors (system prompt and "chatbot")
    c300="#F4F3EE",   # not sure
    c400="#F4F3EE",   # possibly Gradio link color, maybe other unclicked link colors
    c500=text_color,  # suggestion text color, maybe other stuff
    c600=button_bg,   # button background color, also outline of user msg
    c700=user_inputs_background,  # text input background AND user message color, and bot reply outline
    c800=widget_bg,   # widget background (block background, not the whole page) and bot-reply background
    c900=app_background,  # app/page background
    c950="#F4F3EE",   # not sure atm
)

DESCRIPTION = """
# Mistral 7B Instruct Chat 🗨️

This is a streaming Chat Interface implementation of [Mistral 7B Instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
"""


def chat(user_input, history):
    # Rebuild the conversation in the format expected by the chat template.
    messages = []
    for pair in history:
        messages.append({'role': 'user', 'content': pair[0]})
        messages.append({'role': 'assistant', 'content': pair[1]})
    messages.append({'role': 'user', 'content': user_input})

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")
    device = 'cuda'
    model_inputs = {'input_ids': encodeds.to(device)}

    # Run generation on a background thread and stream decoded tokens back.
    streamer = transformers.TextIteratorStreamer(
        tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=2000,
        do_sample=True,
        # top_p=0.95,
        # temperature=0.8,
        # top_k=50,
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    model_output = ""
    for new_text in streamer:
        model_output += new_text
        yield model_output
    return model_output


with gr.Blocks(theme=gr.themes.Monochrome(
        font=[gr.themes.GoogleFont("Montserrat"), "Arial", "sans-serif"],
        primary_hue="sky",    # when loading
        secondary_hue="sky",  # something with links
        neutral_hue="dark")) as demo:
    gr.Markdown(DESCRIPTION)
    chatbot = gr.ChatInterface(fn=chat)

demo.queue(api_open=False).launch(server_name='0.0.0.0', show_api=False, share=False)
```
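To run it, save the script as `app.py`, install the dependencies (gradio, transformers, accelerate, and bitsandbytes), and start it with `python app.py`; Gradio serves on port 7860 by default, and `server_name='0.0.0.0'` makes the UI reachable from other machines on your network. Generation runs on a background thread because `model.generate` blocks until it finishes; `TextIteratorStreamer` hands decoded tokens back as they are produced, which is what lets `chat` yield partial replies for the UI to stream.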
Can I set a system prompt to make it behave the way I want? I didn't see an option for a system prompt in the model card.
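As far as I can tell, the v0.1 chat template doesn't define a system role (it only accepts alternating user/assistant messages), so there's no built-in option. A common workaround is to fold your instructions into the first user message before calling `apply_chat_template`. A minimal sketch (`SYSTEM_PROMPT` and `with_system_prompt` are made-up names for illustration, not part of the model or library):

```python
# Sketch: fold a "system prompt" into the first user turn, since the
# v0.1 chat template only accepts alternating user/assistant roles.
SYSTEM_PROMPT = "You are a concise, helpful assistant."  # example text, pick your own


def with_system_prompt(messages):
    """Return a copy of messages with SYSTEM_PROMPT prepended to the first user message."""
    messages = [dict(m) for m in messages]  # don't mutate the caller's list
    if messages and messages[0]['role'] == 'user':
        messages[0]['content'] = SYSTEM_PROMPT + "\n\n" + messages[0]['content']
    return messages
```

In the script above, you'd then call `tokenizer.apply_chat_template(with_system_prompt(messages), return_tensors="pt")` inside `chat`.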