ehristoforu committed
Commit 807b486
Parent(s): eedaccf
Upload 4 files
- app (13).py +254 -0
- model (1).py +57 -0
- requirements (14).txt +9 -0
- style (2).css +16 -0
app (13).py
ADDED
@@ -0,0 +1,254 @@
import os
from typing import Iterator

import gradio as gr

from model import run

HF_PUBLIC = os.environ.get("HF_PUBLIC", False)

DEFAULT_SYSTEM_PROMPT = "You are CodeLlama. You are AI-assistant, you are polite, give only truthful information and are based on the CodeLLaMA-34B model from Meta. You can communicate in different languages equally well."
MAX_MAX_NEW_TOKENS = 4096
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = 4000

DESCRIPTION = """
# CodeLlama-34B Chat

💻 This Space demonstrates model [CodeLlama-34b-Instruct](https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf) by Meta, a Code Llama model with 34B parameters fine-tuned for chat instructions and specialized on code tasks. Feel free to play with it, or duplicate to run generations without a queue! If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://huggingface.co/inference-endpoints).

🔎 For more details about the Code Llama family of models and how to use them with `transformers`, take a look [at our blog post](https://huggingface.co/blog/codellama) or [the paper](https://huggingface.co/papers/2308.12950).

🏃🏻 Check out our [Playground](https://huggingface.co/spaces/codellama/codellama-playground) for a super-fast code completion demo that leverages a streaming [inference endpoint](https://huggingface.co/inference-endpoints).

"""


def clear_and_save_textbox(message: str) -> tuple[str, str]:
    return '', message


def display_input(message: str,
                  history: list[tuple[str, str]]) -> list[tuple[str, str]]:
    history.append((message, ''))
    return history


def delete_prev_fn(
        history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]:
    try:
        message, _ = history.pop()
    except IndexError:
        message = ''
    return history, message or ''


def generate(
    message: str,
    history_with_input: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
) -> Iterator[list[tuple[str, str]]]:
    if max_new_tokens > MAX_MAX_NEW_TOKENS:
        raise ValueError

    history = history_with_input[:-1]
    generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
    try:
        first_response = next(generator)
        yield history + [(message, first_response)]
    except StopIteration:
        yield history + [(message, '')]
    for response in generator:
        yield history + [(message, response)]


def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
    generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, 1024, 1, 0.95, 50)
    for x in generator:
        pass
    return '', x


def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
    input_token_length = len(message) + len(chat_history)
    if input_token_length > MAX_INPUT_TOKEN_LENGTH:
        raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')


with gr.Blocks(css='style.css') as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value='Duplicate Space for private use',
                       elem_id='duplicate-button')

    with gr.Group():
        chatbot = gr.Chatbot(label='Playground')
        with gr.Row():
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Hi, CodeLlama!',
                scale=10,
            )
            submit_button = gr.Button('Submit',
                                      variant='primary',
                                      scale=1,
                                      min_width=0)
    with gr.Row():
        retry_button = gr.Button('🔄 Retry', variant='secondary')
        undo_button = gr.Button('↩️ Undo', variant='secondary')
        clear_button = gr.Button('🗑️ Clear', variant='secondary')

    saved_input = gr.State()

    with gr.Accordion(label='⚙️ Advanced options', open=False):
        system_prompt = gr.Textbox(label='System prompt',
                                   value=DEFAULT_SYSTEM_PROMPT,
                                   lines=5,
                                   interactive=False)
        max_new_tokens = gr.Slider(
            label='Max new tokens',
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        )
        temperature = gr.Slider(
            label='Temperature',
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.1,
        )
        top_p = gr.Slider(
            label='Top-p (nucleus sampling)',
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.9,
        )
        top_k = gr.Slider(
            label='Top-k',
            minimum=1,
            maximum=1000,
            step=1,
            value=10,
        )

    textbox.submit(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot, system_prompt],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    button_event_preprocess = submit_button.click(
        fn=clear_and_save_textbox,
        inputs=textbox,
        outputs=[textbox, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=check_input_token_length,
        inputs=[saved_input, chatbot, system_prompt],
        api_name=False,
        queue=False,
    ).success(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    retry_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=display_input,
        inputs=[saved_input, chatbot],
        outputs=chatbot,
        api_name=False,
        queue=False,
    ).then(
        fn=generate,
        inputs=[
            saved_input,
            chatbot,
            system_prompt,
            max_new_tokens,
            temperature,
            top_p,
            top_k,
        ],
        outputs=chatbot,
        api_name=False,
    )

    undo_button.click(
        fn=delete_prev_fn,
        inputs=chatbot,
        outputs=[chatbot, saved_input],
        api_name=False,
        queue=False,
    ).then(
        fn=lambda x: x,
        inputs=[saved_input],
        outputs=textbox,
        api_name=False,
        queue=False,
    )

    clear_button.click(
        fn=lambda: ([], ''),
        outputs=[chatbot, saved_input],
        queue=False,
        api_name=False,
    )

demo.queue(max_size=32).launch(share=HF_PUBLIC, show_api=False)
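The submit flow above relies on Gradio's chained events: each `.then()` step runs after the previous one (with `queue=False` so the lightweight steps skip the queue), and the `.success()` step only fires if the preceding `check_input_token_length` call did not raise a `gr.Error`, so over-long inputs never reach `generate`. A minimal sketch of that gating pattern (illustrative only, not part of this commit; `validate` and `echo` are hypothetical helpers):

import gradio as gr

def validate(text: str) -> None:
    # Raising gr.Error aborts the chain, so the .success() step never runs.
    if len(text) > 100:
        raise gr.Error("Input too long")

def echo(text: str) -> str:
    return f"You said: {text}"

with gr.Blocks() as sketch:
    box = gr.Textbox()
    out = gr.Textbox()
    box.submit(fn=validate, inputs=box, queue=False).success(
        fn=echo, inputs=box, outputs=out,
    )

# sketch.launch()  # uncomment to try it locally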
model (1).py
ADDED
@@ -0,0 +1,57 @@
import os
from typing import Iterator

from text_generation import Client

model_id = 'codellama/CodeLlama-34b-Instruct-hf'

API_URL = "https://api-inference.huggingface.co/models/" + model_id
HF_TOKEN = os.environ.get("HF_READ_TOKEN", None)

client = Client(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)
EOS_STRING = "</s>"
EOT_STRING = "<EOT>"


def get_prompt(message: str, chat_history: list[tuple[str, str]],
               system_prompt: str) -> str:
    texts = [f'<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n']
    # The first user input is _not_ stripped
    do_strip = False
    for user_input, response in chat_history:
        user_input = user_input.strip() if do_strip else user_input
        do_strip = True
        texts.append(f'{user_input} [/INST] {response.strip()} </s><s>[INST] ')
    message = message.strip() if do_strip else message
    texts.append(f'{message} [/INST]')
    return ''.join(texts)


def run(message: str,
        chat_history: list[tuple[str, str]],
        system_prompt: str,
        max_new_tokens: int = 1024,
        temperature: float = 0.1,
        top_p: float = 0.9,
        top_k: int = 50) -> Iterator[str]:
    prompt = get_prompt(message, chat_history, system_prompt)

    generate_kwargs = dict(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
    )
    stream = client.generate_stream(prompt, **generate_kwargs)
    output = ""
    for response in stream:
        if any([end_token in response.token.text for end_token in [EOS_STRING, EOT_STRING]]):
            return output
        else:
            output += response.token.text
            yield output
    return output
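For reference, a short sketch of what `get_prompt` assembles and how `run` streams (illustrative only, not part of this commit; it assumes the file is saved as `model.py`, as `app.py` imports it, that `HF_READ_TOKEN` grants Inference API access, and the example messages are made up):

from model import get_prompt, run

history = [("Hi!", "Hello! How can I help?")]
prompt = get_prompt("Reverse a string in Python.", history, "You are CodeLlama.")
print(prompt)
# <s>[INST] <<SYS>>
# You are CodeLlama.
# <</SYS>>
#
# Hi! [/INST] Hello! How can I help? </s><s>[INST] Reverse a string in Python. [/INST]

# run() yields the accumulated text after each streamed token and stops at </s> or <EOT>.
for partial in run("Reverse a string in Python.", history, "You are CodeLlama."):
    latest = partial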
requirements (14).txt
ADDED
@@ -0,0 +1,9 @@
accelerate
bitsandbytes
gradio
protobuf
scipy
sentencepiece
torch
text_generation
git+https://github.com/huggingface/transformers@main
style (2).css
ADDED
@@ -0,0 +1,16 @@
h1 {
  text-align: center;
}

#duplicate-button {
  margin: auto;
  color: white;
  background: #1565c0;
  border-radius: 100vh;
}

#component-0 {
  max-width: 900px;
  margin: auto;
  padding-top: 1.5rem;
}