Spaces:

openskyml
/

zephyr-7b-chat

Runtime error

App Files Files Community

ehristoforu committed on Oct 28, 2023

Commit

9740d69

1 Parent(s): b621929

Upload app (13).py

Browse files

Files changed (1) hide show

app (13).py +254 -0

app (13).py ADDED Viewed

	@@ -0,0 +1,254 @@

+import os
+from typing import Iterator
+import gradio as gr
+from model import run
+# Whether launch() should create a public share link. Environment variables
+# are always strings, so explicit "off" spellings are parsed as False instead
+# of being treated as truthy non-empty strings; any other non-empty value
+# still enables sharing, as before.
+HF_PUBLIC = os.environ.get("HF_PUBLIC", "").strip().lower() not in {
+    "", "0", "false", "no", "off"}
+# System prompt pre-filled in the (read-only) "System prompt" textbox and used
+# by process_example.
+DEFAULT_SYSTEM_PROMPT = "You are CodeLlama. You are AI-assistant, you are polite, give only truthful information and are based on the CodeLLaMA-34B model from Meta. You can communicate in different languages equally well."
+# Upper bound of the "Max new tokens" slider; generate() rejects larger values.
+MAX_MAX_NEW_TOKENS = 4096
+# Initial value of the "Max new tokens" slider.
+DEFAULT_MAX_NEW_TOKENS = 1024
+# Threshold that check_input_token_length compares its measured size against.
+MAX_INPUT_TOKEN_LENGTH = 4000
+# Markdown shown at the top of the page (passed to gr.Markdown when the UI is built).
+DESCRIPTION = """
+# CodeLlama-34B Chat
+💻 This Space demonstrates model [CodeLlama-34b-Instruct](https://huggingface.co/codellama/CodeLlama-34b-Instruct-hf) by Meta, a Code Llama model with 34B parameters fine-tuned for chat instructions and specialized on code tasks. Feel free to play with it, or duplicate to run generations without a queue! If you want to run your own service, you can also [deploy the model on Inference Endpoints](https://huggingface.co/inference-endpoints).
+🔎 For more details about the Code Llama family of models and how to use them with `transformers`, take a look [at our blog post](https://huggingface.co/blog/codellama) or [the paper](https://huggingface.co/papers/2308.12950).
+🏃🏻 Check out our [Playground](https://huggingface.co/spaces/codellama/codellama-playground) for a super-fast code completion demo that leverages a streaming [inference endpoint](https://huggingface.co/inference-endpoints).
+"""
+def clear_and_save_textbox(message: str) -> tuple[str, str]:
+    """Empty the textbox while keeping hold of what was typed.
+
+    Returns a pair: the new textbox content (always an empty string) and the
+    submitted ``message``, unchanged.
+    """
+    cleared_text = ''
+    return cleared_text, message
+def display_input(message: str,
+                  history: list[tuple[str, str]]) -> list[tuple[str, str]]:
+    """Add ``message`` to ``history`` as a new turn with an empty reply.
+
+    The list is modified in place and that same list object is returned.
+    """
+    pending_turn = (message, '')
+    history.extend([pending_turn])
+    return history
+def delete_prev_fn(
+        history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]:
+    """Remove the most recent turn from ``history`` and recover its message.
+
+    The list is modified in place. Returns the (shortened) history together
+    with the user message of the removed turn; the message is an empty string
+    when the history was already empty or the stored message was falsy.
+    """
+    try:
+        last_turn = history.pop()
+    except IndexError:
+        # Nothing to remove: hand back the empty history and an empty message.
+        return history, ''
+    message, _ = last_turn
+    if not message:
+        message = ''
+    return history, message
+def generate(
+    message: str,
+    history_with_input: list[tuple[str, str]],
+    system_prompt: str,
+    max_new_tokens: int,
+    temperature: float,
+    top_p: float,
+    top_k: int,
+) -> Iterator[list[tuple[str, str]]]:
+    """Stream chat-history updates for ``message``.
+
+    ``history_with_input`` is the chat history whose last entry is the turn
+    being answered; that entry is dropped and re-added on every yield with the
+    latest value produced by ``run``. At least one history is always yielded:
+    if ``run`` produces nothing, the new turn carries an empty reply.
+
+    Raises:
+        ValueError: if ``max_new_tokens`` exceeds ``MAX_MAX_NEW_TOKENS``
+            (raised when the generator is first advanced).
+    """
+    if max_new_tokens > MAX_MAX_NEW_TOKENS:
+        raise ValueError(
+            f'max_new_tokens ({max_new_tokens}) exceeds the allowed maximum ({MAX_MAX_NEW_TOKENS}).')
+    history = history_with_input[:-1]
+    # NOTE(review): presumably each value from run() is the reply generated so
+    # far -- confirm against model.run.
+    generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
+    # Only the next() call is guarded, so a StopIteration from anywhere else
+    # is never mistaken for "run() produced nothing".
+    try:
+        first_response = next(generator)
+    except StopIteration:
+        first_response = ''
+    yield history + [(message, first_response)]
+    for response in generator:
+        yield history + [(message, response)]
+def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
+    """Run ``generate`` to completion for ``message`` on an empty history.
+
+    Uses ``DEFAULT_SYSTEM_PROMPT`` and ``DEFAULT_MAX_NEW_TOKENS`` with
+    temperature 1, top-p 0.95 and top-k 50.
+
+    Returns an empty string and the last history yielded by ``generate``
+    (an empty list if nothing was yielded).
+    """
+    # Pre-bind the result so an empty stream cannot leave it undefined.
+    final_history: list[tuple[str, str]] = []
+    for final_history in generate(message, [], DEFAULT_SYSTEM_PROMPT,
+                                  DEFAULT_MAX_NEW_TOKENS, 1, 0.95, 50):
+        pass
+    return '', final_history
+def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None:
+    """Reject input whose measured size exceeds ``MAX_INPUT_TOKEN_LENGTH``.
+
+    The size is the number of characters in ``message`` plus the number of
+    entries in ``chat_history``; ``system_prompt`` is accepted but not used.
+    NOTE(review): despite the name, no tokenizer is involved here -- confirm
+    whether a real token count was intended.
+
+    Raises:
+        gr.Error: if the measured size is greater than the limit.
+    """
+    input_token_length = len(message) + len(chat_history)
+    if input_token_length <= MAX_INPUT_TOKEN_LENGTH:
+        return
+    raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')
+# Build the UI: description, chat area, action buttons, advanced options, and
+# the event wiring that connects them to the helper functions above.
+with gr.Blocks(css='style.css') as demo:
+    gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(value='Duplicate Space for private use',
+                       elem_id='duplicate-button')
+    # Chat area: conversation display plus the message textbox and Submit button.
+    with gr.Group():
+        chatbot = gr.Chatbot(label='Playground')
+        with gr.Row():
+            textbox = gr.Textbox(
+                container=False,
+                show_label=False,
+                placeholder='Hi, CodeLlama!',
+                scale=10,
+            )
+            submit_button = gr.Button('Submit',
+                                      variant='primary',
+                                      scale=1,
+                                      min_width=0)
+    # Actions that operate on the existing conversation.
+    with gr.Row():
+        retry_button = gr.Button('🔄  Retry', variant='secondary')
+        undo_button = gr.Button('↩️ Undo', variant='secondary')
+        clear_button = gr.Button('🗑️  Clear', variant='secondary')
+    # Carries the last submitted message between the chained event steps below.
+    saved_input = gr.State()
+    # Generation settings; the system prompt is displayed but not editable.
+    with gr.Accordion(label='⚙️ Advanced options', open=False):
+        system_prompt = gr.Textbox(label='System prompt',
+                                   value=DEFAULT_SYSTEM_PROMPT,
+                                   lines=5,
+                                   interactive=False)
+        max_new_tokens = gr.Slider(
+            label='Max new tokens',
+            minimum=1,
+            maximum=MAX_MAX_NEW_TOKENS,
+            step=1,
+            value=DEFAULT_MAX_NEW_TOKENS,
+        )
+        temperature = gr.Slider(
+            label='Temperature',
+            minimum=0.1,
+            maximum=4.0,
+            step=0.1,
+            value=0.1,
+        )
+        top_p = gr.Slider(
+            label='Top-p (nucleus sampling)',
+            minimum=0.05,
+            maximum=1.0,
+            step=0.05,
+            value=0.9,
+        )
+        top_k = gr.Slider(
+            label='Top-k',
+            minimum=1,
+            maximum=1000,
+            step=1,
+            value=10,
+        )
+    # Submitting from the textbox: stash and clear the text, show it in the
+    # chat, validate its length, then stream the reply. The generate step is
+    # attached with .success(), i.e. it follows the validation step only when
+    # that step did not raise.
+    textbox.submit(
+        fn=clear_and_save_textbox,
+        inputs=textbox,
+        outputs=[textbox, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=display_input,
+        inputs=[saved_input, chatbot],
+        outputs=chatbot,
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=check_input_token_length,
+        inputs=[saved_input, chatbot, system_prompt],
+        api_name=False,
+        queue=False,
+    ).success(
+        fn=generate,
+        inputs=[
+            saved_input,
+            chatbot,
+            system_prompt,
+            max_new_tokens,
+            temperature,
+            top_p,
+            top_k,
+        ],
+        outputs=chatbot,
+        api_name=False,
+    )
+    # Submit button: the same four-step chain as textbox.submit above.
+    # NOTE(review): the resulting event is bound to button_event_preprocess
+    # but that name is not used anywhere else in the visible code.
+    button_event_preprocess = submit_button.click(
+        fn=clear_and_save_textbox,
+        inputs=textbox,
+        outputs=[textbox, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=display_input,
+        inputs=[saved_input, chatbot],
+        outputs=chatbot,
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=check_input_token_length,
+        inputs=[saved_input, chatbot, system_prompt],
+        api_name=False,
+        queue=False,
+    ).success(
+        fn=generate,
+        inputs=[
+            saved_input,
+            chatbot,
+            system_prompt,
+            max_new_tokens,
+            temperature,
+            top_p,
+            top_k,
+        ],
+        outputs=chatbot,
+        api_name=False,
+    )
+    # Retry: drop the last turn, show its message again, and regenerate the
+    # reply. Unlike the submit chains, this path has no length check.
+    retry_button.click(
+        fn=delete_prev_fn,
+        inputs=chatbot,
+        outputs=[chatbot, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=display_input,
+        inputs=[saved_input, chatbot],
+        outputs=chatbot,
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=generate,
+        inputs=[
+            saved_input,
+            chatbot,
+            system_prompt,
+            max_new_tokens,
+            temperature,
+            top_p,
+            top_k,
+        ],
+        outputs=chatbot,
+        api_name=False,
+    )
+    # Undo: drop the last turn and put its message back into the textbox.
+    undo_button.click(
+        fn=delete_prev_fn,
+        inputs=chatbot,
+        outputs=[chatbot, saved_input],
+        api_name=False,
+        queue=False,
+    ).then(
+        fn=lambda x: x,
+        inputs=[saved_input],
+        outputs=textbox,
+        api_name=False,
+        queue=False,
+    )
+    # Clear: reset the chat display to an empty list and the saved message to ''.
+    clear_button.click(
+        fn=lambda: ([], ''),
+        outputs=[chatbot, saved_input],
+        queue=False,
+        api_name=False,
+    )
+# Turn on the request queue (max_size=32), then start the app; a public share
+# link is requested when HF_PUBLIC is truthy, and the API page is hidden.
+demo.queue(max_size=32)
+demo.launch(share=HF_PUBLIC, show_api=False)