Spaces: Running on Zero
Rijgersberg committed on
Update app.py
app.py CHANGED
@@ -1,134 +1,42 @@
-import base64
-import os
-
-from gradio_client.utils import get_mimetype
-from openai import OpenAI
 import gradio as gr
+import spaces
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
 
-
-client = OpenAI(api_key=api_key)
-
-MODELS = [
-    'gpt-4o',
-    'gpt-4o-mini',
-    'gpt-4',
-    'gpt-4-turbo',
-    'gpt-3.5-turbo',
-]
-
-
-def process_image(data):
-    with open(data['path'], "rb") as image_file:
-        b64image = base64.b64encode(image_file.read()).decode('utf-8')
-
-    return "data:" + data['mime_type'] + ";base64," + b64image
-
-
-def generate(message, history, model, system_prompt,
-             temperature=1.0, top_p=1.0, frequency_penalty=0.0, presence_penalty=0.0):
-    # history
-    history_openai_format = [{"role": "system", "content": system_prompt}]
-    for user, assistant in history:
-        if isinstance(user, tuple):  # there were files
-            content = []
-            for filepath in user:
-                mime_type = get_mimetype(filepath) or ''
-                if mime_type.startswith("image/"):
-                    content.append(
-                        {"type": "image_url",
-                         # for some reason you don't get the same image format in history as in message
-                         "image_url": {"url": process_image({'path': filepath,
-                                                             'mime_type': get_mimetype(filepath)})}}
-                    )
-            if content:
-                history_openai_format.append(
-                    {"role": "user", "content": content})
-        else:  # there was just text
-            history_openai_format.append({"role": "user", "content": user})
-
-        if assistant is not None:
-            history_openai_format.append({"role": "assistant", "content": assistant})
-
-    # new message
-    content = [{"type": "text",
-                "text": message['text']}]
-
-    for file in message['files']:
-        mime_type = get_mimetype(file['path']) or ''
-        if not mime_type.startswith('image/'):
-            raise gr.Error("Momenteel zijn alleen afbeeldingen ondersteund als bijlagen 💥!", duration=20)
-        content.append({"type": "image_url",
-                        "image_url": {"url": process_image(file)}})
-
-    history_openai_format.append(
-        {"role": "user", "content": content})
-
-    response = client.chat.completions.create(model=model,
-                                              messages=history_openai_format,
-                                              temperature=temperature,
-                                              top_p=top_p,
-                                              frequency_penalty=frequency_penalty,
-                                              presence_penalty=presence_penalty,
-                                              stream=True)
-
-    partial_message = ""
-    for chunk in response:
-        if chunk.choices and chunk.choices[0].delta.content is not None:
-            partial_message += chunk.choices[0].delta.content
-            yield partial_message
-
+model_name = "Qwen/Qwen2.5-7B-Instruct"
 
-
-
-
-chat_interface = gr.ChatInterface(
-    fn=generate,
-    analytics_enabled=False,
-    chatbot=gr.Chatbot(
-        show_label=False,
-        show_copy_button=True,
-        scale=1),
-    additional_inputs=[
-        gr.Dropdown(label="Model",
-                    choices=MODELS,
-                    value=MODELS[0],
-                    allow_custom_value=False),
-        gr.Textbox(label="System prompt",
-                   value="Je bent een slimme, behulpzame assistent van Edwin Rijgersberg"),
-        gr.Slider(label="Temperature",
-                  minimum=0.,
-                  maximum=2.0,
-                  step=0.05,
-                  value=1.0),
-        gr.Slider(label="Top P",
-                  minimum=0.,
-                  maximum=1.0,
-                  step=0.05,
-                  value=1.0),
-        gr.Slider(label="Frequency penalty",
-                  minimum=0.,
-                  maximum=1.0,
-                  step=0.05,
-                  value=0.),
-        gr.Slider(label="Presence penalty",
-                  minimum=0.,
-                  maximum=1.0,
-                  step=0.05,
-                  value=0.),
-    ],
-    textbox=gr.MultimodalTextbox(
-        file_types=['image'],
-        show_label=False,
-        label="Message",
-        placeholder="Type een bericht...",
-        scale=7,
-    ),
-    additional_inputs_accordion=gr.Accordion(label="Instellingen", open=False),
-    show_progress="full",
-    submit_btn=None,
-    stop_btn="Stop",
-    retry_btn="🔄 Opnieuw",
-    undo_btn="↩️ Ongedaan maken",
-    clear_btn="🗑️ Wissen",
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
 )
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+@spaces.GPU
+def generate(prompt, history):
+    messages = [
+        {"role": "system", "content": "Je bent een vriendelijke, behulpzame assistent."},
+        {"role": "user", "content": prompt}
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response
+
+
+
+chat_interface = gr.ChatInterface()
 chat_interface.launch(share=True)
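
Note: gr.ChatInterface requires the chat function as its first argument, but the new code constructs it with no arguments, so the app will raise a TypeError at startup. The presumable intent, given that the rest of the file defines generate (this fix is an assumption, not part of the committed diff):

    chat_interface = gr.ChatInterface(fn=generate)  # wire the Qwen generate() into the UI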
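The new generate() accepts the history argument that gr.ChatInterface passes in but never reads it, so the model sees only the latest message. A minimal sketch of folding history back into the chat template, assuming the (user, assistant) tuple format the old code consumed; build_messages is a hypothetical helper, not part of the commit (the Dutch system prompt reads "You are a friendly, helpful assistant"):

    def build_messages(prompt, history):
        # history arrives from gr.ChatInterface as (user, assistant) pairs
        messages = [{"role": "system", "content": "Je bent een vriendelijke, behulpzame assistent."}]
        for user, assistant in history:
            messages.append({"role": "user", "content": user})
            if assistant is not None:
                messages.append({"role": "assistant", "content": assistant})
        messages.append({"role": "user", "content": prompt})
        return messages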
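The removed OpenAI version streamed partial responses (stream=True plus a generator); the replacement returns the whole completion at once. If streaming is wanted back, transformers provides TextIteratorStreamer, which yields decoded text as generation proceeds. A sketch under the assumption that the model and tokenizer objects from app.py are in scope; generate_streaming is hypothetical:

    from threading import Thread
    from transformers import TextIteratorStreamer

    @spaces.GPU
    def generate_streaming(prompt, history):
        # hypothetical streaming variant, not part of this commit
        text = tokenizer.apply_chat_template(
            build_messages(prompt, history),  # helper sketched above
            tokenize=False, add_generation_prompt=True)
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
        # model.generate() blocks, so run it in a thread and drain the streamer here
        Thread(target=model.generate,
               kwargs=dict(**model_inputs, max_new_tokens=512, streamer=streamer)).start()
        partial = ""
        for chunk in streamer:
            partial += chunk
            yield partial  # gr.ChatInterface renders a generator as a streaming reply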
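On ZeroGPU Spaces, the @spaces.GPU decorator requests a GPU only for the duration of each decorated call. If 7B-class generation needs more than the default window, the ZeroGPU API accepts a per-call duration; the value below is illustrative, not taken from this commit:

    @spaces.GPU(duration=120)  # request up to ~120 s of GPU time per call
    def generate(prompt, history):
        ...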