Spaces:

AIDC-AI
/

Ovis1.6-Gemma2-9B

Running on Zero

App Files Files Community

root committed on Sep 21, 2024

Commit

e16319b

1 Parent(s): ff39987

add svg; add streaming

Browse files

Files changed (1) hide show

app.py +48 -32

app.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import spaces
 import os
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM
 model_name = 'AIDC-AI/Ovis1.6-Gemma2-9B'
@@ -14,14 +17,22 @@ model = AutoModelForCausalLM.from_pretrained(model_name,
                                              trust_remote_code=True).to(device='cuda')
 text_tokenizer = model.get_text_tokenizer()
 visual_tokenizer = model.get_visual_tokenizer()
 image_placeholder = '<image>'
 @spaces.GPU
-def ovis_chat(chatbot, image_input, text_input):
     # preprocess inputs
     conversations = []
-    for query, response in chatbot:
         conversations.append({
             "from": "human",
             "value": query
@@ -46,7 +57,6 @@ def ovis_chat(chatbot, image_input, text_input):
     else:
         pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]
-    # generate output
     with torch.inference_mode():
         gen_kwargs = dict(
             max_new_tokens=512,
@@ -59,83 +69,89 @@ def ovis_chat(chatbot, image_input, text_input):
             pad_token_id=text_tokenizer.pad_token_id,
             use_cache=True
         )
-    output_ids = model.generate(input_ids, pixel_values=pixel_values, attention_mask=attention_mask, **gen_kwargs)[0]
-    output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
-    chatbot.append((text_input, output))
-    return chatbot, ""
 def clear_chat():  # reset handler wired to clear_btn.click
     return [], None, ""  # one value per output: chatbot, image_input, text_input
-md = f'''# <center>{model_name.split('/')[-1]}</center>
-###
-Ovis has been open-sourced on [GitHub](https://github.com/AIDC-AI/Ovis) and [Huggingface](https://huggingface.co/{model_name}). If you find Ovis useful, a star or a like would be appreciated.
-'''
 html = f"""
-<center><font size=8> {model_name.split('/')[-1]}</font></center>
 <center><font size=3>Ovis has been open-sourced on <a href='https://github.com/AIDC-AI/Ovis'>GitHub</a> and <a href='https://huggingface.co/{model_name}'>Huggingface</a>. If you find Ovis useful, a star or a like would be appreciated.</font></center>
 """
 latex_delimiters_set = [{
         "left": "\\(",
         "right": "\\)",
-        "display": False  # 行内公式
     }, {
         "left": "\\begin{equation}",
         "right": "\\end{equation}",
-        "display": True  # 块级公式
     }, {
         "left": "\\begin{align}",
         "right": "\\end{align}",
-        "display": True  # 块级公式
     }, {
         "left": "\\begin{alignat}",
         "right": "\\end{alignat}",
-        "display": True  # 块级公式
     }, {
         "left": "\\begin{gather}",
         "right": "\\end{gather}",
-        "display": True  # 块级公式
     }, {
         "left": "\\begin{CD}",
         "right": "\\end{CD}",
-        "display": True  # 块级公式
     }, {
         "left": "\\[",
         "right": "\\]",
-        "display": True  # 块级公式
     }]
 text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
 with gr.Blocks(title=model_name.split('/')[-1]) as demo:
-    # gr.Markdown(md)
     gr.HTML(html)
-    cur_dir = os.path.dirname(os.path.abspath(__file__))
     with gr.Row():
         with gr.Column(scale=3):
             image_input = gr.Image(label="image", height=350, type="pil")
             gr.Examples(
-                    examples=[
                     [f"{cur_dir}/examples/case0.png", "Find the area of the shaded region."],
                     [f"{cur_dir}/examples/case1.png", "explain this model to me."],
                     [f"{cur_dir}/examples/case2.png", "What is net profit margin as a percentage of total revenue?"],
-                    ],
                 inputs=[image_input, text_input]
             )
         with gr.Column(scale=7):
-            chatbot = gr.Chatbot(label="Ovis", layout="panel", height=620, show_copy_button=True, latex_delimiters=latex_delimiters_set)
             text_input.render()
             with gr.Row():
                 send_btn = gr.Button("Send", variant="primary")
                 clear_btn = gr.Button("Clear", variant="secondary")
-    send_click_event = send_btn.click(ovis_chat, [chatbot, image_input, text_input], [chatbot, text_input])
-    submit_event = text_input.submit(ovis_chat, [chatbot, image_input, text_input], [chatbot, text_input])
     clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
 demo.launch()

 import spaces
 import os
+import re
+import time
 import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM
+from transformers import TextIteratorStreamer
+from threading import Thread
 model_name = 'AIDC-AI/Ovis1.6-Gemma2-9B'
                                              trust_remote_code=True).to(device='cuda')
 text_tokenizer = model.get_text_tokenizer()
 visual_tokenizer = model.get_visual_tokenizer()
+streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
 image_placeholder = '<image>'
+cur_dir = os.path.dirname(os.path.abspath(__file__))
+def submit_chat(chatbot, text_input):  # first step of the send/submit chain: stage the prompt in the history
+    response = ''  # empty reply placeholder; ovis_chat later overwrites chatbot[-1][1]
+    chatbot.append((text_input, response))  # ovis_chat reads the prompt back from chatbot[-1][0]
+    return chatbot ,''  # outputs are [chatbot, text_input]; the '' goes to the prompt textbox
 @spaces.GPU
+def ovis_chat(chatbot, image_input):
     # preprocess inputs
     conversations = []
+    response = ""
+    text_input = chatbot[-1][0]
+    for query, response in chatbot[:-1]:
         conversations.append({
             "from": "human",
             "value": query
     else:
         pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]
     with torch.inference_mode():
         gen_kwargs = dict(
             max_new_tokens=512,
             pad_token_id=text_tokenizer.pad_token_id,
             use_cache=True
         )
+    response = ""
+    thread = Thread(target=model.generate,
+                kwargs={"inputs": input_ids,
+                        "pixel_values": pixel_values,
+                        "attention_mask": attention_mask,
+                        "streamer": streamer,
+                        **gen_kwargs})
+    thread.start()
+    for new_text in streamer:
+        response += new_text
+        print(new_text)
+        chatbot[-1][1] = response
+        yield chatbot
+    thread.join()
 def clear_chat():  # reset handler wired to clear_btn.click
     return [], None, ""  # one value per output: chatbot, image_input, text_input
+with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
+    svg_content = svg_file.read()
+font_size = "2.5em"
+svg_content = re.sub(r'(<svg[^>]*)(>)', rf'\1 height="{font_size}" style="vertical-align: middle; display: inline-block;"\2', svg_content)
 html = f"""
+<p align="center" style="font-size: {font_size}; line-height: 1;">
+    <span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
+    <span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
+</p>
 <center><font size=3>Ovis has been open-sourced on <a href='https://github.com/AIDC-AI/Ovis'>GitHub</a> and <a href='https://huggingface.co/{model_name}'>Huggingface</a>. If you find Ovis useful, a star or a like would be appreciated.</font></center>
 """
 latex_delimiters_set = [{  # delimiter pairs passed to gr.Chatbot(latex_delimiters=...)
         "left": "\\(",
         "right": "\\)",
+        "display": False  # inline formula
     }, {
         "left": "\\begin{equation}",
         "right": "\\end{equation}",
+        "display": True  # block-level formula
     }, {
         "left": "\\begin{align}",
         "right": "\\end{align}",
+        "display": True  # block-level formula
     }, {
         "left": "\\begin{alignat}",
         "right": "\\end{alignat}",
+        "display": True  # block-level formula
     }, {
         "left": "\\begin{gather}",
         "right": "\\end{gather}",
+        "display": True  # block-level formula
     }, {
         "left": "\\begin{CD}",
         "right": "\\end{CD}",
+        "display": True  # block-level formula
     }, {
         "left": "\\[",
         "right": "\\]",
+        "display": True  # block-level formula
     }]
 text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
 with gr.Blocks(title=model_name.split('/')[-1]) as demo:
     gr.HTML(html)
     with gr.Row():
         with gr.Column(scale=3):
             image_input = gr.Image(label="image", height=350, type="pil")
             gr.Examples(
+                examples=[
                     [f"{cur_dir}/examples/case0.png", "Find the area of the shaded region."],
                     [f"{cur_dir}/examples/case1.png", "explain this model to me."],
                     [f"{cur_dir}/examples/case2.png", "What is net profit margin as a percentage of total revenue?"],
+                ],
                 inputs=[image_input, text_input]
             )
         with gr.Column(scale=7):
+            chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
             text_input.render()
             with gr.Row():
                 send_btn = gr.Button("Send", variant="primary")
                 clear_btn = gr.Button("Clear", variant="secondary")
+    send_click_event = send_btn.click(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
+    submit_event = text_input.submit(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
     clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
 demo.launch()