root committed on
Commit
e16319b
1 Parent(s): ff39987

add svg; add streaming

Browse files
Files changed (1) hide show
  1. app.py +48 -32
app.py CHANGED
@@ -1,9 +1,12 @@
1
  import spaces
2
  import os
3
-
 
4
  import gradio as gr
5
  import torch
6
  from transformers import AutoModelForCausalLM
 
 
7
 
8
  model_name = 'AIDC-AI/Ovis1.6-Gemma2-9B'
9
 
@@ -14,14 +17,22 @@ model = AutoModelForCausalLM.from_pretrained(model_name,
14
  trust_remote_code=True).to(device='cuda')
15
  text_tokenizer = model.get_text_tokenizer()
16
  visual_tokenizer = model.get_visual_tokenizer()
 
17
  image_placeholder = '<image>'
 
18
 
 
 
 
 
19
 
20
  @spaces.GPU
21
- def ovis_chat(chatbot, image_input, text_input):
22
  # preprocess inputs
23
  conversations = []
24
- for query, response in chatbot:
 
 
25
  conversations.append({
26
  "from": "human",
27
  "value": query
@@ -46,7 +57,6 @@ def ovis_chat(chatbot, image_input, text_input):
46
  else:
47
  pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]
48
 
49
- # generate output
50
  with torch.inference_mode():
51
  gen_kwargs = dict(
52
  max_new_tokens=512,
@@ -59,83 +69,89 @@ def ovis_chat(chatbot, image_input, text_input):
59
  pad_token_id=text_tokenizer.pad_token_id,
60
  use_cache=True
61
  )
62
- output_ids = model.generate(input_ids, pixel_values=pixel_values, attention_mask=attention_mask, **gen_kwargs)[0]
63
- output = text_tokenizer.decode(output_ids, skip_special_tokens=True)
64
- chatbot.append((text_input, output))
65
-
66
- return chatbot, ""
67
-
 
 
 
 
 
 
 
 
68
 
69
  def clear_chat():
70
  return [], None, ""
71
 
72
- md = f'''# <center>{model_name.split('/')[-1]}</center>
73
- ###
74
- Ovis has been open-sourced on [GitHub](https://github.com/AIDC-AI/Ovis) and [Huggingface](https://huggingface.co/{model_name}). If you find Ovis useful, a star or a like would be appreciated.
75
- '''
76
-
77
  html = f"""
78
- <center><font size=8> {model_name.split('/')[-1]}</font></center>
 
 
 
79
  <center><font size=3>Ovis has been open-sourced on <a href='https://github.com/AIDC-AI/Ovis'>GitHub</a> and <a href='https://huggingface.co/{model_name}'>Huggingface</a>. If you find Ovis useful, a star or a like would be appreciated.</font></center>
80
  """
81
 
82
-
83
  latex_delimiters_set = [{
84
  "left": "\\(",
85
  "right": "\\)",
86
- "display": False # 行内公式
87
  }, {
88
  "left": "\\begin{equation}",
89
  "right": "\\end{equation}",
90
- "display": True # 块级公式
91
  }, {
92
  "left": "\\begin{align}",
93
  "right": "\\end{align}",
94
- "display": True # 块级公式
95
  }, {
96
  "left": "\\begin{alignat}",
97
  "right": "\\end{alignat}",
98
- "display": True # 块级公式
99
  }, {
100
  "left": "\\begin{gather}",
101
  "right": "\\end{gather}",
102
- "display": True # 块级公式
103
  }, {
104
  "left": "\\begin{CD}",
105
  "right": "\\end{CD}",
106
- "display": True # 块级公式
107
  }, {
108
  "left": "\\[",
109
  "right": "\\]",
110
- "display": True # 块级公式
111
  }]
112
 
113
-
114
  text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
115
  with gr.Blocks(title=model_name.split('/')[-1]) as demo:
116
- # gr.Markdown(md)
117
  gr.HTML(html)
118
- cur_dir = os.path.dirname(os.path.abspath(__file__))
119
  with gr.Row():
120
  with gr.Column(scale=3):
121
  image_input = gr.Image(label="image", height=350, type="pil")
122
  gr.Examples(
123
- examples=[
124
  [f"{cur_dir}/examples/case0.png", "Find the area of the shaded region."],
125
  [f"{cur_dir}/examples/case1.png", "explain this model to me."],
126
  [f"{cur_dir}/examples/case2.png", "What is net profit margin as a percentage of total revenue?"],
127
- ],
128
  inputs=[image_input, text_input]
129
  )
130
  with gr.Column(scale=7):
131
- chatbot = gr.Chatbot(label="Ovis", layout="panel", height=620, show_copy_button=True, latex_delimiters=latex_delimiters_set)
132
  text_input.render()
133
  with gr.Row():
134
  send_btn = gr.Button("Send", variant="primary")
135
  clear_btn = gr.Button("Clear", variant="secondary")
136
 
137
- send_click_event = send_btn.click(ovis_chat, [chatbot, image_input, text_input], [chatbot, text_input])
138
- submit_event = text_input.submit(ovis_chat, [chatbot, image_input, text_input], [chatbot, text_input])
139
  clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
140
 
141
  demo.launch()
 
1
  import spaces
2
  import os
3
+ import re
4
+ import time
5
  import gradio as gr
6
  import torch
7
  from transformers import AutoModelForCausalLM
8
+ from transformers import TextIteratorStreamer
9
+ from threading import Thread
10
 
11
  model_name = 'AIDC-AI/Ovis1.6-Gemma2-9B'
12
 
 
17
  trust_remote_code=True).to(device='cuda')
18
  text_tokenizer = model.get_text_tokenizer()
19
  visual_tokenizer = model.get_visual_tokenizer()
20
+ streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
21
  image_placeholder = '<image>'
22
+ cur_dir = os.path.dirname(os.path.abspath(__file__))
23
 
def submit_chat(chatbot, text_input):
    """Add the user's message to the chat history before generation starts.

    A new turn holding ``text_input`` and an empty response is appended to
    ``chatbot`` in place; ``ovis_chat`` later streams the answer into that
    turn.

    Args:
        chatbot: Chat history, a list of ``[query, response]`` turns.
        text_input: The text the user just submitted.

    Returns:
        A ``(chatbot, "")`` tuple: the updated history and an empty string
        for the prompt textbox.
    """
    # Use a list rather than a tuple so the streaming step can assign
    # ``chatbot[-1][1] = response`` on this turn without relying on the UI
    # layer converting it first.
    chatbot.append([text_input, ""])
    return chatbot, ""
28
 
29
  @spaces.GPU
30
+ def ovis_chat(chatbot, image_input):
31
  # preprocess inputs
32
  conversations = []
33
+ response = ""
34
+ text_input = chatbot[-1][0]
35
+ for query, response in chatbot[:-1]:
36
  conversations.append({
37
  "from": "human",
38
  "value": query
 
57
  else:
58
  pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]
59
 
 
60
  with torch.inference_mode():
61
  gen_kwargs = dict(
62
  max_new_tokens=512,
 
69
  pad_token_id=text_tokenizer.pad_token_id,
70
  use_cache=True
71
  )
72
+ response = ""
73
+ thread = Thread(target=model.generate,
74
+ kwargs={"inputs": input_ids,
75
+ "pixel_values": pixel_values,
76
+ "attention_mask": attention_mask,
77
+ "streamer": streamer,
78
+ **gen_kwargs})
79
+ thread.start()
80
+ for new_text in streamer:
81
+ response += new_text
82
+ print(new_text)
83
+ chatbot[-1][1] = response
84
+ yield chatbot
85
+ thread.join()
86
 
def clear_chat():
    """Return blank values for the chat history, the image and the prompt.

    Returns:
        A 3-tuple ``([], None, "")``, in the order of the outputs it is
        wired to: chatbot, image input, text input.
    """
    empty_history = []
    no_image = None
    blank_prompt = ""
    return empty_history, no_image, blank_prompt
89
 
# Build the page header: the SVG logo inlined next to the model name,
# followed by the links to the project pages.
with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
    svg_content = svg_file.read()
font_size = "2.5em"
# Inject a height (matching the header font size) and alignment style into
# the opening <svg ...> tag. count=1 restricts the rewrite to the first
# (root) tag so any nested <svg> elements keep their own attributes.
svg_content = re.sub(
    r'(<svg[^>]*)(>)',
    rf'\1 height="{font_size}" style="vertical-align: middle; display: inline-block;"\2',
    svg_content,
    count=1,
)
html = f"""
<p align="center" style="font-size: {font_size}; line-height: 1;">
<span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
<span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
</p>
<center><font size=3>Ovis has been open-sourced on <a href='https://github.com/AIDC-AI/Ovis'>GitHub</a> and <a href='https://huggingface.co/{model_name}'>Huggingface</a>. If you find Ovis useful, a star or a like would be appreciated.</font></center>
"""
101
 
 
# Delimiter pairs passed to the chatbot's ``latex_delimiters`` option.
# ``display`` is False for the inline form \( ... \) and True for every
# block-level form (the \begin{...}/\end{...} environments and \[ ... \]).
latex_delimiters_set = [
    {"left": "\\(", "right": "\\)", "display": False},
    *(
        {
            "left": f"\\begin{{{env}}}",
            "right": f"\\end{{{env}}}",
            "display": True,
        }
        for env in ("equation", "align", "alignat", "gather", "CD")
    ),
    {"left": "\\[", "right": "\\]", "display": True},
]
131
 
 
132
  text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
133
  with gr.Blocks(title=model_name.split('/')[-1]) as demo:
 
134
  gr.HTML(html)
 
135
  with gr.Row():
136
  with gr.Column(scale=3):
137
  image_input = gr.Image(label="image", height=350, type="pil")
138
  gr.Examples(
139
+ examples=[
140
  [f"{cur_dir}/examples/case0.png", "Find the area of the shaded region."],
141
  [f"{cur_dir}/examples/case1.png", "explain this model to me."],
142
  [f"{cur_dir}/examples/case2.png", "What is net profit margin as a percentage of total revenue?"],
143
+ ],
144
  inputs=[image_input, text_input]
145
  )
146
  with gr.Column(scale=7):
147
+ chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
148
  text_input.render()
149
  with gr.Row():
150
  send_btn = gr.Button("Send", variant="primary")
151
  clear_btn = gr.Button("Clear", variant="secondary")
152
 
153
+ send_click_event = send_btn.click(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
154
+ submit_event = text_input.submit(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
155
  clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
156
 
157
  demo.launch()