Spaces:
Running
on
Zero
Running
on
Zero
File size: 5,253 Bytes
42fea26 2005ef8 42fea26 46b9cd3 42fea26 46b9cd3 42fea26 b1ee704 42fea26 46b9cd3 42fea26 2005ef8 42fea26 2005ef8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import spaces
import os
import gradio as gr
import torch
from transformers import AutoModelForCausalLM
# Hub identifier of the checkpoint this demo serves.
model_name = 'AIDC-AI/Ovis1.6-Gemma2-9B'

# Load the checkpoint in bfloat16, then move it onto the CUDA device.
# NOTE: trust_remote_code=True lets transformers execute the modelling code
# shipped in the model repository.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    multimodal_max_length=8192,
    trust_remote_code=True,
)
model = model.to(device='cuda')

# Tokenizers are provided by the model object itself.
text_tokenizer = model.get_text_tokenizer()
visual_tokenizer = model.get_visual_tokenizer()

# Marker inserted into the conversation text when an image is supplied.
image_placeholder = '<image>'
@spaces.GPU
def ovis_chat(chatbot, image_input, text_input):
    """Answer the newest user message and append the exchange to the history.

    Args:
        chatbot: List of earlier ``(query, response)`` pairs. The new
            ``(text_input, output)`` tuple is appended to it in place.
        image_input: Image forwarded to ``model.preprocess_inputs``, or
            ``None`` for a text-only request.
        text_input: The new user message. Every occurrence of
            ``image_placeholder`` is removed before it is used.

    Returns:
        A ``(chatbot, "")`` tuple: the updated history and an empty string.
    """
    # Replay the stored turns as alternating human/gpt messages.
    conversations = []
    for past_query, past_reply in chatbot:
        conversations.extend((
            {"from": "human", "value": past_query},
            {"from": "gpt", "value": past_reply},
        ))

    # Strip any user-typed placeholder, then add the new message.
    text_input = text_input.replace(image_placeholder, '')
    conversations.append({"from": "human", "value": text_input})

    has_image = image_input is not None
    if has_image:
        # The placeholder is prepended to the first message of the
        # conversation, not to the newest one.
        opening_turn = conversations[0]
        opening_turn["value"] = image_placeholder + '\n' + opening_turn["value"]

    _prompt, input_ids, pixel_values = model.preprocess_inputs(conversations, [image_input])

    pad_id = text_tokenizer.pad_token_id
    target_device = model.device
    # The mask marks every position whose token is not the pad token.
    attention_mask = torch.ne(input_ids, pad_id)
    # Add a leading batch dimension of size one and move to the model device.
    input_ids = input_ids.unsqueeze(0).to(device=target_device)
    attention_mask = attention_mask.unsqueeze(0).to(device=target_device)

    if has_image:
        pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device=visual_tokenizer.device)]
    else:
        pixel_values = [None]

    # Sampling is switched off; the sampling-related options are passed
    # explicitly as None.
    generation_options = {
        "max_new_tokens": 512,
        "do_sample": False,
        "top_p": None,
        "top_k": None,
        "temperature": None,
        "repetition_penalty": None,
        "eos_token_id": model.generation_config.eos_token_id,
        "pad_token_id": pad_id,
        "use_cache": True,
    }
    with torch.inference_mode():
        generated = model.generate(
            input_ids,
            pixel_values=pixel_values,
            attention_mask=attention_mask,
            **generation_options,
        )
        # Only the first (and only) sequence of the batch is decoded.
        output_ids = generated[0]
        output = text_tokenizer.decode(output_ids, skip_special_tokens=True)

    chatbot.append((text_input, output))
    return chatbot, ""
def clear_chat():
    """Return reset values: an empty history, no image, and an empty prompt."""
    empty_history = []
    no_image = None
    blank_prompt = ""
    return empty_history, no_image, blank_prompt
# Markdown version of the page header: the model's short name plus project
# links. Its only use in this file is a commented-out gr.Markdown call.
md = (
    f"# <center>{model_name.rsplit('/', 1)[-1]}</center>\n"
    "###\n"
    f"Ovis has been open-sourced on [GitHub](https://github.com/AIDC-AI/Ovis) and [Huggingface](https://huggingface.co/{model_name}). If you find Ovis useful, a star or a like would be appreciated.\n"
)

# HTML version of the same header; this is the one passed to gr.HTML.
html = (
    "\n"
    f"<center><font size=8> {model_name.rsplit('/', 1)[-1]}</font></center>\n"
    f"<center><font size=3>Ovis has been open-sourced on <a href='https://github.com/AIDC-AI/Ovis'>GitHub</a> and <a href='https://huggingface.co/{model_name}'>Huggingface</a>. If you find Ovis useful, a star or a like would be appreciated.</font></center>\n"
)
# Delimiter pairs handed to the chatbot's `latex_delimiters` option.
# "display": False marks an inline formula, True a block-level formula.
latex_delimiters_set = [
    # Inline formula.
    {"left": "\\(", "right": "\\)", "display": False},
    # Block-level formulas wrapped in \begin{...} / \end{...} environments.
    *(
        {"left": f"\\begin{{{env}}}", "right": f"\\end{{{env}}}", "display": True}
        for env in ("equation", "align", "alignat", "gather", "CD")
    ),
    # Block-level formula in \[ ... \] form.
    {"left": "\\[", "right": "\\]", "display": True},
]
# The prompt box is created before the Blocks layout so the examples widget
# can reference it; it is placed in the layout later via .render().
text_input = gr.Textbox(
    label="prompt",
    placeholder="Enter your text here...",
    lines=1,
    container=False,
)

with gr.Blocks(title=model_name.split('/')[-1]) as demo:
    # gr.Markdown(md)  # Markdown header is disabled; the HTML banner is used.
    gr.HTML(html)
    cur_dir = os.path.dirname(os.path.abspath(__file__))

    # (image file under ./examples, prompt) pairs shown as clickable examples.
    example_cases = [
        ("case0.png", "Find the area of the shaded region."),
        ("case1.png", "explain this model to me."),
        ("case2.png", "What is net profit margin as a percentage of total revenue?"),
    ]

    with gr.Row():
        # Left column: image upload plus the example gallery.
        with gr.Column(scale=3):
            image_input = gr.Image(label="image", height=350, type="pil")
            gr.Examples(
                examples=[
                    [f"{cur_dir}/examples/{file_name}", prompt_text]
                    for file_name, prompt_text in example_cases
                ],
                inputs=[image_input, text_input],
            )
        # Right column: conversation view, prompt box and action buttons.
        with gr.Column(scale=7):
            chatbot = gr.Chatbot(
                label="Ovis",
                layout="panel",
                height=800,
                show_copy_button=True,
                latex_delimiters=latex_delimiters_set,
            )
            text_input.render()
            with gr.Row():
                send_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear", variant="secondary")

    # Clicking "Send" and submitting the prompt box both call ovis_chat with
    # the same inputs and outputs.
    send_click_event = send_btn.click(
        fn=ovis_chat,
        inputs=[chatbot, image_input, text_input],
        outputs=[chatbot, text_input],
    )
    submit_event = text_input.submit(
        fn=ovis_chat,
        inputs=[chatbot, image_input, text_input],
        outputs=[chatbot, text_input],
    )
    # "Clear" resets the history, the image and the prompt box.
    clear_btn.click(fn=clear_chat, outputs=[chatbot, image_input, text_input])

demo.launch()
|