#!/usr/bin/env python
# This code is modified from https://huggingface.co/spaces/lykeven/visualglm-6b
import gradio as gr
import re
from PIL import Image
import torch
from io import BytesIO
import hashlib
import os
from transformers import LlamaForCausalLM, LlamaTokenizer, BlipImageProcessor, BitsAndBytesConfig, AutoModelForCausalLM
# Static UI text for the demo page.
# Markdown heading with the model name.
DESCRIPTION = '''# Ziya-Blip2-14B'''
# Usage hints in English (shown in the Markdown notice area of the page).
MAINTENANCE_NOTICE1 = 'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.\nHint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. Please be patient and wait.'
# The same usage hints in Chinese.
MAINTENANCE_NOTICE2 = '提示1: 如果应用报了“Something went wrong, connection error out”的错误,请关闭代理并重试。\n提示2: 如果你上传了很大的图片,比如10MB大小,那将需要一些时间来上传和处理,请耐心等待。'
# Attribution note rendered below the example inputs.
NOTES = 'This app is adapted from https://huggingface.co/IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1. It would be recommended to check out the repo if you want to see the detail of our model. And most of the codes attach to this demo are modify from lykeven/visualglm-6b.'
import json
# Empty chat history; returned by the clear callbacks to reset the chatbot.
default_chatbox = []
def is_chinese(text):
    """Look for Chinese characters in ``text``.

    Returns the ``re.Match`` for the first run of characters in the range
    U+4E00..U+9FA5, or ``None`` when the text contains none. Callers use the
    result for its truthiness.
    """
    return re.search('[\u4e00-\u9fa5]+', text)
# Hugging Face access token taken from the environment (None when unset).
AUTH_TOKEN = os.getenv("AUTH_TOKEN")

# Language model: loaded with 8-bit quantization and automatic device placement.
LM_MODEL_PATH = "gxy/Ziya-LLaMA-13B-v1"
lm_model = LlamaForCausalLM.from_pretrained(
    LM_MODEL_PATH,
    device_map="auto",
    torch_dtype=torch.float16,
    use_auth_token=AUTH_TOKEN,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True))
# The tokenizer lives in the same repository as the weights, so it is fetched
# with the same token; otherwise it fails where the weights load succeeds when
# the repository requires authentication.
tokenizer = LlamaTokenizer.from_pretrained(
    LM_MODEL_PATH,
    use_auth_token=AUTH_TOKEN)

# visual model
# Normalization statistics handed to the image processor below.
OPENAI_CLIP_MEAN = [0.48145466, 0.4578275, 0.40821073]
OPENAI_CLIP_STD = [0.26862954, 0.26130258, 0.27577711]
# The model is loaded by its Hub repo id. Per the upstream note, a local path
# "." can be used instead when this script sits inside the model project.
model = AutoModelForCausalLM.from_pretrained(
    "IDEA-CCNL/Ziya-BLIP2-14B-Visual-v1",
    trust_remote_code=True,
    torch_dtype=torch.float16)
model.cuda()  # if you use on cpu, comment this line
# Swap in the separately loaded 8-bit language model.
model.language_model = lm_model

# Images are resized to the square input size declared by the vision config.
image_size = model.config.vision_config.image_size
image_processor = BlipImageProcessor(
    size={"height": image_size, "width": image_size},
    image_mean=OPENAI_CLIP_MEAN,
    image_std=OPENAI_CLIP_STD,
)
def post(
        input_text,
        temperature,
        top_p,
        image_prompt,
        result_previous,
        hidden_image
):
    """Answer one chat turn about the current image.

    Args:
        input_text: The user's question.
        temperature: Sampling temperature forwarded to ``model.chat``.
        top_p: Top-p value forwarded to ``model.chat``.
        image_prompt: Path of the image file, or ``None`` when no image is set.
        result_previous: Chat history as ``(query, answer)`` pairs; ``None``
            is treated as an empty history.
        hidden_image: Hash of the image used by the previous turn.

    Returns:
        A ``(textbox value, chat history, image hash)`` tuple. When the image
        or the text is missing, a notice is appended to the history and the
        incoming hash is returned unchanged.
    """
    # Drop entries with an empty query (e.g. an earlier "Text empty!" notice)
    # so they are neither displayed again nor fed to the model.
    result_text = [
        (ele[0], ele[1])
        for ele in (result_previous or [])
        if ele[0] != ""
    ]
    # Oldest turn first. The previous implementation collected these while
    # walking the history backwards, which handed the turns to model.chat in
    # reverse order.
    previous_querys = [query for query, _ in result_text]
    previous_outputs = [output for _, output in result_text]

    if image_prompt is None:
        print("Image empty")
        if is_chinese(input_text):
            result_text.append((input_text, '图片为空!请上传图片并重试。'))
        else:
            result_text.append((input_text, 'Image empty! Please upload a image and retry.'))
        return input_text, result_text, hidden_image
    if input_text == "":
        print("Text empty")
        result_text.append((input_text, 'Text empty! Please enter text and retry.'))
        return "", result_text, hidden_image

    generate_config = {
        "max_new_tokens": 128,
        "top_p": top_p,
        "temperature": temperature,
        "repetition_penalty": 1.18,
    }

    # The context manager closes the underlying file once the pixel tensor
    # has been built.
    with Image.open(image_prompt) as img:
        pixel_values = image_processor(
            img,
            return_tensors="pt").pixel_values.to(
                model.device).to(model.dtype)

    # Fingerprint the image from the file bytes instead of re-encoding the
    # decoded image to PNG in memory. The hash is only ever compared with
    # hashes produced by this function.
    with open(image_prompt, "rb") as image_file:
        img_hash = hashlib.md5(image_file.read()).hexdigest()

    # A different image starts a new conversation.
    if img_hash != hidden_image:
        previous_querys = []
        previous_outputs = []
        result_text = []

    answer = model.chat(
        tokenizer=tokenizer,
        pixel_values=pixel_values,
        query=input_text,
        previous_querys=previous_querys,
        previous_outputs=previous_outputs,
        **generate_config,
    )

    result_text.append((input_text, answer))
    print(result_text)
    return "", result_text, img_hash
def clear_fn(value):
    """Return reset values for the text box, the chat history and the image.

    ``value`` is ignored; the parameter exists because the callback is
    registered with an input component.
    """
    cleared_text, cleared_image = "", None
    return cleared_text, default_chatbox, cleared_image
def clear_fn2(value):
    """Return the empty chat history; ``value`` is ignored."""
    cleared_history = default_chatbox
    return cleared_history
def io_fn(a, b, c):
    """Log the call and pass the first two arguments through; ``c`` is unused."""
    print("call io_fn")
    passthrough = (a, b)
    return passthrough
def change_language(value):
    """Toggle the hint language.

    ``value`` is the current button label. Returns the new button label
    together with the notice text to display.
    """
    wants_english = value == "Change hint to English"
    if not wants_english:
        return "Change hint to English", MAINTENANCE_NOTICE2
    return "提示变为中文", MAINTENANCE_NOTICE1
def main():
    """Build the Gradio interface, wire its callbacks and launch the server."""
    gr.close_all()

    # One JSON object per line, each providing "text" and "image".
    # The encoding is explicit so non-ASCII example text does not depend on
    # the platform default; blank lines are skipped instead of crashing
    # json.loads.
    with open("./examples/example_inputs.jsonl", encoding="utf-8") as f:
        examples = [json.loads(line) for line in f if line.strip()]

    with gr.Blocks(css='style.css') as demo:
        with gr.Row():
            # Left column: inputs and generation controls.
            with gr.Column(scale=4.5):
                with gr.Group():
                    input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')
                    with gr.Row():
                        run_button = gr.Button('Generate')
                        clear_button = gr.Button('Clear')
                    image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)
                with gr.Row():
                    temperature = gr.Slider(maximum=1, value=0.7, minimum=0, label='Temperature')
                    top_p = gr.Slider(maximum=1, value=0.1, minimum=0, label='Top P')
                with gr.Group():
                    with gr.Row():
                        with gr.Column(scale=7):
                            maintenance_notice = gr.Markdown(MAINTENANCE_NOTICE1)
                        with gr.Column(scale=2):
                            # Hidden, and no click handler is attached in this function.
                            change_button = gr.Button('Change hint to English', visible=False)
            # Right column: conversation display plus the hidden image hash
            # that carries state between turns.
            with gr.Column(scale=5.5):
                result_text = gr.components.Chatbot(label='Multi-round conversation History', value=[]).style(height=550)
                hidden_image_hash = gr.Textbox(visible=False)

        gr.Examples(examples=[[example["text"], example["image"]] for example in examples],
                    inputs=[input_text, image_prompt],
                    label="Example Inputs (Click to insert an examplet into the input box)",
                    examples_per_page=3)

        gr.Markdown(NOTES)

        print(gr.__version__)
        # The button and pressing ENTER in the text box trigger the same handler.
        post_inputs = [input_text, temperature, top_p, image_prompt, result_text, hidden_image_hash]
        post_outputs = [input_text, result_text, hidden_image_hash]
        run_button.click(fn=post, inputs=post_inputs, outputs=post_outputs)
        input_text.submit(fn=post, inputs=post_inputs, outputs=post_outputs)
        clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
        # Uploading or removing an image resets the displayed conversation.
        image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
        image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])

    demo.queue(concurrency_count=10)
    demo.launch(server_name="0.0.0.0")
# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()