#!/usr/bin/env python
"""Gradio web demo for CogVLM / CogAgent chat.

The UI collects a text prompt, an image and sampling parameters, forwards
them as a JSON POST to the backend whose address is read from the ``URL``
environment variable, and renders the reply in a chatbot widget.
"""

import json
import os
import time
from concurrent.futures import ThreadPoolExecutor

import gradio as gr
import requests

from utils import (
    is_chinese,
    process_image_without_resize,
    parse_response,
    templates_agent_cogagent,
    template_grounding_cogvlm,
    postprocess_text,
)

DESCRIPTION = '''

CogVLM & CogAgent Chat Demo

'''

NOTES = 'This app is adapted from https://github.com/THUDM/CogVLM. It would be recommended to check out the repo if you want to see the detail of our model.\n\n该demo仅作为测试使用,不支持批量请求。如有大批量需求,欢迎联系[智谱AI](mailto:business@zhipuai.cn)。\n\n请注意CoogVLM-17B目前仅支持英文。'

# NOTE(review): the separator between the two hints is encoded as "\n" here;
# confirm whether the original used explicit markup for the line break.
MAINTENANCE_NOTICE1 = (
    'Hint 1: If the app report "Something went wrong, connection error out", please turn off your proxy and retry.\n'
    'Hint 2: If you upload a large size of image like 10MB, it may take some time to upload and process. Please be patient and wait.'
)

GROUNDING_NOTICE = 'Hint: When you check "Grounding", please use the corresponding prompt or the examples below.'

AGENT_NOTICE = 'Hint: When you check "CogAgent", please use the corresponding prompt or the examples below.'

# Greeting shown in an empty conversation. Treat as read-only: handlers hand
# out copies so that no caller can mutate the shared module-level list.
default_chatbox = [("", "Hi, What do you want to know about this image?")]

# Backend endpoint; None when the environment variable is not set.
URL = os.environ.get("URL")


def make_request(URL, headers, data):
    """POST ``data`` to ``URL`` and return the decoded JSON body.

    Args:
        URL: Endpoint to send the request to.
        headers: HTTP headers for the request.
        data: Request body (the caller passes a JSON string).

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.RequestException: On connection errors or when the
            (connect=60s, read=100s) timeout is exceeded.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.request("POST", URL, headers=headers, data=data, timeout=(60, 100))
    return response.json()


def post(
        input_text,
        temperature,
        top_p,
        top_k,
        image_prompt,
        result_previous,
        hidden_image,
        grounding,
        cogagent,
        grounding_template,
        agent_template
):
    """Send one chat turn to the backend and return the updated UI state.

    Args:
        input_text: Prompt typed by the user.
        temperature: Sampling temperature forwarded to the backend.
        top_p: Top-p value forwarded to the backend.
        top_k: Top-k value forwarded to the backend.
        image_prompt: Path of the uploaded image, or None when no image is set.
        result_previous: Current chatbot history as (user, bot) pairs.
        hidden_image: Hash of the image used in the previous turn, if any.
        grounding: Whether the "Grounding" checkbox is ticked.
        cogagent: Whether the "CogAgent" checkbox is ticked.
        grounding_template: Template applied to the prompt in grounding mode.
        agent_template: Template applied to the prompt in agent mode.

    Returns:
        A ``(textbox_value, chat_history, image_hash)`` tuple matching the
        outputs wired up in ``main``.
    """
    # Drop entries without user text (the greeting and image-only rows) so
    # that only real question/answer pairs are sent as history.
    result_text = [
        (ele[0], ele[1])
        for ele in (result_previous or [])
        if ele[0] != "" and ele[0] is not None
    ]
    print(f"history {result_text}")

    is_zh = is_chinese(input_text)

    if image_prompt is None:
        print("Image empty")
        if is_zh:
            result_text.append((input_text, '图片为空!请上传图片并重试。'))
        else:
            result_text.append((input_text, 'Image empty! Please upload a image and retry.'))
        # Keep the typed text so the user can retry after uploading an image.
        return input_text, result_text, hidden_image
    elif input_text == "":
        print("Text empty")
        result_text.append((input_text, 'Text empty! Please enter text and retry.'))
        return "", result_text, hidden_image

    headers = {
        "Content-Type": "application/json; charset=UTF-8",
        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36",
    }

    # Pre-bind the image-derived locals so the grounding branch below cannot
    # hit an unbound name when image_prompt is falsy but not None.
    pil_img = None
    image_path_grounding = None
    if image_prompt:
        pil_img, encoded_img, image_hash, image_path_grounding = process_image_without_resize(image_prompt)
        print(f"image_hash:{image_hash}, hidden_image_hash:{hidden_image}")

        # A different image starts a new conversation.
        if hidden_image is not None and image_hash != hidden_image:
            print("image has been update")
            result_text = []
        hidden_image = image_hash
    else:
        encoded_img = None

    # CogAgent takes precedence over grounding when both boxes are ticked.
    model_use = "vlm_chat"
    if not cogagent and grounding:
        model_use = "vlm_grounding"
        if grounding_template:
            input_text = postprocess_text(grounding_template, input_text)
    elif cogagent:
        model_use = "agent_chat"
        if agent_template is not None and agent_template != "do not use template":
            input_text = postprocess_text(agent_template, input_text)

    prompt = input_text
    if grounding:
        prompt += "(with grounding)"

    print(f'request {model_use} model... with prompt {prompt}, grounding_template {grounding_template}, agent_template {agent_template}')
    data = json.dumps({
        'model_use': model_use,
        'is_grounding': grounding,
        'text': prompt,
        'history': result_text,
        'image': encoded_img,
        'temperature': temperature,
        'top_p': top_p,
        'top_k': top_k,
        'do_sample': True,
        'max_new_tokens': 2048
    })

    try:
        with ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(make_request, URL, headers, data)
            response = future.result()  # Blocks until the request is complete
    except Exception as e:
        # Any request failure (not only timeouts) is reported to the user
        # with the same retry message.
        print("error message", e)
        if is_zh:
            result_text.append((input_text, '超时!请稍等几分钟再重试。'))
        else:
            result_text.append((input_text, 'Timeout! Please wait a few minutes and retry.'))
        return "", result_text, hidden_image
    print('request done...')

    answer = str(response['result'])
    if grounding and pil_img is not None:
        # presumably renders the grounded result to image_path_grounding;
        # verify against utils.parse_response
        parse_response(pil_img, answer, image_path_grounding)
        new_answer = answer.replace(input_text, "")
        result_text.append((input_text, new_answer))
        # Image-only chat row showing the file at image_path_grounding.
        result_text.append((None, (image_path_grounding,)))
    else:
        result_text.append((input_text, answer))
    print(result_text)
    print('finished')
    return "", result_text, hidden_image


def clear_fn(value):
    """Reset the textbox, chat history and image widget.

    Args:
        value: Value of the triggering component; unused.

    Returns:
        ``("", fresh greeting history, None)``.
    """
    return "", list(default_chatbox), None


def clear_fn2(value):
    """Reset only the chat history (used when the image changes).

    Args:
        value: Value of the triggering component; unused.

    Returns:
        A fresh copy of the greeting history.
    """
    return list(default_chatbox)


def main():
    """Build the Gradio interface, wire up the callbacks and launch it."""
    gr.close_all()

    # One JSON object per line; read as UTF-8 explicitly so non-ASCII example
    # prompts do not depend on the platform's default encoding.
    with open("./examples/example_inputs.jsonl", encoding="utf-8") as f:
        examples = [json.loads(line) for line in f]

    # NOTE(review): the widget nesting below is reconstructed from the
    # statement order; confirm the layout against the running app.
    with gr.Blocks(css='style.css') as demo:
        gr.Markdown(DESCRIPTION)
        gr.Markdown(NOTES)

        with gr.Row():
            with gr.Column(scale=4.5):
                with gr.Group():
                    input_text = gr.Textbox(label='Input Text', placeholder='Please enter text prompt below and press ENTER.')

                    with gr.Row():
                        run_button = gr.Button('Generate')
                        clear_button = gr.Button('Clear')

                    image_prompt = gr.Image(type="filepath", label="Image Prompt", value=None)

                with gr.Row():
                    grounding = gr.Checkbox(label="Grounding")
                    cogagent = gr.Checkbox(label="CogAgent")

                with gr.Row():
                    grounding_template = gr.Dropdown(choices=template_grounding_cogvlm, label="Grounding Template", value=template_grounding_cogvlm[0])
                    agent_template = gr.Dropdown(choices=templates_agent_cogagent, label="Agent Template", value=templates_agent_cogagent[0])

                with gr.Row():
                    temperature = gr.Slider(maximum=1, value=0.9, minimum=0, label='Temperature')
                    top_p = gr.Slider(maximum=1, value=0.8, minimum=0, label='Top P')
                    top_k = gr.Slider(maximum=50, value=5, minimum=1, step=1, label='Top K')

            with gr.Column(scale=5.5):
                result_text = gr.components.Chatbot(label='Multi-round conversation History', value=list(default_chatbox), height=550)
                hidden_image_hash = gr.Textbox(visible=False)

        gr.Examples(
            examples=[
                [example["text"], example["image"], example["grounding"], example["cogagent"]]
                for example in examples
            ],
            inputs=[input_text, image_prompt, grounding, cogagent],
            label="Example Inputs (Click to insert an examplet into the input box)",
            examples_per_page=6,
        )

        gr.Markdown(MAINTENANCE_NOTICE1)

        print(gr.__version__)

        # The button click and the textbox ENTER key trigger the same call.
        post_inputs = [
            input_text, temperature, top_p, top_k, image_prompt, result_text,
            hidden_image_hash, grounding, cogagent, grounding_template, agent_template,
        ]
        post_outputs = [input_text, result_text, hidden_image_hash]
        run_button.click(fn=post, inputs=post_inputs, outputs=post_outputs)
        input_text.submit(fn=post, inputs=post_inputs, outputs=post_outputs)

        clear_button.click(fn=clear_fn, inputs=clear_button, outputs=[input_text, result_text, image_prompt])
        # Uploading or removing the image resets the chat history.
        image_prompt.upload(fn=clear_fn2, inputs=clear_button, outputs=[result_text])
        image_prompt.clear(fn=clear_fn2, inputs=clear_button, outputs=[result_text])

    demo.queue(concurrency_count=10)
    demo.launch(server_port=7862)


if __name__ == '__main__':
    main()