"""Gradio front end for the Zidong.Taichu (OPT) image-caption and VQA services.

The script reads two backend endpoints from the environment:

* ``CAPTION_NODE`` -- URL of the image-captioning service.
* ``VQA_NODE``     -- URL of the visual-question-answering service.

An optional ``REQUEST_TIMEOUT`` (seconds, default 120) bounds every backend
call.
"""
import os

import requests
import gradio as gr

url_caption = os.environ["CAPTION_NODE"]
url_vqa = os.environ["VQA_NODE"]

# Directory of this script; bundled assets are resolved against it so the app
# does not depend on the current working directory.
_BASE_DIR = os.path.dirname(__file__)

# Upper bound for one backend call so a stalled service cannot occupy a queue
# worker forever.
REQUEST_TIMEOUT = float(os.environ.get("REQUEST_TIMEOUT", "120"))


def _post_picture(url, file_path, form=None):
    """Upload the picture at *file_path* to *url* and return the ``data`` payload.

    Args:
        url: Endpoint that receives the multipart upload.
        file_path: Local path of the picture; sent as the ``picture`` field.
        form: Optional dict of extra form fields sent with the picture.

    Returns:
        The value stored under the ``"data"`` key of the JSON response.

    Raises:
        gr.Error: If no picture was provided.
        requests.HTTPError: If the service answers with a non-2xx status.
        requests.Timeout: If the service does not answer within
            ``REQUEST_TIMEOUT`` seconds.
    """
    if not file_path:
        # The image widget yields None when nothing has been selected.
        raise gr.Error("Please select a picture first.")

    # The context manager closes the upload handle even when the request fails.
    with open(file_path, "rb") as picture:
        # NOTE(review): verify=False disables TLS certificate validation. It is
        # kept because the backend may use a self-signed certificate; prefer
        # passing a CA bundle path here if one is available.
        resp = requests.post(
            url,
            files={"picture": picture},
            data=form,
            verify=False,
            timeout=REQUEST_TIMEOUT,
        )
    # Surface HTTP failures directly instead of as a JSON/KeyError later on.
    resp.raise_for_status()
    return resp.json()["data"]


def image_caption(file_path):
    """Return the caption the captioning service generates for a picture.

    Args:
        file_path: Local path of the picture to describe.

    Returns:
        The ``desc`` field of the service's ``data`` payload.
    """
    return _post_picture(url_caption, file_path)["desc"]


def vqa(file_path, question):
    """Return the VQA service's answer to *question* about a picture.

    Args:
        file_path: Local path of the picture the question refers to.
        question: Question text, sent as the ``question`` form field.

    Returns:
        The ``answer`` field of the service's ``data`` payload.
    """
    return _post_picture(url_vqa, file_path, form={"question": question})["answer"]


def read_content(file_path):
    """Return the full text of the UTF-8 encoded file at *file_path*."""
    with open(file_path, "r", encoding="utf-8") as f:
        return f.read()


# Six bundled sample pictures per task: examples/<task>/00.jpg .. 05.jpg.
examples_caption = [
    os.path.join(_BASE_DIR, f"examples/caption/{idx:02d}.jpg") for idx in range(6)
]
examples_vqa = [
    os.path.join(_BASE_DIR, f"examples/vqa/{idx:02d}.jpg") for idx in range(6)
]

# Page background plus the styling shared by both "Submit" buttons (#infer).
css = """
.gradio-container {background-image: url('file=./background.jpg'); background-size:cover; background-repeat: no-repeat;}

#infer {
    background: linear-gradient(to bottom right, #FFD8B4, #FFB066);
    border: 1px solid #ffd8b4;
    border-radius: 8px;
    color: #ee7400
}
"""

with gr.Blocks(css=css) as demo:
    gr.HTML(read_content(os.path.join(_BASE_DIR, "header.html")))
    gr.Markdown("# MindSpore Zidongtaichu ")
    gr.Markdown(
        "\nOPT (Omni-Perception Pre-Trainer) is the abbreviation of the full-scene perception pre-training model. "
        " It is an important achievement of the Chinese Academy of Sciences Automation and Huawei on the road to exploring general artificial intelligence."
        " The modal 100 billion large model, the Chinese name is Zidong.Taichu."
        " supports efficient collaboration among different modalities of text, vision, and voice,"
        " and can support industrial applications such as film and television creation, industrial quality inspection, and intelligent driving."
    )

    # Tab 1: picture in, generated description out.
    with gr.Tab("以图生文 (Image Caption)"):
        with gr.Row():
            caption_input = gr.Image(
                type="filepath",
                value=examples_caption[0],
            )
            caption_output = gr.TextArea(label="description", interactive=False)
        caption_button = gr.Button("Submit", elem_id="infer")
        gr.Examples(
            examples=examples_caption,
            inputs=caption_input,
        )
        caption_button.click(
            image_caption, inputs=[caption_input], outputs=[caption_output]
        )

    # Tab 2: picture plus free-text question in, answer out.
    with gr.Tab("视觉问答 (VQA)"):
        with gr.Row():
            with gr.Column():
                q_pic_input = gr.Image(
                    type="filepath", label="step1: select a picture"
                )
                gr.Examples(
                    examples=examples_vqa,
                    inputs=q_pic_input,
                )
            with gr.Column():
                vqa_question = gr.TextArea(
                    label="step2: question",
                    lines=5,
                    placeholder="please enter a question related to the picture",
                )
                vqa_answer = gr.TextArea(label="answer", lines=5, interactive=False)
        vqa_button = gr.Button("Submit", elem_id="infer")
        vqa_button.click(
            vqa, inputs=[q_pic_input, vqa_question], outputs=[vqa_answer]
        )

    with gr.Accordion("Open for More!"):
        gr.Markdown(
            "- If you want to know more about the foundation models of MindSpore, please visit "
            "[The Foundation Models Platform for Mindspore](https://xihe.mindspore.cn/)"
        )
        gr.Markdown(
            "- If you want to know more about OPT models, please visit "
            "[OPT](https://gitee.com/mindspore/zidongtaichu)"
        )
        gr.Markdown(
            "- Try [zidongtaichu model on the Foundation Models Platform for Mindspore]"
            "(https://xihe.mindspore.cn/modelzoo/taichug)"
        )

# queue() already enables queuing, so launch() needs no enable_queue argument.
demo.queue(concurrency_count=5)
demo.launch()