"""Gradio demo: zero-shot scene classification for surveillance images.

Loads several CLIP / Chinese-CLIP checkpoints as Hugging Face
``zero-shot-image-classification`` pipelines and exposes them through a
``gr.Interface`` where the user supplies an image, a comma-separated list of
candidate labels, a model choice and a prompt template.
"""

# NOTE(review): `title` is not referenced anywhere in the visible code (the
# `title=` below is a keyword argument) and importing `turtle` pulls in
# tkinter. This looks like an accidental IDE auto-import; kept only because
# file-level imports are not removed in this review -- confirm and delete.
from turtle import title
import gradio as gr
from transformers import pipeline
import numpy as np
from PIL import Image

TASK = "zero-shot-image-classification"

# Single source of truth: display name -> Hugging Face model id.
# Both the loaded pipelines and the Radio choices are derived from this
# mapping so the two can never drift apart.
MODEL_IDS = {
    "OpenAI-ViT/B-16": "openai/clip-vit-base-patch16",
    "OpenAI-ViT/L-14": "openai/clip-vit-large-patch14",
    "CN-ViT/B-16": "OFA-Sys/chinese-clip-vit-base-patch16",
    "CN-ViT/L-14": "OFA-Sys/chinese-clip-vit-large-patch14",
    "CN-ViT/L-14@336px": "OFA-Sys/chinese-clip-vit-large-patch14-336px",
    "CN-ViT/H-14": "OFA-Sys/chinese-clip-vit-huge-patch14",
}

# The default must be one of the Radio choices / a key of `pipes`; the former
# value "ViT/B-16" was neither, so submitting it raised KeyError in `shot`.
DEFAULT_MODEL = "CN-ViT/B-16"

# All pipelines are created eagerly at start-up (same as before), so every
# model is ready before the first request.
pipes = {name: pipeline(TASK, model=model_id) for name, model_id in MODEL_IDS.items()}

inputs = [
    gr.inputs.Image(type='pil', label="Image 输入图片"),
    gr.inputs.Textbox(lines=1, label="Candidate Labels 候选分类标签"),
    gr.inputs.Radio(
        choices=list(MODEL_IDS),
        type="value",
        default=DEFAULT_MODEL,
        label="Model 模型规模",
    ),
    gr.inputs.Textbox(
        lines=1,
        label="Prompt Template Prompt模板 ({}指代候选标签)",
        default="一张{}的图片。",
    ),
]

# NOTE(review): not read anywhere in the visible code; kept in case something
# outside this file refers to it.
images = "festival.jpg"


def _parse_labels(labels_text):
    """Split comma-separated label text into a clean, ordered, unique list."""
    # Accept the full-width comma as well, since it is what a Chinese input
    # method produces by default.
    pieces = (labels_text or "").replace("，", ",").split(",")
    stripped = (piece.strip() for piece in pieces)
    # dict.fromkeys drops duplicates while preserving first-seen order.
    return list(dict.fromkeys(label for label in stripped if label))


def shot(image, labels_text, model_name, hypothesis_template):
    """Classify ``image`` against the user-supplied candidate labels.

    Args:
        image: Image to classify (a PIL image, as delivered by the Image
            input configured with ``type='pil'``).
        labels_text: Candidate labels separated by commas.
        model_name: Key of ``pipes`` selecting which pipeline to run.
        hypothesis_template: Prompt template in which ``{}`` stands for a
            candidate label.

    Returns:
        A dict mapping each candidate label to its score, suitable for the
        ``"label"`` output component.

    Raises:
        ValueError: If no image was provided or no non-empty label remains
            after parsing.
        KeyError: If ``model_name`` is not a key of ``pipes``.
    """
    if image is None:
        raise ValueError("No image provided. 请先上传图片。")

    # Empty entries (e.g. from a trailing comma) would otherwise be scored
    # against the bare template, and duplicate labels would collapse into a
    # single key of the returned dict.
    labels = _parse_labels(labels_text)
    if not labels:
        raise ValueError("No candidate labels provided. 请至少输入一个分类标签。")

    res = pipes[model_name](
        images=image,
        candidate_labels=labels,
        hypothesis_template=hypothesis_template,
    )
    return {dic["label"]: dic["score"] for dic in res}


# Example label sets (Chinese / English) for surveillance-scene categories.
lei = (
    "机动车道,非机动车道,人车混行道路,斑马线人行道,主干道路,乡间道路,内部小巷,人行横道,"
    "十字路口,丁字路口,岔路口,铁路沿线,铁路路口,高架桥,立交桥,过街天桥,桥梁,天桥上下口,"
    "地下隧道,地下人行通道,隧道通行区域,穿山隧道,隧道出入口,水池,河流,湖面,"
    "室外停车场,路面划线停车位,城市广场,裸露农田,林区,草坪,树木,公交站台,收费站,检查站,"
    "加油站,岗亭,车行道闸,人行闸机,安检机器,铁门,保安亭,门或电动门,人员出入口,车辆出入口,"
    "广告牌,横幅,沿街商铺,露天烧烤摊,超市,建筑施工,道路施工,人员卡口,车辆卡口,人行闸机,"
    "场所主出入口,安检门,X光安检机,电梯内部,扶梯,楼梯,台阶,室内通道,走廊,前台区域,"
    "公共大厅,室内停车场"
)
en_lei = (
    "Motor Vehicle Lane,Non-motor Vehicle Lane,Mixed Traffic Road,Zebra Crossing,"
    "Main Road,Rural Road,Alley,Pedestrian Crossing,Crossroads,T-junction,Intersection,"
    "Railway Alongline,Railway Crossing,Overpass,Flyover,Pedestrian Overpass,Bridge,"
    "Overpass Entrance and Exit,Underpass,Pedestrian Tunnel,Tunnel Traffic Area,"
    "Mountain Tunnel,Tunnel Entrance and Exit,Pool,River,Lake,Outdoor Parking Lot,"
    "Lined Parking Space,City Square,Exposed Farmland,Forest Area,Lawn,Trees,Bus Stop,"
    "Toll Station,Checkpoint,Gas Station,Guardhouse,Vehicle Gate,Pedestrian Gate,"
    "Security Check Machine,Iron Gate,Security Booth,Gate or Electric Gate,"
    "Personnel Entrance and Exit,Vehicle Entrance and Exit,Advertising Board,Banner,"
    "Street-side Shops,Open-air Barbecue Stall,Supermarket,Building Construction,"
    "Road Construction,Personnel Checkpoint,Vehicle Checkpoint,Pedestrian Gate,"
    "Main Entrance,Security Check Door,X-ray Security Machine,Elevator Interior,"
    "Escalator,Stairs,Steps,Indoor Passage,Corridor,Front Desk Area,Public Hall,"
    "Indoor Parking Lot"
)

iface = gr.Interface(
    shot,
    inputs,
    "label",
    examples=[
        ["street.jpg", lei, "CN-ViT/B-16", "一张{}的图片。"],
        ["street.jpg", en_lei, "OpenAI-ViT/B-16", "A picture of {}."],
    ],
    description="""

To play with this demo, add a picture and a list of labels in Chinese separated by commas. 上传图片,并输入多个分类标签,用英文逗号分隔。可点击页面最下方示例参考。
You can duplicate this space and run it privately: Duplicate Space

""",
    title="监控场景识别",
)
iface.launch()