Upload 15 files
- Bamboo_v0-1_ViT-B16.pth.tar.convert +3 -0
- README.md +7 -7
- app.py +186 -0
- app_bak.py +105 -0
- examples/Ferrari-F355.jpg +0 -0
- examples/basketball.jpg +0 -0
- examples/dribbler.jpg +0 -0
- examples/fratercula_arctica.jpg +0 -0
- examples/husky.jpg +0 -0
- examples/northern_oriole.jpg +0 -0
- examples/playing_mahjong.jpg +0 -0
- examples/taraxacum_erythrospermum.jpg +0 -0
- requirements.txt +12 -0
- timmvit.py +79 -0
- trainid2name.json +0 -0
Bamboo_v0-1_ViT-B16.pth.tar.convert
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a6d30c823ba2fc764291e65a06747390a81b15a1e655dd02b45d58528e08c937
size 697651655
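This file is a Git LFS pointer, not the checkpoint itself; the roughly 697 MB weight file is fetched from LFS storage when the repository is cloned. As a minimal sketch (standard library only; the local path is assumed to be the same one app.py uses), a downloaded checkpoint can be checked against the pointer's sha256 oid and size:

import hashlib

# Assumed local path of the downloaded checkpoint (matches the path used in app.py).
CKPT_PATH = "./Bamboo_v0-1_ViT-B16.pth.tar.convert"
EXPECTED_OID = "a6d30c823ba2fc764291e65a06747390a81b15a1e655dd02b45d58528e08c937"
EXPECTED_SIZE = 697651655

def verify_lfs_object(path, expected_oid, expected_size):
    """Hash the file in chunks and compare it against the LFS pointer metadata."""
    sha = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == expected_oid and size == expected_size

if __name__ == "__main__":
    print("checkpoint OK:", verify_lfs_object(CKPT_PATH, EXPECTED_OID, EXPECTED_SIZE))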
README.md
CHANGED
@@ -1,13 +1,13 @@
 ---
-title: Bamboo
-emoji:
-colorFrom:
-colorTo:
+title: Bamboo ViT-B16 Demo
+emoji: 🎋
+colorFrom: blue
+colorTo: blue
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.0.17
 app_file: app.py
 pinned: false
-license:
+license: cc-by-4.0
 ---

-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,186 @@
import argparse
import requests
import gradio as gr
import numpy as np
import cv2
import torch
import torch.nn as nn
from PIL import Image
import torchvision
from torchvision import transforms
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.data import create_transform
import openai
from timmvit import timmvit
import json
from timm.models.hub import download_cached_file
from PIL import Image
import tempfile

# key for GPT
openai.api_key = "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"

def pil_loader(filepath):
    with Image.open(filepath) as img:
        img = img.convert('RGB')
    return img

def build_transforms(input_size, center_crop=True):
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize(input_size * 8 // 7),
        torchvision.transforms.CenterCrop(input_size),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform

# Load the human-readable labels for Bamboo.
with open('./trainid2name.json') as f:
    id2name = json.load(f)


'''
build model
'''
model = timmvit(pretrain_path='./Bamboo_v0-1_ViT-B16.pth.tar.convert')
model.eval()

'''
borrowed from: https://github.com/jacobgil/pytorch-grad-cam/blob/master/pytorch_grad_cam/utils/image.py
'''
def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """This function overlays the cam mask on the image as a heatmap.
    By default the heatmap is in BGR format.
    :param img: The base image in RGB or BGR format.
    :param mask: The cam mask.
    :param use_rgb: Whether to use an RGB or BGR heatmap; this should be set to True if 'img' is in RGB format.
    :param colormap: The OpenCV colormap to be used.
    :returns: The default image with the cam overlay.
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255

    if np.max(img) > 1:
        raise Exception(
            "The input image should be np.float32 in the range [0, 1]")

    cam = 0.7 * heatmap + 0.3 * img
    # cam = cam / np.max(cam)
    return np.uint8(255 * cam)




def chat_with_GPT(my_prompt, history):
    this_history = ''
    for i in history:
        for j in i:
            this_history += j + '\n'

    # print("----this_history----\n" + this_history)
    # my_prompt = input('Please give your Q:')
    my_resp = openai.Completion.create(
        model="text-davinci-003",         # use the Davinci completion model
        prompt=this_history + my_prompt,  # the conversation so far followed by the new question
        temperature=0.8,
        max_tokens=2000,                  # maximum number of tokens in the generated answer
        top_p=1.0,                        # similar to temperature: nucleus sampling over the highest-probability tokens
        frequency_penalty=0.5,            # [-2, 2]; penalizes frequent tokens to reduce repetition (values below 0 repeat a lot)
        presence_penalty=0.0,             # [-2, 2]; controls how closely the answer sticks to the prompt topic
    )
    msg = my_resp.choices[0].text.strip()
    return msg

def run_chatbot(input, gr_state=[]):
    history, conversation = gr_state[0], gr_state[1]
    output = chat_with_GPT(input, history)
    history.append((input, output))
    conversation.append((input, output))
    # chatbox, state
    return conversation, [history, conversation]

def run_chatbot_with_img(input_img, gr_state=[]):
    history, conversation = gr_state[0], gr_state[1]
    img_cls = recognize_image(input_img)
    # conversation = conversation + [(f'<img src="/file={input_img.name}" style="display: inline-block;">', "")]
    input = 'I have given you a photo about ' + img_cls + ', and tell me its definition.'
    output = chat_with_GPT(input, history)

    input_mask = 'Upload image'
    # conversation stores what is shown in the chatbox
    conversation.append((input_mask, output))
    # history keeps the actual prompts sent to GPT
    history.append((input, output))

    # chatbox, gr_state
    return conversation, [history, conversation]

def save_img(image):

    filename = next(tempfile._get_candidate_names()) + '.png'
    image.save(filename)
    return filename

def recognize_image(image):
    img_t = eval_transforms(image)
    # compute output
    output = model(img_t.unsqueeze(0))
    prediction = output.softmax(-1).flatten()
    _, top5_idx = torch.topk(prediction, 5)
    idx_max = top5_idx.tolist()[0]
    print(id2name[str(idx_max)][0])
    print(float(prediction[idx_max]))
    # return {id2name[str(i)][0]: float(prediction[i]) for i in top5_idx.tolist()}
    return id2name[str(idx_max)][0]

def reset():
    return [], [[], []]


eval_transforms = build_transforms(224)

import openai
import os

with gr.Blocks() as demo:
    gr.HTML("""
        <h1>Bamboo</h1>
        <p>Bamboo for Image Recognition Demo. Bamboo knows what an object is and what you are doing at a very fine granularity, e.g. fratercula arctica (fig. 5) and dribbler (fig. 2).</p>
        <strong>Paper:</strong> <a href="https://arxiv.org/abs/2203.07845" target="_blank">https://arxiv.org/abs/2203.07845</a><br/>
        <strong>Project Website:</strong> <a href="https://opengvlab.shlab.org.cn/bamboo/home" target="_blank">https://opengvlab.shlab.org.cn/bamboo/home</a><br/>
        <strong>Code and Model:</strong> <a href="https://github.com/ZhangYuanhan-AI/Bamboo" target="_blank">https://github.com/ZhangYuanhan-AI/Bamboo</a><br/>
        <strong>Tips:</strong><ul>
        <li>We use Bamboo and GPT-3 from OpenAI to build this demo</li>
        </ul>
    """)
    # history for GPT, conversation for the chatbox
    gr_state = gr.State([[], []])

    chatbot = gr.Chatbot(elem_id="chatbot", label="Bamboo Chatbot")
    text_input = gr.Textbox(label="Message", placeholder="Send a message")
    image = gr.inputs.Image()
    with gr.Row():
        submit_btn = gr.Button("Submit Text", interactive=True, variant='primary')
        reset_btn = gr.Button("Reset All")
        submit_btn_img = gr.Button("Submit Img", interactive=True, variant='primary')
        clear_btn_img = gr.Button("Clear Img", interactive=True, variant='primary')

    # image_btn = gr.UploadButton("Upload Image", file_types=["image"])

    # image_btn.upload(run_chatbot_with_img, [image_btn, gr_state], [chatbot, gr_state])

    text_input.submit(fn=run_chatbot, inputs=[text_input, gr_state], outputs=[chatbot, gr_state])
    text_input.submit(lambda: "", None, text_input)
    submit_btn.click(fn=run_chatbot, inputs=[text_input, gr_state], outputs=[chatbot, gr_state])
    submit_btn.click(lambda: "", None, text_input)
    reset_btn.click(fn=reset, inputs=[], outputs=[chatbot, gr_state])
    submit_btn_img.click(run_chatbot_with_img, [image, gr_state], [chatbot, gr_state])


demo.launch(debug=True)
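For local debugging, the recognition path can be exercised without the chatbot UI or an OpenAI key. The following is a standalone sketch that mirrors recognize_image from app.py (same transform, checkpoint, and label file; the test image is one of the bundled examples), not part of the Space itself:

import json
import torch
import torchvision
from PIL import Image
from timmvit import timmvit

# Same preprocessing as build_transforms(224) in app.py, minus ToPILImage,
# because we feed a PIL image directly instead of a numpy array from Gradio.
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(224 * 8 // 7),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

with open('./trainid2name.json') as f:
    id2name = json.load(f)

model = timmvit(pretrain_path='./Bamboo_v0-1_ViT-B16.pth.tar.convert')
model.eval()

img = Image.open('./examples/fratercula_arctica.jpg').convert('RGB')
with torch.no_grad():
    logits = model(transform(img).unsqueeze(0))
probs = logits.softmax(-1).flatten()
top1 = probs.argmax().item()
print(id2name[str(top1)][0], float(probs[top1]))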
app_bak.py
ADDED
@@ -0,0 +1,105 @@
import argparse
import requests
import gradio as gr
import numpy as np
import cv2
import torch
import torch.nn as nn
from PIL import Image
import torchvision
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from timm.data import create_transform

from timmvit import timmvit
import json
from timm.models.hub import download_cached_file
from PIL import Image

def pil_loader(filepath):
    with Image.open(filepath) as img:
        img = img.convert('RGB')
    return img

def build_transforms(input_size, center_crop=True):
    transform = torchvision.transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize(input_size * 8 // 7),
        torchvision.transforms.CenterCrop(input_size),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform

# Load the human-readable labels for Bamboo.
with open('./trainid2name.json') as f:
    id2name = json.load(f)


'''
build model
'''
model = timmvit(pretrain_path='./Bamboo_v0-1_ViT-B16.pth.tar.convert')
model.eval()

'''
borrowed from: https://github.com/jacobgil/pytorch-grad-cam/blob/master/pytorch_grad_cam/utils/image.py
'''
def show_cam_on_image(img: np.ndarray,
                      mask: np.ndarray,
                      use_rgb: bool = False,
                      colormap: int = cv2.COLORMAP_JET) -> np.ndarray:
    """This function overlays the cam mask on the image as a heatmap.
    By default the heatmap is in BGR format.
    :param img: The base image in RGB or BGR format.
    :param mask: The cam mask.
    :param use_rgb: Whether to use an RGB or BGR heatmap; this should be set to True if 'img' is in RGB format.
    :param colormap: The OpenCV colormap to be used.
    :returns: The default image with the cam overlay.
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), colormap)
    if use_rgb:
        heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB)
    heatmap = np.float32(heatmap) / 255

    if np.max(img) > 1:
        raise Exception(
            "The input image should be np.float32 in the range [0, 1]")

    cam = 0.7 * heatmap + 0.3 * img
    # cam = cam / np.max(cam)
    return np.uint8(255 * cam)




def recognize_image(image):
    img_t = eval_transforms(image)
    # compute output
    output = model(img_t.unsqueeze(0))
    prediction = output.softmax(-1).flatten()
    _, top5_idx = torch.topk(prediction, 5)
    return {id2name[str(i)][0]: float(prediction[i]) for i in top5_idx.tolist()}

eval_transforms = build_transforms(224)


image = gr.inputs.Image()
label = gr.outputs.Label(num_top_classes=5)

gr.Interface(
    description="Bamboo for Image Recognition Demo (https://github.com/Davidzhangyuanhan/Bamboo). Bamboo knows what an object is and what you are doing at a very fine granularity, e.g. fratercula arctica (fig. 5) and dribbler (fig. 2).",
    fn=recognize_image,
    inputs=["image"],
    outputs=[
        label,
    ],
    examples=[
        ["./examples/playing_mahjong.jpg"],
        ["./examples/dribbler.jpg"],
        ["./examples/Ferrari-F355.jpg"],
        ["./examples/northern_oriole.jpg"],
        ["./examples/fratercula_arctica.jpg"],
        ["./examples/husky.jpg"],
        ["./examples/taraxacum_erythrospermum.jpg"],
    ],
).launch()
examples/Ferrari-F355.jpg
ADDED
examples/basketball.jpg
ADDED
examples/dribbler.jpg
ADDED
examples/fratercula_arctica.jpg
ADDED
examples/husky.jpg
ADDED
examples/northern_oriole.jpg
ADDED
examples/playing_mahjong.jpg
ADDED
examples/taraxacum_erythrospermum.jpg
ADDED
requirements.txt
ADDED
@@ -0,0 +1,12 @@
torchvision==0.14.1
torch==1.13.1
opencv-python-headless<4.3
timm==0.6.12
numpy==1.21.5


requests==2.25.1
gradio==3.19.1
opencv-python==4.7.0.68
openai==0.26.5
pillow==9.3.0
timmvit.py
ADDED
@@ -0,0 +1,79 @@
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# ------------------------------------------------------------------------

import timm
import torch
import copy
import torch.nn as nn
import torchvision
import json
from timm.models.hub import download_cached_file
from PIL import Image



class MyViT(nn.Module):
    def __init__(self, num_classes=115217, pretrain_path=None, enable_fc=False):
        super().__init__()
        print('initializing ViT model as backbone using ckpt:', pretrain_path)
        self.model = timm.create_model('vit_base_patch16_224', checkpoint_path=pretrain_path, num_classes=num_classes)  # pretrained=True)
    # def forward_features(self, x):
    #     x = self.model.patch_embed(x)
    #     cls_token = self.model.cls_token.expand(x.shape[0], -1, -1)  # stole cls_tokens impl from Phil Wang, thanks
    #     if self.model.dist_token is None:
    #         x = torch.cat((cls_token, x), dim=1)
    #     else:
    #         x = torch.cat((cls_token, self.model.dist_token.expand(x.shape[0], -1, -1), x), dim=1)

    #     x = self.model.pos_drop(x + self.model.pos_embed)
    #     x = self.model.blocks(x)
    #     x = self.model.norm(x)

    #     return self.model.pre_logits(x[:, 0])


    def forward(self, x):
        x = self.model.forward(x)
        return x


def timmvit(**kwargs):
    default_kwargs = {}
    default_kwargs.update(**kwargs)
    return MyViT(**default_kwargs)


def build_transforms(input_size, center_crop=True):
    transform = torchvision.transforms.Compose([
        torchvision.transforms.Resize(input_size * 8 // 7),
        torchvision.transforms.CenterCrop(input_size),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    return transform

def pil_loader(filepath):
    with Image.open(filepath) as img:
        img = img.convert('RGB')
    return img

def test_build():
    with open('/mnt/lustre/yhzhang/bamboo/Bamboo_ViT-B16_demo/trainid2name.json') as f:
        id2name = json.load(f)
    img = pil_loader('/mnt/lustre/yhzhang/bamboo/Bamboo_ViT-B16_demo/142520422_6ad756ddf6_w_d.jpg')
    eval_transforms = build_transforms(224)
    img_t = eval_transforms(img)
    img_t = img_t[None, :]
    model = MyViT(pretrain_path='/mnt/lustre/yhzhang/bamboo/Bamboo_ViT-B16_demo/Bamboo_v0-1_ViT-B16.pth.tar.convert')
    # image = torch.rand(1, 3, 224, 224)
    output = model(img_t)
    # import pdb;pdb.set_trace()
    prediction = output.softmax(-1).flatten()
    _, top5_idx = torch.topk(prediction, 5)
    # import pdb;pdb.set_trace()
    print({id2name[str(i)][0]: float(prediction[i]) for i in top5_idx.tolist()})

if __name__ == '__main__':
    test_build()
trainid2name.json
ADDED
The diff for this file is too large to render.
See raw diff
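The structure of trainid2name.json is not rendered here, but from the lookups in app.py and timmvit.py (id2name[str(i)][0]) it appears to map each training-class id, stored as a string key, to a list whose first element is the human-readable class name. A hypothetical illustration (the ids below are placeholders, not real Bamboo class ids):

# Hypothetical excerpt; the real file covers all 115217 Bamboo classes.
id2name = {
    "0": ["fratercula arctica"],
    "1": ["dribbler"],
}

top1 = 0
print(id2name[str(top1)][0])  # -> "fratercula arctica"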