import torch
from transformers import pipeline, SiglipModel, AutoProcessor
import numpy as np
import gradio as gr


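# Zero-shot image classification pipeline backed by a CLIP checkpoint.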
clip_checkpoint = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
clip_detector = pipeline(model=clip_checkpoint, task="zero-shot-image-classification")

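# SigLIP counterpart to the CLIP pipeline above, sketched with the lower-level
# SiglipModel + AutoProcessor API. The checkpoint name is an assumption; any
# SigLIP checkpoint on the Hub should work here.
siglip_checkpoint = "google/siglip-base-patch16-224"
siglip_model = SiglipModel.from_pretrained(siglip_checkpoint)
siglip_processor = AutoProcessor.from_pretrained(siglip_checkpoint)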

def postprocess(output):
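  # Convert the pipeline's list of {"label", "score"} dicts into the
  # {label: score} mapping that gr.Label expects.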
  return {out["label"]: float(out["score"]) for out in output}

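def postprocess_siglip(probs, labels):
  # SigLIP assigns each label an independent sigmoid probability (no softmax
  # across labels), so simply pair every label with its probability.
  return {labels[i]: float(probs[0][i]) for i in range(len(labels))}


def siglip_detector(image, texts):
  # Minimal SigLIP scoring sketch: encode the image and candidate labels, then
  # apply a sigmoid to the image-text logits, mirroring SigLIP's training objective.
  # padding="max_length" matches how SigLIP was trained.
  inputs = siglip_processor(text=texts, images=image, padding="max_length", return_tensors="pt")
  with torch.no_grad():
    outputs = siglip_model(**inputs)
  return torch.sigmoid(outputs.logits_per_image)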

def infer(image, candidate_labels):
  # Labels arrive as one comma-separated string; split it and trim whitespace.
  candidate_labels = [label.strip() for label in candidate_labels.split(",")]
  clip_out = clip_detector(image, candidate_labels=candidate_labels)
  siglip_out = siglip_detector(image, candidate_labels)
  return postprocess(clip_out), postprocess_siglip(siglip_out, candidate_labels)

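# Two-column Gradio UI: image and label list on the left, per-model scores on the right.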
with gr.Blocks() as demo:
  gr.Markdown("# Compare CLIP and SigLIP")
  gr.Markdown("Compare the performance of CLIP and SigLIP on zero-shot classification in this Space 👇")
  with gr.Row():
    with gr.Column():
        image_input = gr.Image(type="pil")
        text_input = gr.Textbox(label="Input a list of labels")
        run_button = gr.Button("Run", visible=True)

    with gr.Column():
      clip_output = gr.Label(label="CLIP Output", num_top_classes=15)
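      # SigLIP scores shown side by side with CLIP (assumes the SigLIP sketch above).
      siglip_output = gr.Label(label="SigLIP Output", num_top_classes=15)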
      
  examples = [["./baklava.jpg", "baklava, souffle, tiramisu"]]
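  # cache_examples=True pre-computes the example outputs so the demo loads with results ready.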
  gr.Examples(
      examples=examples,
      inputs=[image_input, text_input],
      outputs=[clip_output, siglip_output],
      fn=infer,
      cache_examples=True,
  )
  run_button.click(
      fn=infer,
      inputs=[image_input, text_input],
      outputs=[clip_output, siglip_output],
  )

demo.launch()