import torch
import gradio as gr
from transformers import CLIPProcessor, CLIPModel
import spaces
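# Load the CLIP model and its processor once at startup; the model is kept on the GPU.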
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to("cuda")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
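# spaces.GPU attaches a GPU to the decorated function on ZeroGPU Spaces (a no-op on other hardware).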
@spaces.GPU
def calculate_score(image, text):
    # Split the semicolon-separated descriptions into a clean list of labels.
    labels = text.split(";")
    labels = [l.strip() for l in labels]
    labels = list(filter(None, labels))
    if len(labels) == 0:
        return dict()

    # Preprocess the image and tokenize the labels, then move the tensors to the GPU.
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    inputs = {k: v.to("cuda") for k, v in inputs.items()}

    # Run inference without tracking gradients.
    with torch.no_grad():
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image.cpu().numpy()

    # CLIP multiplies the image-text cosine similarity by a logit scale of ~100,
    # so dividing by 100 recovers an approximate similarity score per label.
    results_dict = {label: score / 100.0 for label, score in zip(labels, logits_per_image[0])}
    return results_dict
with gr.Blocks() as demo:
    gr.Markdown("# CLIP Score")
    gr.Markdown("Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text")
    with gr.Row():
        image_input = gr.Image()
        output_label = gr.Label()
    text_input = gr.Textbox(label="Descriptions (separated by semicolons)")

    # Recompute the scores whenever a new image is uploaded or new descriptions are submitted.
    image_input.change(
        fn=calculate_score,
        inputs=[image_input, text_input],
        outputs=output_label,
    )
    text_input.submit(
        fn=calculate_score,
        inputs=[image_input, text_input],
        outputs=output_label,
    )

    gr.Examples(
        examples=[
            [
                "cat.jpg",
                "a cat stuck in a door; a cat in the air; a cat sitting; a cat standing; a cat is entering the matrix; a cat is entering the void",
            ]
        ],
        fn=calculate_score,
        inputs=[image_input, text_input],
        outputs=output_label,
    )

demo.launch()