# Spaces: Sleeping
import gradio as gr | |
import spaces | |
import torch | |
from transformers import AutoProcessor, AutoModelForZeroShotImageClassification | |
from datasets import load_dataset | |
# Single checkpoint id shared by the processor and the model below.
CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"

# Load the training split and build a FAISS index over its "embeddings"
# column so that nearest-neighbour queries can be run against it.
dataset = load_dataset("not-lain/embedded-pokemon", split="train")
dataset = dataset.add_faiss_index("embeddings")

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor prepares images for the model; the model is placed on `device`.
processor = AutoProcessor.from_pretrained(CLIP_CHECKPOINT)
model = AutoModelForZeroShotImageClassification.from_pretrained(
    CLIP_CHECKPOINT,
    device_map=device,
)
def search(query, k: int = 4):
    """Return the dataset entries whose embeddings are closest to a query image.

    The image is embedded with the module-level ``processor`` and ``model``
    and then looked up in the FAISS index built on the ``"embeddings"``
    column of the module-level ``dataset``.

    Args:
        query: The image to search with, in any form the processor accepts
            through its ``images=`` argument. It is not a string; the Gradio
            ``"image"`` input presumably supplies a numpy array (TODO confirm).
        k: Maximum number of nearest neighbours to retrieve.

    Returns:
        A ``(images, labels)`` tuple. ``images`` is the list of retrieved
        images; ``labels`` maps each retrieved ``"text"`` value to the score
        reported by the index for that entry. Entries sharing the same text
        collapse into one key (the last score wins).
    """
    pixel_values = processor(images=query, return_tensors="pt")["pixel_values"]
    pixel_values = pixel_values.to(device)

    # Inference only: avoid building an autograd graph that would be thrown
    # away immediately afterwards.
    with torch.no_grad():
        img_emb = model.get_image_features(pixel_values=pixel_values)[0]  # batch of one
    img_emb = img_emb.cpu().numpy()  # the dataset index is queried with numpy arrays

    scores, retrieved_examples = dataset.get_nearest_examples(
        "embeddings",
        img_emb,  # compare the query embedding with the stored embeddings
        k=k,  # keep at most the top-k matches
    )

    images = retrieved_examples["image"]

    # Pair texts and scores by position instead of indexing with range(k):
    # the index may return fewer than k matches, which would otherwise raise
    # IndexError.
    # NOTE(review): with a default FAISS index these scores are presumably
    # distances (smaller = closer), yet the "label" output displays them as
    # confidences -- confirm whether they should be converted to similarities.
    labels = {
        text: float(score)
        for text, score in zip(retrieved_examples["text"], scores)
    }
    return images, labels
# Wire `search` into a simple UI: one image input, and a gallery of matches
# plus a label component showing the per-match scores as outputs.
demo = gr.Interface(
    fn=search,
    inputs="image",
    outputs=["gallery", "label"],
)

# debug=True is passed through to Gradio's launch() unchanged.
demo.launch(debug=True)