"""Gradio demo: zero-shot "advertisement" classification with the ALIGN model."""

import io
from typing import Dict

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import AlignModel, AlignProcessor

# Text prompts the image is scored against; the returned dict uses these as keys.
CANDIDATE_LABELS = ["advertisement", "not an advertisement"]

# Upper bound (seconds) for the remote image download so a stalled server
# cannot block a request forever.
REQUEST_TIMEOUT_SECONDS = 10

# Loaded once at import time and reused for every request.
processor = AlignProcessor.from_pretrained("kakaobrain/align-base")
model = AlignModel.from_pretrained("kakaobrain/align-base")


def get_image_alignment_probabilities(image, is_url) -> Dict[str, float]:
    """Score an image against the candidate labels.

    Args:
        image: A local file path, or an HTTP(S) URL when ``is_url`` is true.
        is_url: When true, ``image`` is downloaded with ``requests``;
            otherwise it is opened directly from disk.

    Returns:
        A mapping from each candidate label to its softmax probability.

    Raises:
        ValueError: If ``image`` is ``None`` (e.g. nothing was uploaded).
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    if image is None:
        raise ValueError("No image provided.")

    if is_url:
        response = requests.get(image, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        # Use the decoded body rather than ``response.raw``: the raw stream
        # skips Content-Encoding (gzip/deflate) decoding, which PIL cannot read.
        source = io.BytesIO(response.content)
    else:
        source = image

    # ``convert`` returns an independent copy, so the underlying file can be
    # closed as soon as the conversion is done.
    with Image.open(source) as opened:
        rgb_image = opened.convert("RGB")

    inputs = processor(text=CANDIDATE_LABELS, images=rgb_image, return_tensors="pt")

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax across dim 1 (presumably the label axis of an
    # (images, labels) logit matrix — only one image is passed, so row 0 is used).
    probs = outputs.logits_per_image.softmax(dim=1)
    return dict(zip(CANDIDATE_LABELS, probs[0].tolist()))


# NOTE(review): the image component hands the function a local file path, so
# ticking the checkbox from the UI makes ``requests`` receive a path instead of
# a URL. Consider a separate textbox input for URLs — confirm intended usage.
iface = gr.Interface(
    fn=get_image_alignment_probabilities,
    inputs=[gr.Image(type='filepath', label="Upload Image"), "checkbox"],
    outputs="label",
)
iface.launch()