import gradio as gr
from transformers import AutoModel, AutoProcessor
import torch
import requests
from PIL import Image
from io import BytesIO

fashion_items = ['top', 'trousers', 'jumper', 'shirt', 'shorts']
# Load model and processor
model_name = 'Marqo/marqo-fashionSigLIP'
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_name, trust_remote_code=True)
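# Note: trust_remote_code=True is needed because this checkpoint ships its own
# modelling/processing code on the Hugging Face Hub instead of a stock class.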

# Preprocess and normalize text data
with torch.no_grad():
    # Ensure truncation and padding are activated
    processed_texts = processor(
        text=fashion_items,
        return_tensors="pt",
        truncation=True,  # Truncate text to fit the model's input size
        padding=True      # Pad shorter sequences so all have the same length
    )['input_ids']

    text_features = model.get_text_features(processed_texts)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
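# Because text and image features are both L2-normalised, their dot product in the
# prediction function below is a cosine similarity, which softmax turns into
# per-label probabilities.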

# Prediction function
def predict_from_url(url):
    # Check if the URL is empty; return None for the image output so both outputs are filled
    if not url:
        return {"Error": "Please input a URL"}, None

    try:
        image = Image.open(BytesIO(requests.get(url).content))
    except Exception as e:
        return {"Error": f"Failed to load image: {str(e)}"}, None

    processed_image = processor(images=image, return_tensors="pt")['pixel_values']

    with torch.no_grad():
        image_features = model.get_image_features(processed_image)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_probs = (100 * image_features @ text_features.T).softmax(dim=-1)

    return {fashion_items[i]: float(text_probs[0, i]) for i in range(len(fashion_items))}, url

# Gradio interface
demo = gr.Interface(
    fn=predict_from_url,
    inputs=gr.Textbox(label="Enter Image URL"),
    outputs=[gr.Label(label="Classification Results"), "image"],
    title="Fashion Item Classifier",
    flagging_mode="never"
)

# Launch the interface
demo.launch()
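# Usage note (assumption): on a Hugging Face Space this file runs as-is; when testing
# locally, demo.launch(share=True) can be used to expose a temporary public URL.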