"""Gradio demo that classifies a car photo as Exterior, Interior or Unknown.

A ViT image-classification checkpoint is loaded from the local
``car_scene_model`` directory and exposed through a small Gradio interface.
"""

from transformers import ViTImageProcessor, ViTForImageClassification
import torch
import gradio as gr

# Preprocessor and classifier are loaded once at import time so that every
# request reuses the same objects.
feature_extractor = ViTImageProcessor.from_pretrained("car_scene_model")
model = ViTForImageClassification.from_pretrained("car_scene_model")

# Display names for the output classes; position i names logit i.
# NOTE(review): assumes this order matches the checkpoint's class order --
# confirm against the model config.
labels = ['Exterior', 'Interior', 'Unknown']


def classify(im):
    """Return a ``{label: probability}`` mapping for a single image.

    Args:
        im: The image handed over by the Gradio ``"image"`` input; it is
            passed to the image processor unchanged.

    Returns:
        dict: One entry per name in ``labels``, mapping the label to its
        softmax probability as a Python float.

    Raises:
        IndexError: If the model emits fewer scores than there are labels.
    """
    features = feature_extractor(im, return_tensors='pt')

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        # Ask for the structured output and read ``logits`` by name rather
        # than by position, so that extra outputs (hidden states,
        # attentions) cannot be picked up by mistake.
        outputs = model(pixel_values=features["pixel_values"], return_dict=True)
        probability = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Batch of one: take the first row and convert it to plain floats.
    probs = probability[0].tolist()
    confidences = {label: probs[i] for i, label in enumerate(labels)}
    return confidences


description = """ Car scene recognition demo. Upload or drag a .jpg image to test """

interface = gr.Interface(
    fn=classify,
    inputs="image",
    outputs="label",
    title="Car scene recognition",
    examples=[
        "crv.jpg",
        "cadillac1.jpeg",
        "cadillacinterior.jpeg",
        "outsidescene.jpg",
        "wheel.jpeg",
        "crv_inside.jpg",
        "chevy_exterior.jpeg",
        "lexus_inside.jpeg",
        "malibu_interior.jpeg",
        "maserati_interior.jpeg",
        "highlander_inside.jpeg",
        "altima_inside.jpeg",
        "altima_outside.jpeg",
    ],
    description=description,
)

# Launched at module level, exactly as in the original script.
interface.launch()