"""Start-up script: fetch the CLIP-DINOiser checkpoint, build the model, launch Gradio.

Everything here runs at import time, in this order:

1. silence Python warnings and initialise Hydra against the ``configs`` directory;
2. clone the ``ariG23498/clip-dinoiser`` Hub repository into ``./clip-dinoiser``;
3. load ``checkpoints/last.pt`` and build the model described by
   ``clip_dinoiser.yaml`` with the Pascal VOC class names;
4. launch a Gradio text-to-text interface backed by :func:`greet`.
"""

# --- standard library ---
import os
import warnings
from operator import itemgetter

# --- third-party ---
from hydra import compose, initialize
from PIL import Image
import matplotlib.pyplot as plt
from torchvision import transforms as T
import torch.nn.functional as F
import numpy as np
import torch
from huggingface_hub import Repository
import gradio as gr

# --- project-local ---
from models.builder import build_model
from visualization import mask2rgb
from segmentation.datasets import PascalVOCDataset

# Suppress every Python warning for the rest of the process.
warnings.filterwarnings("ignore")

# Hydra must be initialised before `compose` is called below.
initialize(config_path="configs", version_base=None)

# Clone the Hub repository holding the checkpoint into ./clip-dinoiser.
# The access token is read from the `token` environment variable; if it is
# unset, `None` is passed.
# NOTE(review): `Repository` is deprecated in recent huggingface_hub releases
# in favour of `snapshot_download` / `hf_hub_download` -- confirm the pinned
# version before migrating.
repo = Repository(
    local_dir="clip-dinoiser",
    clone_from="ariG23498/clip-dinoiser",
    use_auth_token=os.environ.get("token"),
)

check_path = 'clip-dinoiser/checkpoints/last.pt'
device = "cuda" if torch.cuda.is_available() else "cpu"

# SECURITY NOTE(review): `torch.load` unpickles the file, and unpickling can
# execute arbitrary code. The checkpoint comes from a remote repository, so it
# should only be loaded from a trusted source. Consider `weights_only=True` if
# the installed torch supports it and the checkpoint holds only tensors and
# primitive containers -- not changed here because that cannot be verified
# from this file.
check = torch.load(check_path, map_location=device)

dinoclip_cfg = "clip_dinoiser.yaml"
cfg = compose(config_name=dinoclip_cfg)

model = build_model(cfg.model, class_names=PascalVOCDataset.CLASSES).to(device)
# Switching off the ImageNet templates for fast inference.
model.clip_backbone.decode_head.use_templates = False
# strict=False: presumably tolerates keys that are missing from / extra in the
# checkpoint (standard torch.nn.Module semantics) -- TODO confirm that silent
# mismatches are intended.
model.load_state_dict(check['model_state_dict'], strict=False)
model = model.eval()


def greet(name: str) -> str:
    """Return the greeting ``"Hello <name>!!"`` for *name*."""
    return f"Hello {name}!!"


iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()