import torch
from transformers import AutoImageProcessor, AutoModelForObjectDetection
from label_studio_ml.model import LabelStudioMLBase
from uuid import uuid4
from PIL import Image


class Model(LabelStudioMLBase):
    image_processor = AutoImageProcessor.from_pretrained("diegokauer/conditional-detr-coe-int")
    model = AutoModelForObjectDetection.from_pretrained("diegokauer/conditional-detr-coe-int")

    def __init__(self, **kwargs):
        # don't forget to call the base class constructor
        super(Model, self).__init__(**kwargs)
        # preinitialize variables with keys needed to extract info from tasks
        # and annotations and to form predictions
        self.model = Model.model
        self.image_processor = Model.image_processor
        self.id2label = Model.model.config.id2label

    def predict(self, tasks, **kwargs):
        """ This is where inference happens: the model returns a list of
            predictions based on the input list of tasks
        """
        predictions = []
        for task in tasks:
            image_path = task['data']['image']
            image = Image.open(image_path)  # the task path may need resolving to a local file first
            original_width, original_height = image.size

            with torch.no_grad():
                inputs = self.image_processor(images=image, return_tensors="pt")
                outputs = self.model(**inputs)
                target_sizes = torch.tensor([image.size[::-1]])
                results = self.image_processor.post_process_object_detection(
                    outputs, threshold=0.5, target_sizes=target_sizes)[0]

            result_list = []
            for score, label, box in zip(results['scores'], results['labels'], results['boxes']):
                label_id = str(uuid4())[:4]
                x, y, x2, y2 = box.tolist()
                result_list.append({
                    'id': label_id,
                    'original_width': original_width,
                    'original_height': original_height,
                    'from_name': 'label',
                    'to_name': 'image',
                    'type': 'rectanglelabels',  # bounding boxes map to a RectangleLabels control
                    'score': score.item(),  # per-region score, visible in the editor
                    'value': {
                        # Label Studio expects coordinates as percentages of the image size
                        'x': x / original_width * 100,
                        'y': y / original_height * 100,
                        'width': (x2 - x) / original_width * 100,
                        'height': (y2 - y) / original_height * 100,
                        'rotation': 0,
                        'rectanglelabels': [self.id2label[label.item()]]
                    }
                })

            predictions.append({
                'score': results['scores'].mean().item(),  # overall prediction score, visible in the data manager columns
                'model_version': 'diegokauer/conditional-detr-coe-int',  # predictions are differentiated by model version
                'result': result_list
            })

        return predictions
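

# The sketch below is not part of the model class; it shows one plausible way to
# wire this backend into Label Studio, assuming the label-studio-ml-backend
# package is installed. The directory name "coe_detector" and the script name
# "model.py" are assumptions, adjust them to your setup:
#
#   label-studio-ml init coe_detector --script model.py
#   label-studio-ml start coe_detector
#
# The project's labeling config should expose an Image object named "image" and
# a RectangleLabels control named "label" (matching the to_name/from_name used
# above), with one <Label> per class in model.config.id2label, for example:
#
#   <View>
#     <Image name="image" value="$image"/>
#     <RectangleLabels name="label" toName="image">
#       <!-- placeholder values; use the class names from model.config.id2label -->
#       <Label value="..."/>
#     </RectangleLabels>
#   </View>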