from transformers import DetrImageProcessor, DetrForObjectDetection
from transformers import YolosImageProcessor, YolosForObjectDetection
import torch
from PIL import Image
import io


# Load transformer-based model (Yolos or DETR)
def load_model(model_uri: str):
    """Load Transformer model"""
    """  - Doc DETR: https://huggingface.co/docs/transformers/en/model_doc/detr"""
    """  - Doc Yolos: https://huggingface.co/docs/transformers/en/model_doc/yolos"""

    if "detr" in model_uri:
        # you can specify the revision tag if you don't want the timm dependency
        processor = DetrImageProcessor.from_pretrained(model_uri, revision="no_timm")
        model = DetrForObjectDetection.from_pretrained(model_uri, revision="no_timm")
    elif "yolos" in model_uri:
        processor = YolosImageProcessor.from_pretrained(model_uri)
        model = YolosForObjectDetection.from_pretrained(model_uri)
    else:
        processor = None
        model = None
    return processor, model


def object_detection(processor, model, image_bytes):
    """Perform object detection task"""

    print('Object detection...')
    #url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    #image = Image.open(requests.get(url, stream=True).raw)

    img = Image.open(io.BytesIO(image_bytes))
    inputs = processor(images=img, return_tensors="pt")
    # print('inputs', inputs)
    outputs = model(**inputs)

    # convert outputs (bounding boxes and class logits) to COCO API
    # let's only keep detections with score > 0.9
    target_sizes = torch.tensor([img.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
    return results