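"""ClipnCrop: a Gradio demo that runs a DETR object detector and crops the input
image to the region containing the most detected objects, at a user-chosen size
plus a 10-pixel bleed on all sides."""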
import gradio as gr
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch

# Load the DETR (DEtection TRansformer) processor and model once at startup
feature_extractor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-101")
dmodel = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-101")

i1 = gr.Image(type="pil", label="Input image")
i2 = gr.Textbox(label="Input text")
i3 = gr.Number(value=0.96, label="Detection threshold (0-1)")
i4 = gr.Number(value=400, label="Custom Width (optional)")
i5 = gr.Number(value=400, label="Custom Height (optional)")
o1 = gr.Image(type="pil", label="Cropped part")
o2 = gr.Textbox(label="Crop coordinates")

def extract_image(image, text, prob, custom_width, custom_height):
    # `text` is accepted for interface compatibility but is not used by the detector
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = dmodel(**inputs)

    # Rescale boxes to the original image size and keep detections above the user threshold
    target_sizes = torch.tensor([image.size[::-1]])
    results = feature_extractor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=prob)[0]

    # Count detections per 100x100-pixel region, keyed by each box's rounded top-left corner
    object_counts = {}
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        xmin_box, ymin_box = box[0].item(), box[1].item()
        area_key = (round(xmin_box / 100) * 100, round(ymin_box / 100) * 100)
        object_counts[area_key] = object_counts.get(area_key, 0) + 1

    # If nothing was detected above the threshold, return the image unchanged
    if not object_counts:
        return image, "No objects detected above the threshold"

    # Find the region with the most detected objects
    most_objects_area = max(object_counts, key=object_counts.get)

    # Calculate cropping coordinates based on the area with most objects and custom dimensions
    xmin, ymin = most_objects_area
    xmax = min(xmin + custom_width, image.width)
    ymax = min(ymin + custom_height, image.height)

    # Apply a bleed of at least 10 pixels on all sides
    xmin = max(0, xmin - 10)
    ymin = max(0, ymin - 10)
    xmax = min(image.width, xmax + 10)
    ymax = min(image.height, ymax + 10)

    cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))

    # Return the coordinates of the cropped area
    coordinates = f"xmin: {int(xmin)}, ymin: {int(ymin)}, xmax: {int(xmax)}, ymax: {int(ymax)}"
    
    return cropped_image, coordinates

title = "ClipnCrop"
description = "<p style='color:white'>Crop an image with the area containing the most detected objects while maintaining custom dimensions and adding a 10-pixel bleed.</p>"
examples = [['ex3.jpg', 'people', 0.96, 800, 400], ['ex2.jpg', 'smiling face', 0.85, 300, 400]]
gr.Interface(fn=extract_image, inputs=[i1, i2, i3, i4, i5], outputs=[o1, o2], title=title, description=description, examples=examples, enable_queue=True).launch()