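"""ClipnCrop: a Gradio demo that runs a DETR object detector and crops the input
image to the region containing the most detected objects, at a user-chosen size
plus a 10-pixel bleed on all sides."""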
import gradio as gr
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch

# Load the DETR (DEtection TRansformer) processor and model once at startup
feature_extractor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-101")
dmodel = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-101")

i1 = gr.Image(type="pil", label="Input image")
i2 = gr.Textbox(label="Input text")
i3 = gr.Number(value=0.96, label="Detection threshold (0-1)")
i4 = gr.Number(value=400, label="Custom Width (optional)")
i5 = gr.Number(value=400, label="Custom Height (optional)")
o1 = gr.Image(type="pil", label="Cropped part")
o2 = gr.Textbox(label="Crop coordinates")

def extract_image(image, text, prob, custom_width, custom_height):
    # `text` is accepted for interface compatibility but is not used by the detector
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = dmodel(**inputs)

    # Rescale boxes to the original image size and keep detections above the user threshold
    target_sizes = torch.tensor([image.size[::-1]])
    results = feature_extractor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=prob)[0]

    # Count detections per 100x100-pixel region, keyed by each box's rounded top-left corner
    object_counts = {}
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        xmin_box, ymin_box = box[0].item(), box[1].item()
        area_key = (round(xmin_box / 100) * 100, round(ymin_box / 100) * 100)
        object_counts[area_key] = object_counts.get(area_key, 0) + 1

    # If nothing was detected above the threshold, return the image unchanged
    if not object_counts:
        return image, "No objects detected above the threshold"

    # Find the region with the most detected objects
    most_objects_area = max(object_counts, key=object_counts.get)

    # Calculate cropping coordinates based on the area with most objects and custom dimensions
    xmin, ymin = most_objects_area
    xmax = min(xmin + custom_width, image.width)
    ymax = min(ymin + custom_height, image.height)

    # Apply a bleed of at least 10 pixels on all sides
    xmin = max(0, xmin - 10)
    ymin = max(0, ymin - 10)
    xmax = min(image.width, xmax + 10)
    ymax = min(image.height, ymax + 10)

    cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))

    # Return the coordinates of the cropped area
    coordinates = f"xmin: {int(xmin)}, ymin: {int(ymin)}, xmax: {int(xmax)}, ymax: {int(ymax)}"
    
    return cropped_image, coordinates

title = "ClipnCrop"
description = "<p style='color:white'>Crop an image with the area containing the most detected objects while maintaining custom dimensions and adding a 10-pixel bleed.</p>"
examples = [['ex3.jpg', 'people', 0.96, 800, 400], ['ex2.jpg', 'smiling face', 0.85, 300, 400]]
gr.Interface(fn=extract_image, inputs=[i1, i2, i3, i4, i5], outputs=[o1, o2], title=title, description=description, examples=examples, enable_queue=True).launch()