import gradio as gr
import torch
from PIL import Image
from transformers import DetrImageProcessor, DetrForObjectDetection
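
# Load the DETR ResNet-101 image processor and object-detection model from the
# Hugging Face Hub (weights are downloaded on the first run).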
feature_extractor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-101")
dmodel = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-101")
i1 = gr.Image(type="pil", label="Input image")
i2 = gr.Textbox(label="Input text")
i3 = gr.Number(value=0.96, label="Threshold percentage score")
i4 = gr.Number(value=400, label="Custom Width (optional)")
i5 = gr.Number(value=400, label="Custom Height (optional)")
o1 = gr.Image(type="pil", label="Cropped part")
o2 = gr.Textbox(label="Crop coordinates")
def extract_image(image, text, prob, custom_width, custom_height):
    # `text` is accepted for interface compatibility but is not used by the detection logic.
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = dmodel(**inputs)
    # DETR post-processing expects target sizes as (height, width); PIL's size is (width, height)
    target_sizes = torch.tensor([image.size[::-1]])
    results = feature_extractor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=prob)[0]
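    # `results` maps "scores", "labels", and "boxes" to tensors; boxes are in
    # absolute pixel coordinates as (xmin, ymin, xmax, ymax).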
    # Count detections per 100x100-pixel bin of each box's top-left corner
    object_counts = {}
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        x0, y0 = box[0].item(), box[1].item()  # plain floats so bin keys hash by value
        area_key = (round(x0 / 100) * 100, round(y0 / 100) * 100)
        object_counts[area_key] = object_counts.get(area_key, 0) + 1
    if not object_counts:
        # Nothing detected above the threshold: return the image unchanged
        return image, "No objects detected above the threshold"
    # Find the bin with the most detected objects
    most_objects_area = max(object_counts, key=object_counts.get)
    # Derive the crop window from the densest bin and the requested dimensions
    xmin, ymin = most_objects_area
    xmax = min(xmin + custom_width, image.width)
    ymax = min(ymin + custom_height, image.height)
    # Apply a bleed of at least 10 pixels on all sides
    xmin = max(0, xmin - 10)
    ymin = max(0, ymin - 10)
    xmax = min(image.width, xmax + 10)
    ymax = min(image.height, ymax + 10)
    cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
    # Report the coordinates of the cropped area alongside the crop itself
    coordinates = f"xmin: {int(xmin)}, ymin: {int(ymin)}, xmax: {int(xmax)}, ymax: {int(ymax)}"
    return cropped_image, coordinates
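
# A minimal sketch for exercising the crop logic without the UI; "sample.jpg" is a
# hypothetical local file, so this stays commented out in the deployed Space:
# img = Image.open("sample.jpg").convert("RGB")
# crop, coords = extract_image(img, "", 0.9, 400, 400)
# print(coords)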
title = "ClipnCrop"
description = "<p style='color:white'>Crop an image around the area containing the most detected objects, keeping custom dimensions and adding a 10-pixel bleed.</p>"
examples = [['ex3.jpg', 'people', 0.96, 800, 400], ['ex2.jpg', 'smiling face', 0.85, 300, 400]]
gr.Interface(fn=extract_image, inputs=[i1, i2, i3, i4, i5], outputs=[o1, o2], title=title, description=description, examples=examples).queue().launch()
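# .queue() serializes concurrent requests, which helps on shared Space hardware;
# launch(share=True) would additionally expose a temporary public URL when run locally.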