kinsung committed
Commit 9af0a5c · 1 Parent(s): 628652c
Files changed (1): app.py (+26 −29)
app.py CHANGED
@@ -23,37 +23,34 @@ def extract_image(image, text, prob, custom_width, custom_height):
     target_sizes = torch.tensor([image.size[::-1]])
     results = feature_extractor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
 
-    # Retrieve coordinates of the detected key object based on the input text
-    key_object_coordinates = None
-
-    object_to_detect = text.lower()
+    # Count the number of objects in each area
+    object_counts = {}
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
-        label_name = dmodel.config.id2label[label.item()].lower()
-        pout = pout + " " + label_name
-        if object_to_detect in label_name:
-            key_object_coordinates = box.tolist()
-
-            break
-
-    # Ensure that the key object is in the cropped image
-    if key_object_coordinates:
-        xmin, ymin, xmax, ymax = key_object_coordinates
-        width, height = image.size
-
-        # Calculate cropping coordinates based on key object location and custom dimensions
-        xmin = max(0, xmin - 50)
-        ymin = max(0, ymin - 50)
-        xmax = custom_width
-        ymax = custom_height
-
-        cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
-    else:
-        # If no key object found, just crop based on custom dimensions
-        cropped_image = image.crop((0, 0, custom_width, custom_height))
-
-    return cropped_image, pout
+        area_key = (round(box[0] / 100) * 100, round(box[1] / 100) * 100)  # Group by areas
+        object_counts[area_key] = object_counts.get(area_key, 0) + 1
+
+    # Find the area with the most detected objects
+    most_objects_area = max(object_counts, key=object_counts.get)
+
+    # Calculate cropping coordinates based on the area with most objects and custom dimensions
+    xmin, ymin = most_objects_area
+    xmax = min(xmin + custom_width, image.width)
+    ymax = min(ymin + custom_height, image.height)
+
+    # Apply a bleed of at least 10 pixels on all sides
+    xmin = max(0, xmin - 10)
+    ymin = max(0, ymin - 10)
+    xmax = min(image.width, xmax + 10)
+    ymax = min(image.height, ymax + 10)
+
+    cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
+
+    # Return the coordinates of the cropped area
+    coordinates = f"xmin: {int(xmin)}, ymin: {int(ymin)}, xmax: {int(xmax)}, ymax: {int(ymax)}"
+
+    return cropped_image, coordinates
 
 title = "ClipnCrop"
-description = "<p style='color:white'>Crop an image with the detected key object within the cropped region. The cropped image maintains resolution and ensures the key object is included.</p>"
+description = "<p style='color:white'>Crop an image with the area containing the most detected objects while maintaining custom dimensions and adding a 10-pixel bleed.</p>"
 examples = [['ex3.jpg', 'people', 0.96, 800, 400], ['ex2.jpg', 'smiling face', 0.85, 300, 400]]
 gr.Interface(fn=extract_image, inputs=[i1, i2, i3, i4, i5], outputs=[o1, o2], title=title, description=description, examples=examples, enable_queue=True).launch()
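For context, the new cropping strategy in this commit can be exercised on its own. The sketch below is a minimal reconstruction, not the app's code: crop_densest_area is a hypothetical helper, and plain float tuples stand in for the DETR results["boxes"] tensors. Working with Python floats also sidesteps a subtlety in the committed version, where box[0] and box[1] are 0-dim tensors, so the grid keys may not hash or compare by value depending on the PyTorch version.

    # Minimal sketch of the commit's density-based crop, assuming boxes are
    # already plain (xmin, ymin, xmax, ymax) floats (e.g. via box.tolist()).
    from PIL import Image

    def crop_densest_area(image, boxes, custom_width, custom_height, bleed=10):
        # Bucket each detection into a 100x100-pixel grid cell by its top-left corner.
        object_counts = {}
        for xmin, ymin, xmax, ymax in boxes:
            area_key = (round(xmin / 100) * 100, round(ymin / 100) * 100)
            object_counts[area_key] = object_counts.get(area_key, 0) + 1

        # Anchor the crop at the cell with the most detections (like the committed
        # code, this raises ValueError when there are no detections at all).
        xmin, ymin = max(object_counts, key=object_counts.get)
        xmax = min(xmin + custom_width, image.width)
        ymax = min(ymin + custom_height, image.height)

        # Expand by the bleed, clamped to the image bounds.
        xmin = max(0, xmin - bleed)
        ymin = max(0, ymin - bleed)
        xmax = min(image.width, xmax + bleed)
        ymax = min(image.height, ymax + bleed)

        return image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))

    if __name__ == "__main__":
        img = Image.new("RGB", (1200, 800))
        boxes = [(110, 90, 200, 180), (130, 95, 220, 190), (900, 600, 1000, 700)]
        print(crop_densest_area(img, boxes, 800, 400).size)  # (820, 420)

Grouping detections by their rounded top-left corners is a coarse density estimate: boxes that straddle a cell boundary split their votes across neighboring cells, which is acceptable for picking a rough crop anchor but worth keeping in mind when tuning the grid size.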