kinsung committed
Commit 9af0a5c · 1 Parent(s): 628652c
Files changed (1): app.py (+26 −29)
app.py CHANGED
@@ -23,37 +23,34 @@ def extract_image(image, text, prob, custom_width, custom_height):
     target_sizes = torch.tensor([image.size[::-1]])
     results = feature_extractor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
 
-    # Retrieve coordinates of the detected key object based on the input text
-    key_object_coordinates = None
-
-    object_to_detect = text.lower()
+    # Count the number of objects in each area
+    object_counts = {}
     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
-        label_name = dmodel.config.id2label[label.item()].lower()
-        pout = pout + " " + label_name
-        if object_to_detect in label_name:
-            key_object_coordinates = box.tolist()
-
-            break
-
-    # Ensure that the key object is in the cropped image
-    if key_object_coordinates:
-        xmin, ymin, xmax, ymax = key_object_coordinates
-        width, height = image.size
-
-        # Calculate cropping coordinates based on key object location and custom dimensions
-        xmin = max(0, xmin - 50)
-        ymin = max(0, ymin - 50)
-        xmax = custom_width
-        ymax = custom_height
-
-        cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
-    else:
-        # If no key object found, just crop based on custom dimensions
-        cropped_image = image.crop((0, 0, custom_width, custom_height))
-
-    return cropped_image, pout
+        area_key = (round(box[0] / 100) * 100, round(box[1] / 100) * 100)  # Group by areas
+        object_counts[area_key] = object_counts.get(area_key, 0) + 1
+
+    # Find the area with the most detected objects
+    most_objects_area = max(object_counts, key=object_counts.get)
+
+    # Calculate cropping coordinates based on the area with most objects and custom dimensions
+    xmin, ymin = most_objects_area
+    xmax = min(xmin + custom_width, image.width)
+    ymax = min(ymin + custom_height, image.height)
+
+    # Apply a bleed of at least 10 pixels on all sides
+    xmin = max(0, xmin - 10)
+    ymin = max(0, ymin - 10)
+    xmax = min(image.width, xmax + 10)
+    ymax = min(image.height, ymax + 10)
+
+    cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
+
+    # Return the coordinates of the cropped area
+    coordinates = f"xmin: {int(xmin)}, ymin: {int(ymin)}, xmax: {int(xmax)}, ymax: {int(ymax)}"
+
+    return cropped_image, coordinates
 
 title = "ClipnCrop"
-description = "<p style='color:white'>Crop an image with the detected key object within the cropped region. The cropped image maintains resolution and ensures the key object is included.</p>"
+description = "<p style='color:white'>Crop an image with the area containing the most detected objects while maintaining custom dimensions and adding a 10-pixel bleed.</p>"
 examples = [['ex3.jpg', 'people', 0.96, 800, 400], ['ex2.jpg', 'smiling face', 0.85, 300, 400]]
 gr.Interface(fn=extract_image, inputs=[i1, i2, i3, i4, i5], outputs=[o1, o2], title=title, description=description, examples=examples, enable_queue=True).launch()
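For context, the new cropping strategy in this commit can be exercised on its own. The sketch below is a minimal reconstruction, not the app's code: crop_densest_area is a hypothetical helper, and plain float tuples stand in for the DETR results["boxes"] tensors. Working with Python floats also sidesteps a subtlety in the committed version, where box[0] and box[1] are 0-dim tensors, so the grid keys may not hash or compare by value depending on the PyTorch version.

    # Minimal sketch of the commit's density-based crop, assuming boxes are
    # already plain (xmin, ymin, xmax, ymax) floats (e.g. via box.tolist()).
    from PIL import Image

    def crop_densest_area(image, boxes, custom_width, custom_height, bleed=10):
        # Bucket each detection into a 100x100-pixel grid cell by its top-left corner.
        object_counts = {}
        for xmin, ymin, xmax, ymax in boxes:
            area_key = (round(xmin / 100) * 100, round(ymin / 100) * 100)
            object_counts[area_key] = object_counts.get(area_key, 0) + 1

        # Anchor the crop at the cell with the most detections (like the committed
        # code, this raises ValueError when there are no detections at all).
        xmin, ymin = max(object_counts, key=object_counts.get)
        xmax = min(xmin + custom_width, image.width)
        ymax = min(ymin + custom_height, image.height)

        # Expand by the bleed, clamped to the image bounds.
        xmin = max(0, xmin - bleed)
        ymin = max(0, ymin - bleed)
        xmax = min(image.width, xmax + bleed)
        ymax = min(image.height, ymax + bleed)

        return image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))

    if __name__ == "__main__":
        img = Image.new("RGB", (1200, 800))
        boxes = [(110, 90, 200, 180), (130, 95, 220, 190), (900, 600, 1000, 700)]
        print(crop_densest_area(img, boxes, 800, 400).size)  # (820, 420)

Grouping detections by their rounded top-left corners is a coarse density estimate: boxes that straddle a cell boundary split their votes across neighboring cells, which is acceptable for picking a rough crop anchor but worth keeping in mind when tuning the grid size.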