kinsung committed on
Commit e05714c · 1 Parent(s): 3483d27
Files changed (1)
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
+ import gradio as gr
+ from PIL import Image, ImageOps
+ from transformers import DetrImageProcessor, DetrForObjectDetection
+ import torch
+
+ feature_extractor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-101")
+ dmodel = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-101")
+
+ i1 = gr.Image(type="pil", label="Input image")
+ i2 = gr.Textbox(label="Input text")
+ i3 = gr.Number(value=0.96, label="Threshold score")
+ i4 = gr.Number(value=200, label="Custom width (optional)")
+ i5 = gr.Number(value=200, label="Custom height (optional)")
+ o1 = gr.Image(type="pil", label="Cropped part")
+ o2 = gr.Textbox(label="Detection score")
+
+ def extract_image(image, text, prob, custom_width, custom_height):
+     # Run DETR over the input image
+     inputs = feature_extractor(images=image, return_tensors="pt")
+     outputs = dmodel(**inputs)
+
+     # Convert raw outputs to boxes/labels/scores in image coordinates,
+     # keeping only detections above the user-supplied threshold
+     target_sizes = torch.tensor([image.size[::-1]])
+     results = feature_extractor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=prob)[0]
+
+     # Retrieve coordinates of the first detection whose label matches the input text
+     key_object_coordinates = None
+     detection_score = 0.0
+     object_to_detect = text.lower()
+     for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
+         label_name = dmodel.config.id2label[label.item()].lower()
+         if object_to_detect in label_name:
+             key_object_coordinates = box.tolist()
+             detection_score = score.item()
+             break
+
+     # Fall back to the full image when nothing matched
+     cropped_image = image
+
+     if key_object_coordinates:
+         # Clamp the box to the image bounds, shifting the opposite edge
+         # so the crop keeps its size where possible
+         xmin, ymin, xmax, ymax = key_object_coordinates
+         width, height = image.size
+         if xmax > width:
+             xmin -= xmax - width
+             xmax = width
+         if ymax > height:
+             ymin -= ymax - height
+             ymax = height
+         cropped_image = image.crop((int(xmin), int(ymin), int(xmax), int(ymax)))
+
+     # Fit the crop to the requested custom dimensions (ImageOps.fit crops and
+     # resizes to the target size; the unused ImageOps import suggests this was intended)
+     if custom_width and custom_height:
+         cropped_image = ImageOps.fit(cropped_image, (int(custom_width), int(custom_height)))
+
+     return cropped_image, round(detection_score, 4)
+
+ title = "ClipnCrop"
+ description = "<p style='color:white'>Object detection and cropping with Facebook DETR on Hugging Face Transformers. If the detection score is not high enough, consider the prediction void.</p>"
+ examples = [['ex3.jpg', 'black bag', 0.96, 200, 200], ['ex2.jpg', 'man in red dress', 0.85, 300, 300]]
+ gr.Interface(fn=extract_image, inputs=[i1, i2, i3, i4, i5], outputs=[o1, o2], title=title, description=description, examples=examples).queue().launch()
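
For a quick sanity check without launching the Gradio UI, a minimal sketch of calling extract_image directly in the same session; 'ex3.jpg' and 'black bag' are taken from the examples list above, and the 0.9 threshold is illustrative:

# Assumes ex3.jpg sits next to app.py, as in the examples list
from PIL import Image

img = Image.open("ex3.jpg").convert("RGB")
crop, score = extract_image(img, "black bag", 0.9, 200, 200)
crop.save("cropped.jpg")
print("Detection score:", score)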