rt-detr-object-detection-webrtc

Running on Zero

App Files Files Community

freddyaboulton HF staff committed on Sep 11, 2024

Commit

ccc35d4

1 Parent(s): 385e56e

push

Browse files

Files changed (1) hide show

app.py +20 -5

app.py CHANGED Viewed

@@ -3,10 +3,23 @@ import gradio as gr
 import cv2
 import tempfile
 from ultralytics import YOLOv10
 image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
 model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
 @spaces.GPU
 def yolov10_inference(image, conf_threshold):
@@ -17,6 +30,7 @@ def yolov10_inference(image, conf_threshold):
     results = image_processor.post_process_object_detection(outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3)
 def app():
@@ -39,23 +53,24 @@ def app():
             time_limit=30
         )
-gradio_app = gr.Blocks()
-with gradio_app:
     gr.HTML(
         """
     <h1 style='text-align: center'>
-    YOLOv10 Webcam Stream
     </h1>
     """)
     gr.HTML(
         """
         <h3 style='text-align: center'>
-        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
         </h3>
         """)
     with gr.Row():
         with gr.Column():
             app()
 if __name__ == '__main__':
-    gradio_app.launch()

 import cv2
 import tempfile
 from ultralytics import YOLOv10
+from PIL import Image, ImageDraw, ImageFont
 image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
 model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
+def draw_bounding_boxes(image, results, model, threshold=0.3):
+    draw = ImageDraw.Draw(image)
+    for result in results:
+        for score, label_id, box in zip(result["scores"], result["labels"], result["boxes"]):
+            if score > threshold:
+                label = model.config.id2label[label_id.item()]
+                box = [round(i) for i in box.tolist()]
+                draw.rectangle(box, outline="red", width=3)
+                draw.text((box[0], box[1]), f"{label}: {score:.2f}", fill="red")
+    return image
 @spaces.GPU
 def yolov10_inference(image, conf_threshold):
     results = image_processor.post_process_object_detection(outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3)
+    return draw_bounding_boxes(image, results, model, threshold=conf_threshold)
 def app():
             time_limit=30
         )
+css=""".my-group {max-width: 600px !important; max-height: 600 !important;}
+                      .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
+with gr.Blocks(css=css) as app:
     gr.HTML(
         """
     <h1 style='text-align: center'>
+    Near Real-Time Webcam Stream with RTDetr
     </h1>
     """)
     gr.HTML(
         """
         <h3 style='text-align: center'>
+        <a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
         </h3>
         """)
     with gr.Row():
         with gr.Column():
             app()
 if __name__ == '__main__':
+    app.launch()