onuralpszr committed
Commit ab8156f
1 parent: 4fb6d61

feat: ✨ YOLO-World-Seg image processing added


Signed-off-by: Onuralp SEZER <thunderbirdtr@gmail.com>

Files changed (3)
  1. README.md +3 -3
  2. app.py +184 -10
  3. requirements.txt +12 -12
README.md CHANGED
@@ -1,13 +1,13 @@
 ---
 title: YOLO World Seg
-emoji:
+emoji: 🎨
 colorFrom: purple
-colorTo: indigo
+colorTo: red
 sdk: gradio
 sdk_version: 4.19.1
 app_file: app.py
 pinned: false
-license: mit
+license: gpl-3.0
 ---
 - openai/clip-vit-base-patch32
 - wondervictor/YOLO-World
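The block between the `---` markers is the Hugging Face Spaces front matter: display title, emoji, theme colors, SDK pin, entry file, and license (now GPL-3.0). As a minimal sketch of how such front matter can be read programmatically, assuming PyYAML is installed (it is not in this repo's requirements, and the `load_front_matter` helper name is hypothetical):

```python
# Minimal sketch: parse Spaces-style YAML front matter from a README.
# Assumes PyYAML ("pip install pyyaml"); helper name is hypothetical.
import yaml

def load_front_matter(readme_text: str) -> dict:
    # The front matter sits between the first two "---" delimiter lines.
    _, block, _ = readme_text.split("---", 2)
    return yaml.safe_load(block)

config = load_front_matter(open("README.md").read())
print(config["sdk"], config["sdk_version"])  # e.g. "gradio 4.19.1"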
app.py CHANGED
@@ -1,16 +1,190 @@
-# import os
-# os.system("mim install 'mmengine>=0.7.0'")
-# os.system("mim install mmcv")
-# os.system("mim install 'mmdet>=3.0.0'")
-# os.system("pip install -e .")


-# from yolo_world import version
+import os
+os.system("mim install 'mmengine>=0.7.0'")
+os.system("mim install mmcv")
+os.system("mim install 'mmdet>=3.0.0'")
+os.system("pip install -e .")
+
+
+import numpy as np
+import torch
+from mmengine.config import Config
+from mmengine.dataset import Compose
+from mmengine.runner import Runner
+from mmengine.runner.amp import autocast
+from mmyolo.registry import RUNNERS
+from torchvision.ops import nms
+import supervision as sv
+import PIL.Image
+import cv2

 import gradio as gr

-def greet(name):
-    return "text"

-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+TITLE = """
+# YOLO-World-Seg
+
+This is a demo of zero-shot object detection and instance segmentation using
+[YOLO-World](https://github.com/AILab-CVC/YOLO-World).
+
+Powered by [Supervision](https://github.com/roboflow/supervision).
+"""
+
+EXAMPLES = [
+    ["https://media.roboflow.com/efficient-sam/corgi.jpg", "dog", 0.5, 0.5],
+    ["https://media.roboflow.com/efficient-sam/horses.jpg", "horse", 0.5, 0.5],
+    ["https://media.roboflow.com/efficient-sam/bears.jpg", "bear", 0.5, 0.5],
+]
+
+box_annotator = sv.BoxAnnotator()
+label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
+mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
+
+
+def load_runner():
+    cfg = Config.fromfile(
+        "./configs/segmentation/yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_seghead_finetune_lvis.py"
+    )
+    cfg.work_dir = "."
+    cfg.load_from = "yolo_world_seg_l_dual_vlpan_2e-4_80e_8gpus_seghead_finetune_lvis-5a642d30.pth"
+    runner = Runner.from_cfg(cfg)
+    runner.call_hook("before_run")
+    runner.load_or_resume()
+    pipeline = cfg.test_dataloader.dataset.pipeline
+    runner.pipeline = Compose(pipeline)
+    runner.model.eval()
+    return runner
+
+
+def run_image(
+    input_image,
+    class_names="person,car,bus,truck",
+    score_thr=0.05,
+    iou_thr=0.5,
+    nms_thr=0.5,
+    max_num_boxes=100,
+):
+    runner = load_runner()
+    # Gradio delivers the image as an RGB numpy array; write it to disk
+    # so the mmengine test pipeline can load it by path.
+    PIL.Image.fromarray(input_image).save("input.jpeg")
+
+    class_names = [class_name.strip() for class_name in class_names.split(",")]
+    # YOLO-World expects one prompt list per class, plus a trailing
+    # empty prompt for the background class.
+    texts = [[name] for name in class_names] + [[" "]]
+    data_info = runner.pipeline(dict(img_id=0, img_path="input.jpeg", texts=texts))
+
+    data_batch = dict(
+        inputs=data_info["inputs"].unsqueeze(0),
+        data_samples=[data_info["data_samples"]],
+    )
+
+    with autocast(enabled=False), torch.no_grad():
+        output = runner.model.test_step(data_batch)[0]
+        runner.model.class_names = texts
+        pred_instances = output.pred_instances
+
+    # Filter predictions: NMS first, then the score threshold, then cap
+    # the number of kept instances.
+    keep_idxs = nms(pred_instances.bboxes, pred_instances.scores, iou_threshold=iou_thr)
+    pred_instances = pred_instances[keep_idxs]
+    pred_instances = pred_instances[pred_instances.scores.float() > score_thr]
+
+    if len(pred_instances.scores) > max_num_boxes:
+        indices = pred_instances.scores.float().topk(max_num_boxes)[1]
+        pred_instances = pred_instances[indices]
+    output.pred_instances = pred_instances
+
+    result = pred_instances.cpu().numpy()
+    detections = sv.Detections(
+        xyxy=result["bboxes"],
+        class_id=result["labels"],
+        confidence=result["scores"],
+        mask=result["masks"],
+    )
+    detections = detections.with_nms(threshold=nms_thr)
+
+    labels = [
+        f"{texts[class_id][0]} {confidence:.3f}"
+        for class_id, confidence
+        in zip(detections.class_id, detections.confidence)
+    ]
+
+    svimage = box_annotator.annotate(input_image, detections)
+    svimage = label_annotator.annotate(svimage, detections, labels)
+    svimage = mask_annotator.annotate(svimage, detections)
+    return svimage
+
+
+confidence_threshold_component = gr.Slider(
+    minimum=0,
+    maximum=1.0,
+    value=0.3,
+    step=0.01,
+    label="Confidence Threshold",
+    info=(
+        "The confidence threshold for the YOLO-World model. Lower the threshold to "
+        "reduce false negatives, enhancing the model's sensitivity to detect "
+        "sought-after objects. Conversely, increase the threshold to minimize false "
+        "positives, preventing the model from identifying objects it shouldn't."
+    ))
+
+iou_threshold_component = gr.Slider(
+    minimum=0,
+    maximum=1.0,
+    value=0.5,
+    step=0.01,
+    label="IoU Threshold",
+    info=(
+        "The Intersection over Union (IoU) threshold for non-maximum suppression. "
+        "Decrease the value to lessen the occurrence of overlapping bounding boxes, "
+        "making the detection process stricter. On the other hand, increase the value "
+        "to allow more overlapping bounding boxes, accommodating a broader range of "
+        "detections."
+    ))
+
+with gr.Blocks() as demo:
+    gr.Markdown(TITLE)
+    with gr.Accordion("Configuration", open=False):
+        confidence_threshold_component.render()
+        iou_threshold_component.render()
+    with gr.Tab(label="Image"):
+        with gr.Row():
+            input_image_component = gr.Image(
+                type="numpy",
+                label="Input Image"
+            )
+            output_image_component = gr.Image(
+                type="numpy",
+                label="Output Image"
+            )
+        with gr.Row():
+            image_categories_text_component = gr.Textbox(
+                label="Categories",
+                placeholder="comma separated list of categories",
+                scale=7
+            )
+            image_submit_button_component = gr.Button(
+                value="Submit",
+                scale=1,
+                variant="primary"
+            )
+        gr.Examples(
+            fn=run_image,
+            examples=EXAMPLES,
+            inputs=[
+                input_image_component,
+                image_categories_text_component,
+                confidence_threshold_component,
+                iou_threshold_component,
+            ],
+            outputs=output_image_component
+        )
+
+    image_submit_button_component.click(
+        fn=run_image,
+        inputs=[
+            input_image_component,
+            image_categories_text_component,
+            confidence_threshold_component,
+            iou_threshold_component,
+        ],
+        outputs=output_image_component
+    )
+
+demo.launch(debug=False, show_error=True)
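For orientation, here is a minimal, self-contained sketch of the post-processing path `run_image` follows: raw boxes and scores go through torchvision NMS and a score threshold, then the survivors are wrapped in a `supervision` `Detections` object for annotation. The arrays below are dummy data standing in for model output, not values from YOLO-World:

```python
# Minimal sketch of the run_image post-processing with dummy arrays
# in place of model output (values are illustrative only).
import numpy as np
import torch
import supervision as sv
from torchvision.ops import nms

boxes = torch.tensor([[10., 10., 100., 100.], [12., 12., 98., 99.]])
scores = torch.tensor([0.9, 0.4])

# Step 1: torchvision NMS keeps the highest-scoring box among overlaps.
keep = nms(boxes, scores, iou_threshold=0.5)
boxes, scores = boxes[keep], scores[keep]

# Step 2: drop low-confidence predictions.
confident = scores > 0.05
boxes, scores = boxes[confident], scores[confident]

# Step 3: wrap the survivors for supervision's annotators.
detections = sv.Detections(
    xyxy=boxes.numpy(),
    confidence=scores.numpy(),
    class_id=np.zeros(len(boxes), dtype=int),
)
print(len(detections))  # the overlapping box is suppressed -> 1 detection
```

The two sliders map directly onto these steps: lowering the confidence threshold trades more recall for more false positives, while raising the NMS IoU threshold lets more overlapping boxes survive suppression.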
requirements.txt CHANGED
@@ -1,14 +1,14 @@
-openmim
+openmim
 gradio
-transformers
-# numpy
-# opencv-python
-# supervision
-# wheel
+transformers
+numpy
+opencv-python
+supervision
+wheel

-# --extra-index-url https://download.pytorch.org/whl/cu121
-# torch==2.1.0+cu121
-# torchdata==0.7.0
-# torchsummary==1.5.1
-# torchtext==0.16.0
-# torchvision==0.16.0+cu121
+--extra-index-url https://download.pytorch.org/whl/cu121
+torch==2.1.0+cu121
+torchdata==0.7.0
+torchsummary==1.5.1
+torchtext==0.16.0
+torchvision==0.16.0+cu121
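The requirements now uncomment the CUDA 12.1 wheel pins, pulled from the extra index URL, so `torch==2.1.0+cu121` and the matching `torchvision==0.16.0+cu121` must resolve together. As a minimal post-install sanity check:

```python
# Minimal sanity check that the pinned CUDA 12.1 wheels resolved correctly.
import torch
import torchvision

print(torch.__version__)          # expected: 2.1.0+cu121
print(torchvision.__version__)    # expected: 0.16.0+cu121
print(torch.cuda.is_available())  # True only on a CUDA-capable host
```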