StevenChen16 committed on
Commit eaf77a6 · verified · 1 Parent(s): ec4a3d7

Update app.py

Files changed (1):
  1. app.py +91 -94
app.py CHANGED
@@ -1,74 +1,68 @@
  import gradio as gr
- import cv2
- import tempfile
- from ultralytics import YOLOv10
  import spaces

-
- @spaces.GPU
- def yolov10_inference(image, video, model_id, image_size, conf_threshold):
-     # model = YOLOv10.from_pretrained(f'jameslahm/{model_id}')
-     model = YOLOv10.from_pretrained('weights/yolov10s.pt')
-     if image:
-         results = model.predict(source=image, imgsz=image_size, conf=conf_threshold)
-         annotated_image = results[0].plot()
-         return annotated_image[:, :, ::-1], None
-     else:
-         video_path = tempfile.mktemp(suffix=".webm")
-         with open(video_path, "wb") as f:
-             with open(video, "rb") as g:
-                 f.write(g.read())
-
-         cap = cv2.VideoCapture(video_path)
-         fps = cap.get(cv2.CAP_PROP_FPS)
-         frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
-         output_video_path = tempfile.mktemp(suffix=".webm")
-         out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'vp80'), fps, (frame_width, frame_height))
-
-         while cap.isOpened():
-             ret, frame = cap.read()
-             if not ret:
-                 break
-
-             results = model.predict(source=frame, imgsz=image_size, conf=conf_threshold)
-             annotated_frame = results[0].plot()
-             out.write(annotated_frame)
-
-         cap.release()
-         out.release()
-
-         return None, output_video_path
-
-
- def yolov10_inference_for_examples(image, model_path, image_size, conf_threshold):
-     annotated_image, _ = yolov10_inference(image, None, model_path, image_size, conf_threshold)
      return annotated_image

-
  def app():
      with gr.Blocks():
          with gr.Row():
              with gr.Column():
-                 image = gr.Image(type="pil", label="Image", visible=True)
-                 video = gr.Video(label="Video", visible=False)
-                 input_type = gr.Radio(
-                     choices=["Image", "Video"],
-                     value="Image",
-                     label="Input Type",
-                 )
                  model_id = gr.Dropdown(
                      label="Model",
                      choices=[
-                         "yolov10n",
-                         "yolov10s",
-                         "yolov10m",
-                         "yolov10b",
-                         "yolov10l",
-                         "yolov10x",
                      ],
-                     value="yolov10m",
                  )
                  image_size = gr.Slider(
                      label="Image Size",
@@ -79,68 +73,70 @@ def app():
                  )
                  conf_threshold = gr.Slider(
                      label="Confidence Threshold",
-                     minimum=0.0,
                      maximum=1.0,
-                     step=0.05,
                      value=0.25,
                  )
                  yolov10_infer = gr.Button(value="Detect Objects")

              with gr.Column():
-                 output_image = gr.Image(type="numpy", label="Annotated Image", visible=True)
                  output_video = gr.Video(label="Annotated Video", visible=False)

-             def update_visibility(input_type):
-                 image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
-                 video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
-                 output_image = gr.update(visible=True) if input_type == "Image" else gr.update(visible=False)
-                 output_video = gr.update(visible=False) if input_type == "Image" else gr.update(visible=True)
-
-                 return image, video, output_image, output_video
-
-             input_type.change(
-                 fn=update_visibility,
-                 inputs=[input_type],
-                 outputs=[image, video, output_image, output_video],
-             )
-
-             def run_inference(image, video, model_id, image_size, conf_threshold, input_type):
-                 if input_type == "Image":
-                     return yolov10_inference(image, None, model_id, image_size, conf_threshold)
-                 else:
-                     return yolov10_inference(None, video, model_id, image_size, conf_threshold)
-
-
              yolov10_infer.click(
-                 fn=run_inference,
-                 inputs=[image, video, model_id, image_size, conf_threshold, input_type],
-                 outputs=[output_image, output_video],
              )

          gr.Examples(
              examples=[
                  [
-                     "ultralytics/assets/bus.jpg",
-                     "yolov10s",
                      640,
                      0.25,
                  ],
                  [
-                     "ultralytics/assets/zidane.jpg",
-                     "yolov10s",
                      640,
                      0.25,
                  ],
              ],
-             fn=yolov10_inference_for_examples,
              inputs=[
                  image,
                  model_id,
                  image_size,
                  conf_threshold,
              ],
              outputs=[output_image],
-             cache_examples='lazy',
          )

  gradio_app = gr.Blocks()
@@ -154,11 +150,12 @@ with gradio_app:
      gr.HTML(
          """
          <h3 style='text-align: center'>
-             <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
          </h3>
          """)
      with gr.Row():
          with gr.Column():
              app()
- if __name__ == '__main__':
-     gradio_app.launch()
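One note on the removed video branch: it allocates temp paths with tempfile.mktemp, which the standard library deprecates because another process can claim the path between name generation and file creation. A minimal sketch of the same copy-to-temp step using tempfile.NamedTemporaryFile instead (the helper name and the .webm suffix are illustrative, not from this commit):

import shutil
import tempfile

def copy_upload_to_temp(uploaded_path):
    # NamedTemporaryFile(delete=False) creates the file atomically, so the
    # name cannot be claimed by another process the way a mktemp() path can.
    with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as tmp:
        with open(uploaded_path, "rb") as src:
            shutil.copyfileobj(src, tmp)
        return tmp.name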
 
  import gradio as gr
+ from ultralytics import YOLOv10
+ import supervision as sv
  import spaces
+ from huggingface_hub import hf_hub_download
+
+
+ def download_models(model_id):
+     hf_hub_download("kadirnar/Yolov10", filename=f"{model_id}", local_dir=f"./")
+     return f"./{model_id}"
+
+ box_annotator = sv.BoxAnnotator()
+ category_dict = {
+     0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus',
+     6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant',
+     11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat',
+     16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear',
+     22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag',
+     27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard',
+     32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove',
+     36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle',
+     40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl',
+     46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli',
+     51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake',
+     56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table',
+     61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard',
+     67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink',
+     72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors',
+     77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'
+ }
+
+
+ @spaces.GPU(duration=200)
+ def yolov10_inference(image, model_id, image_size, conf_threshold, iou_threshold):
+     model_path = download_models(model_id)
+     model = YOLOv10(model_path)
+     results = model(source=image, imgsz=image_size, iou=iou_threshold, conf=conf_threshold, verbose=False)[0]
+     detections = sv.Detections.from_ultralytics(results)
+
+     labels = [
+         f"{category_dict[class_id]} {confidence:.2f}"
+         for class_id, confidence in zip(detections.class_id, detections.confidence)
+     ]
+     annotated_image = box_annotator.annotate(image, detections=detections, labels=labels)

      return annotated_image

  def app():
      with gr.Blocks():
          with gr.Row():
              with gr.Column():
+                 # image = gr.Image(type="pil", label="Image")
+                 video = gr.Video(type="Video", visible=False)
+
                  model_id = gr.Dropdown(
                      label="Model",
                      choices=[
+                         "yolov10n.pt",
+                         "yolov10s.pt",
+                         "yolov10m.pt",
+                         "yolov10b.pt",
+                         "yolov10l.pt",
+                         "yolov10x.pt",
                      ],
+                     value="yolov10m.pt",
                  )
                  image_size = gr.Slider(
                      label="Image Size",

                  )
                  conf_threshold = gr.Slider(
                      label="Confidence Threshold",
+                     minimum=0.1,
                      maximum=1.0,
+                     step=0.1,
                      value=0.25,
                  )
+                 iou_threshold = gr.Slider(
+                     label="IoU Threshold",
+                     minimum=0.1,
+                     maximum=1.0,
+                     step=0.1,
+                     value=0.45,
+                 )
                  yolov10_infer = gr.Button(value="Detect Objects")

              with gr.Column():
+                 output_image = gr.Image(type="pil", label="Annotated Image")
                  output_video = gr.Video(label="Annotated Video", visible=False)

              yolov10_infer.click(
+                 fn=yolov10_inference,
+                 inputs=[
+                     image,
+                     model_id,
+                     image_size,
+                     conf_threshold,
+                     iou_threshold,
+                 ],
+                 outputs=[output_image],
              )

          gr.Examples(
              examples=[
                  [
+                     "dog.jpeg",
+                     "yolov10x.pt",
                      640,
                      0.25,
+                     0.45,
                  ],
                  [
+                     "huggingface.jpg",
+                     "yolov10m.pt",
                      640,
                      0.25,
+                     0.45,
+                 ],
+                 [
+                     "zidane.jpg",
+                     "yolov10b.pt",
+                     640,
+                     0.25,
+                     0.45,
                  ],
              ],
+             fn=yolov10_inference,
              inputs=[
                  image,
                  model_id,
                  image_size,
                  conf_threshold,
+                 iou_threshold,
              ],
              outputs=[output_image],
+             cache_examples=True,
          )

  gradio_app = gr.Blocks()

      gr.HTML(
          """
          <h3 style='text-align: center'>
+         Follow me for more!
+         <a href='https://twitter.com/kadirnar_ai' target='_blank'>Twitter</a> | <a href='https://github.com/kadirnar' target='_blank'>Github</a> | <a href='https://www.linkedin.com/in/kadir-nar/' target='_blank'>Linkedin</a> | <a href='https://www.huggingface.co/kadirnar/' target='_blank'>HuggingFace</a>
          </h3>
          """)
      with gr.Row():
          with gr.Column():
              app()
+
+ gradio_app.launch(debug=True)
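As committed, app() still wires an `image` component into yolov10_infer.click(inputs=[...]) and gr.Examples(inputs=[...]), but the line that creates it is commented out, so building the Blocks would raise NameError: name 'image' is not defined; gr.Video also appears to have no `type` keyword in Gradio's API. A minimal sketch of the input column as it would presumably need to read for that wiring to resolve (an assumed fix, not part of this diff):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Restore the component referenced by click() and gr.Examples.
            image = gr.Image(type="pil", label="Image")
            # Drop the unsupported `type` keyword; keep the placeholder hidden.
            video = gr.Video(visible=False)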