ankanxopencv committed on
Commit
fb26b5c
1 Parent(s): 0816a52

Upload 13 files

.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ sample/person.mp4 filter=lfs diff=lfs merge=lfs -text
+ sample/video_1.mp4 filter=lfs diff=lfs merge=lfs -text
+ yolov3.weights filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,127 @@
+ import cv2
+ import numpy as np
+ import gradio as gr  # type: ignore
+ from mbnet import load_model, detect_objects, get_box_dimensions, draw_labels, load_img
+ from yolov3 import load_image, load_yolo, detect_objects_yolo, get_box_dimensions_yolo, draw_labels_yolo
+
+
+ # Image Inference
+
+ def img_inf(img, model):
+     if model == "MobileNet-SSD":
+         model, classes, colors = load_model()
+         image, height, width, channels = load_img(img)
+         blob, outputs = detect_objects(image, model)
+         boxes, class_ids = get_box_dimensions(outputs, height, width)
+         image1 = draw_labels(boxes, colors, class_ids, classes, image)
+         return cv2.cvtColor(image1, cv2.COLOR_BGR2RGB)
+     else:
+         model, classes, colors, output_layers = load_yolo()
+         image, height, width, channels = load_image(img)
+         blob, outputs = detect_objects_yolo(image, model, output_layers)
+         boxes, confs, class_ids = get_box_dimensions_yolo(outputs, height, width)
+         image = draw_labels_yolo(boxes, confs, colors, class_ids, classes, image)
+         return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+
+ model_name = gr.Radio(["MobileNet-SSD", "YOLOv3"], value="YOLOv3", label="Model", info="Choose your model")
+ inputs_image = gr.Image(type="filepath", label="Input Image")
+ outputs_image = gr.Image(type="numpy", label="Output Image")
+ interface_image = gr.Interface(
+     fn=img_inf,
+     inputs=[inputs_image, model_name],
+     outputs=outputs_image,
+     title="Image Inference",
+     description="Upload your photo, select a model, and see the results!",
+     examples=[["sample/dog.jpg"]],
+     cache_examples=False,
+ )
+
+
+ # Video Inference
+
+ def vid_inf(vid, model_type):
+     if model_type == "MobileNet-SSD":
+         cap = cv2.VideoCapture(vid)
+         # get the video frame width and height for proper saving of the output video
+         frame_width = int(cap.get(3))
+         frame_height = int(cap.get(4))
+         fps = int(cap.get(cv2.CAP_PROP_FPS))
+         frame_size = (frame_width, frame_height)
+         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+         output_video = "output_recorded.mp4"
+
+         # create the `VideoWriter()` object
+         out = cv2.VideoWriter(output_video, fourcc, fps, frame_size)
+
+         model, classes, colors = load_model()
+         while cap.isOpened():
+             ret, frame = cap.read()
+             if ret:
+                 height, width, channels = frame.shape
+                 blob, outputs = detect_objects(frame, model)
+                 boxes, class_ids = get_box_dimensions(outputs, height, width)
+                 frame = draw_labels(boxes, colors, class_ids, classes, frame)
+                 out.write(frame)
+                 yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), None
+             else:
+                 break
+
+         cap.release()
+         out.release()
+         cv2.destroyAllWindows()
+         yield None, output_video
+
+     else:
+         cap = cv2.VideoCapture(vid)
+         # get the video frame width and height for proper saving of the output video
+         frame_width = int(cap.get(3))
+         frame_height = int(cap.get(4))
+         fps = int(cap.get(cv2.CAP_PROP_FPS))
+         frame_size = (frame_width, frame_height)
+         fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+         output_video = "output_recorded.mp4"
+
+         # create the `VideoWriter()` object
+         out = cv2.VideoWriter(output_video, fourcc, fps, frame_size)
+
+         model, classes, colors, output_layers = load_yolo()
+         while cap.isOpened():
+             ret, frame_y = cap.read()
+             if ret:
+                 height, width, channels = frame_y.shape
+                 blob, outputs = detect_objects_yolo(frame_y, model, output_layers)
+                 boxes, confs, class_ids = get_box_dimensions_yolo(outputs, height, width)
+                 frame_y = draw_labels_yolo(boxes, confs, colors, class_ids, classes, frame_y)
+                 out.write(frame_y)
+                 yield cv2.cvtColor(frame_y, cv2.COLOR_BGR2RGB), None
+             else:
+                 break
+
+         cap.release()
+         out.release()
+         cv2.destroyAllWindows()
+         yield None, output_video
+
+
+ model_name = gr.Radio(["MobileNet-SSD", "YOLOv3"], value="YOLOv3", label="Model", info="Choose your model")
+ input_video = gr.Video(sources=None, label="Input Video")
+ output_frame = gr.Image(type="numpy", label="Output Frames")
+ output_video_file = gr.Video(label="Output video")
+
+
+ interface_video = gr.Interface(
+     fn=vid_inf,
+     inputs=[input_video, model_name],
+     outputs=[output_frame, output_video_file],
+     title="Video Inference",
+     description="Upload your video, select a model, and see the results!",
+     examples=[["sample/video_1.mp4"], ["sample/person.mp4"]],
+     cache_examples=False,
+ )
+
+ gr.TabbedInterface(
+     [interface_image, interface_video],
+     tab_names=['Image', 'Video'],
+     title='GradioxOpenCV-DNN'
+ ).queue().launch()
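
A note on the streaming pattern above: `vid_inf` is a generator, so Gradio refreshes the "Output Frames" image on every `yield frame, None` and only fills the "Output video" component when the final `yield None, output_video` arrives. Below is a minimal sketch of the same two-output streaming pattern, assuming a Gradio version with generator (iterative output) support; `fake_frames` and `demo` are illustrative names, not part of this commit:

```python
import numpy as np
import gradio as gr

def fake_frames(n):
    # stream one synthetic frame at a time, leaving the video output empty
    for i in range(int(n)):
        yield np.full((64, 64, 3), i * 25, dtype=np.uint8), None
    # the final yield would normally carry the recorded file path instead of None
    yield None, None

demo = gr.Interface(
    fn=fake_frames,
    inputs=gr.Number(value=5, label="Frames"),
    outputs=[gr.Image(type="numpy"), gr.Video()],
)
# demo.queue().launch()
```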
coco.names ADDED
@@ -0,0 +1,80 @@
1
+ person
2
+ bicycle
3
+ car
4
+ motorbike
5
+ aeroplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ stop sign
13
+ parking meter
14
+ bench
15
+ bird
16
+ cat
17
+ dog
18
+ horse
19
+ sheep
20
+ cow
21
+ elephant
22
+ bear
23
+ zebra
24
+ giraffe
25
+ backpack
26
+ umbrella
27
+ handbag
28
+ tie
29
+ suitcase
30
+ frisbee
31
+ skis
32
+ snowboard
33
+ sports ball
34
+ kite
35
+ baseball bat
36
+ baseball glove
37
+ skateboard
38
+ surfboard
39
+ tennis racket
40
+ bottle
41
+ wine glass
42
+ cup
43
+ fork
44
+ knife
45
+ spoon
46
+ bowl
47
+ banana
48
+ apple
49
+ sandwich
50
+ orange
51
+ broccoli
52
+ carrot
53
+ hot dog
54
+ pizza
55
+ donut
56
+ cake
57
+ chair
58
+ sofa
59
+ pottedplant
60
+ bed
61
+ diningtable
62
+ toilet
63
+ tvmonitor
64
+ laptop
65
+ mouse
66
+ remote
67
+ keyboard
68
+ cell phone
69
+ microwave
70
+ oven
71
+ toaster
72
+ sink
73
+ refrigerator
74
+ book
75
+ clock
76
+ vase
77
+ scissors
78
+ teddy bear
79
+ hair drier
80
+ toothbrush
frozen_inference_graph.pb ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a8d8a89d695842e60d8c6d144181100555563e21acf2fa1e8f561fec5c3c6ad
3
+ size 69688296
mbnet.py ADDED
@@ -0,0 +1,80 @@
+ import cv2
+ import numpy as np
+ #import time
+
+
+ #video_path = 'D:/OfficeWork/VS_code_exp/exp/video_1.mp4'
+ #image_path = 'D:/OfficeWork/VS_code_exp/exp/test.jpg.jpg'
+
+ def load_model():
+     model = cv2.dnn.readNet(model='frozen_inference_graph.pb',
+                             config='ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt',
+                             framework='TensorFlow')
+     with open('object_detection_classes_coco.txt', 'r') as f:
+         class_names = f.read().split('\n')
+     COLORS = np.random.uniform(0, 255, size=(len(class_names), 3))
+     return model, class_names, COLORS
+
+ def load_img(img_path):
+     img = cv2.imread(img_path)
+     img = cv2.resize(img, None, fx=0.4, fy=0.4)
+     height, width, channels = img.shape
+     return img, height, width, channels
+
+ def detect_objects(img, net):
+     blob = cv2.dnn.blobFromImage(img, size=(300, 300), mean=(104, 117, 123), swapRB=True)
+     net.setInput(blob)
+     outputs = net.forward()
+     #print(outputs)
+     return blob, outputs
+
+ def get_box_dimensions(outputs, height, width):
+     boxes = []
+     class_ids = []
+
+     for detect in outputs[0, 0, :, :]:
+         scores = detect[2]
+         class_id = detect[1]
+         if scores > 0.3:
+             # detect[3:7] holds the normalized box corners
+             x_min = int(detect[3] * width)
+             y_min = int(detect[4] * height)
+             x_max = int(detect[5] * width)
+             y_max = int(detect[6] * height)
+             # draw_labels() treats these as two opposite corner points
+             boxes.append([x_min, y_min, x_max, y_max])
+             class_ids.append(class_id)
+     return boxes, class_ids
+
+ def draw_labels(boxes, colors, class_ids, classes, img):
+     font = cv2.FONT_HERSHEY_PLAIN
+     # classes and colors are supplied by the caller; no need to reload the model here
+     for i in range(len(boxes)):
+         x_min, y_min, x_max, y_max = boxes[i]
+         label = classes[int(class_ids[i]) - 1]
+         color = colors[i]
+         cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 5)
+         cv2.putText(img, label, (x_min, y_min - 5), font, 5, color, 5)
+     return img
+
+ def image_detect(img_path):
+     model, classes, colors = load_model()
+     image, height, width, channels = load_img(img_path)
+     blob, outputs = detect_objects(image, model)
+     boxes, class_ids = get_box_dimensions(outputs, height, width)
+     image1 = draw_labels(boxes, colors, class_ids, classes, image)
+     return image1
+
+
+ def start_video(video_path):
+     model, classes, colors = load_model()
+     cap = cv2.VideoCapture(video_path)
+     while True:
+         _, frame = cap.read()
+         height, width, channels = frame.shape
+         blob, outputs = detect_objects(frame, model)
+         boxes, class_ids = get_box_dimensions(outputs, height, width)
+         frame = draw_labels(boxes, colors, class_ids, classes, frame)
+         yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+ cv2.destroyAllWindows()
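
For reference, the layout that `get_box_dimensions` unpacks: the MobileNet-SSD detection head returns a `(1, 1, N, 7)` tensor whose rows are `[image_id, class_id, confidence, x_min, y_min, x_max, y_max]` with coordinates normalized to `[0, 1]`. A small sketch that prints the raw detections using the functions above; it assumes the model files from this commit and `sample/dog.jpg` sit in the working directory:

```python
from mbnet import load_model, load_img, detect_objects

net, class_names, _ = load_model()
img, h, w, _ = load_img("sample/dog.jpg")
_, outputs = detect_objects(img, net)

print(outputs.shape)  # (1, 1, N, 7)
for det in outputs[0, 0, :, :]:
    if det[2] > 0.3:  # same confidence threshold as get_box_dimensions()
        print(class_names[int(det[1]) - 1], float(det[2]))
```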
object_detection_classes_coco.txt ADDED
@@ -0,0 +1,90 @@
1
+ person
2
+ bicycle
3
+ car
4
+ motorcycle
5
+ airplane
6
+ bus
7
+ train
8
+ truck
9
+ boat
10
+ traffic light
11
+ fire hydrant
12
+ street sign
13
+ stop sign
14
+ parking meter
15
+ bench
16
+ bird
17
+ cat
18
+ dog
19
+ horse
20
+ sheep
21
+ cow
22
+ elephant
23
+ bear
24
+ zebra
25
+ giraffe
26
+ hat
27
+ backpack
28
+ umbrella
29
+ shoe
30
+ eye glasses
31
+ handbag
32
+ tie
33
+ suitcase
34
+ frisbee
35
+ skis
36
+ snowboard
37
+ sports ball
38
+ kite
39
+ baseball bat
40
+ baseball glove
41
+ skateboard
42
+ surfboard
43
+ tennis racket
44
+ bottle
45
+ plate
46
+ wine glass
47
+ cup
48
+ fork
49
+ knife
50
+ spoon
51
+ bowl
52
+ banana
53
+ apple
54
+ sandwich
55
+ orange
56
+ broccoli
57
+ carrot
58
+ hot dog
59
+ pizza
60
+ donut
61
+ cake
62
+ chair
63
+ couch
64
+ potted plant
65
+ bed
66
+ mirror
67
+ dining table
68
+ window
69
+ desk
70
+ toilet
71
+ door
72
+ tv
73
+ laptop
74
+ mouse
75
+ remote
76
+ keyboard
77
+ cell phone
78
+ microwave
79
+ oven
80
+ toaster
81
+ sink
82
+ refrigerator
83
+ blender
84
+ book
85
+ clock
86
+ vase
87
+ scissors
88
+ teddy bear
89
+ hair drier
90
+ toothbrush
requirements.txt ADDED
@@ -0,0 +1,3 @@
1
+ opencv-contrib-python
2
+ numpy
3
+ gradio
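
The requirements are unpinned, so whatever current releases of `opencv-contrib-python`, `numpy`, and `gradio` are available will be installed. A quick sanity check after installing (a sketch; no version pins are implied by this commit):

```python
import cv2
import numpy
import gradio

print("opencv:", cv2.__version__)
print("numpy:", numpy.__version__)
print("gradio:", gradio.__version__)
```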
sample/dog.jpg ADDED
sample/person.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46636187a6af45c1fe71b5b8e14d96eb64908f8b285f29bd194e9e9e66c0cb02
3
+ size 8497766
sample/video_1.mp4 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4faf22572a0655605807476f3766e79be5b97bfdb55af020d6404e5561b9e122
3
+ size 1803408
ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt ADDED
The diff for this file is too large to render. See raw diff
 
yolov3.cfg ADDED
@@ -0,0 +1,789 @@
1
+ [net]
2
+ # Testing
3
+ # batch=1
4
+ # subdivisions=1
5
+ # Training
6
+ batch=64
7
+ subdivisions=16
8
+ width=608
9
+ height=608
10
+ channels=3
11
+ momentum=0.9
12
+ decay=0.0005
13
+ angle=0
14
+ saturation = 1.5
15
+ exposure = 1.5
16
+ hue=.1
17
+
18
+ learning_rate=0.001
19
+ burn_in=1000
20
+ max_batches = 500200
21
+ policy=steps
22
+ steps=400000,450000
23
+ scales=.1,.1
24
+
25
+ [convolutional]
26
+ batch_normalize=1
27
+ filters=32
28
+ size=3
29
+ stride=1
30
+ pad=1
31
+ activation=leaky
32
+
33
+ # Downsample
34
+
35
+ [convolutional]
36
+ batch_normalize=1
37
+ filters=64
38
+ size=3
39
+ stride=2
40
+ pad=1
41
+ activation=leaky
42
+
43
+ [convolutional]
44
+ batch_normalize=1
45
+ filters=32
46
+ size=1
47
+ stride=1
48
+ pad=1
49
+ activation=leaky
50
+
51
+ [convolutional]
52
+ batch_normalize=1
53
+ filters=64
54
+ size=3
55
+ stride=1
56
+ pad=1
57
+ activation=leaky
58
+
59
+ [shortcut]
60
+ from=-3
61
+ activation=linear
62
+
63
+ # Downsample
64
+
65
+ [convolutional]
66
+ batch_normalize=1
67
+ filters=128
68
+ size=3
69
+ stride=2
70
+ pad=1
71
+ activation=leaky
72
+
73
+ [convolutional]
74
+ batch_normalize=1
75
+ filters=64
76
+ size=1
77
+ stride=1
78
+ pad=1
79
+ activation=leaky
80
+
81
+ [convolutional]
82
+ batch_normalize=1
83
+ filters=128
84
+ size=3
85
+ stride=1
86
+ pad=1
87
+ activation=leaky
88
+
89
+ [shortcut]
90
+ from=-3
91
+ activation=linear
92
+
93
+ [convolutional]
94
+ batch_normalize=1
95
+ filters=64
96
+ size=1
97
+ stride=1
98
+ pad=1
99
+ activation=leaky
100
+
101
+ [convolutional]
102
+ batch_normalize=1
103
+ filters=128
104
+ size=3
105
+ stride=1
106
+ pad=1
107
+ activation=leaky
108
+
109
+ [shortcut]
110
+ from=-3
111
+ activation=linear
112
+
113
+ # Downsample
114
+
115
+ [convolutional]
116
+ batch_normalize=1
117
+ filters=256
118
+ size=3
119
+ stride=2
120
+ pad=1
121
+ activation=leaky
122
+
123
+ [convolutional]
124
+ batch_normalize=1
125
+ filters=128
126
+ size=1
127
+ stride=1
128
+ pad=1
129
+ activation=leaky
130
+
131
+ [convolutional]
132
+ batch_normalize=1
133
+ filters=256
134
+ size=3
135
+ stride=1
136
+ pad=1
137
+ activation=leaky
138
+
139
+ [shortcut]
140
+ from=-3
141
+ activation=linear
142
+
143
+ [convolutional]
144
+ batch_normalize=1
145
+ filters=128
146
+ size=1
147
+ stride=1
148
+ pad=1
149
+ activation=leaky
150
+
151
+ [convolutional]
152
+ batch_normalize=1
153
+ filters=256
154
+ size=3
155
+ stride=1
156
+ pad=1
157
+ activation=leaky
158
+
159
+ [shortcut]
160
+ from=-3
161
+ activation=linear
162
+
163
+ [convolutional]
164
+ batch_normalize=1
165
+ filters=128
166
+ size=1
167
+ stride=1
168
+ pad=1
169
+ activation=leaky
170
+
171
+ [convolutional]
172
+ batch_normalize=1
173
+ filters=256
174
+ size=3
175
+ stride=1
176
+ pad=1
177
+ activation=leaky
178
+
179
+ [shortcut]
180
+ from=-3
181
+ activation=linear
182
+
183
+ [convolutional]
184
+ batch_normalize=1
185
+ filters=128
186
+ size=1
187
+ stride=1
188
+ pad=1
189
+ activation=leaky
190
+
191
+ [convolutional]
192
+ batch_normalize=1
193
+ filters=256
194
+ size=3
195
+ stride=1
196
+ pad=1
197
+ activation=leaky
198
+
199
+ [shortcut]
200
+ from=-3
201
+ activation=linear
202
+
203
+
204
+ [convolutional]
205
+ batch_normalize=1
206
+ filters=128
207
+ size=1
208
+ stride=1
209
+ pad=1
210
+ activation=leaky
211
+
212
+ [convolutional]
213
+ batch_normalize=1
214
+ filters=256
215
+ size=3
216
+ stride=1
217
+ pad=1
218
+ activation=leaky
219
+
220
+ [shortcut]
221
+ from=-3
222
+ activation=linear
223
+
224
+ [convolutional]
225
+ batch_normalize=1
226
+ filters=128
227
+ size=1
228
+ stride=1
229
+ pad=1
230
+ activation=leaky
231
+
232
+ [convolutional]
233
+ batch_normalize=1
234
+ filters=256
235
+ size=3
236
+ stride=1
237
+ pad=1
238
+ activation=leaky
239
+
240
+ [shortcut]
241
+ from=-3
242
+ activation=linear
243
+
244
+ [convolutional]
245
+ batch_normalize=1
246
+ filters=128
247
+ size=1
248
+ stride=1
249
+ pad=1
250
+ activation=leaky
251
+
252
+ [convolutional]
253
+ batch_normalize=1
254
+ filters=256
255
+ size=3
256
+ stride=1
257
+ pad=1
258
+ activation=leaky
259
+
260
+ [shortcut]
261
+ from=-3
262
+ activation=linear
263
+
264
+ [convolutional]
265
+ batch_normalize=1
266
+ filters=128
267
+ size=1
268
+ stride=1
269
+ pad=1
270
+ activation=leaky
271
+
272
+ [convolutional]
273
+ batch_normalize=1
274
+ filters=256
275
+ size=3
276
+ stride=1
277
+ pad=1
278
+ activation=leaky
279
+
280
+ [shortcut]
281
+ from=-3
282
+ activation=linear
283
+
284
+ # Downsample
285
+
286
+ [convolutional]
287
+ batch_normalize=1
288
+ filters=512
289
+ size=3
290
+ stride=2
291
+ pad=1
292
+ activation=leaky
293
+
294
+ [convolutional]
295
+ batch_normalize=1
296
+ filters=256
297
+ size=1
298
+ stride=1
299
+ pad=1
300
+ activation=leaky
301
+
302
+ [convolutional]
303
+ batch_normalize=1
304
+ filters=512
305
+ size=3
306
+ stride=1
307
+ pad=1
308
+ activation=leaky
309
+
310
+ [shortcut]
311
+ from=-3
312
+ activation=linear
313
+
314
+
315
+ [convolutional]
316
+ batch_normalize=1
317
+ filters=256
318
+ size=1
319
+ stride=1
320
+ pad=1
321
+ activation=leaky
322
+
323
+ [convolutional]
324
+ batch_normalize=1
325
+ filters=512
326
+ size=3
327
+ stride=1
328
+ pad=1
329
+ activation=leaky
330
+
331
+ [shortcut]
332
+ from=-3
333
+ activation=linear
334
+
335
+
336
+ [convolutional]
337
+ batch_normalize=1
338
+ filters=256
339
+ size=1
340
+ stride=1
341
+ pad=1
342
+ activation=leaky
343
+
344
+ [convolutional]
345
+ batch_normalize=1
346
+ filters=512
347
+ size=3
348
+ stride=1
349
+ pad=1
350
+ activation=leaky
351
+
352
+ [shortcut]
353
+ from=-3
354
+ activation=linear
355
+
356
+
357
+ [convolutional]
358
+ batch_normalize=1
359
+ filters=256
360
+ size=1
361
+ stride=1
362
+ pad=1
363
+ activation=leaky
364
+
365
+ [convolutional]
366
+ batch_normalize=1
367
+ filters=512
368
+ size=3
369
+ stride=1
370
+ pad=1
371
+ activation=leaky
372
+
373
+ [shortcut]
374
+ from=-3
375
+ activation=linear
376
+
377
+ [convolutional]
378
+ batch_normalize=1
379
+ filters=256
380
+ size=1
381
+ stride=1
382
+ pad=1
383
+ activation=leaky
384
+
385
+ [convolutional]
386
+ batch_normalize=1
387
+ filters=512
388
+ size=3
389
+ stride=1
390
+ pad=1
391
+ activation=leaky
392
+
393
+ [shortcut]
394
+ from=-3
395
+ activation=linear
396
+
397
+
398
+ [convolutional]
399
+ batch_normalize=1
400
+ filters=256
401
+ size=1
402
+ stride=1
403
+ pad=1
404
+ activation=leaky
405
+
406
+ [convolutional]
407
+ batch_normalize=1
408
+ filters=512
409
+ size=3
410
+ stride=1
411
+ pad=1
412
+ activation=leaky
413
+
414
+ [shortcut]
415
+ from=-3
416
+ activation=linear
417
+
418
+
419
+ [convolutional]
420
+ batch_normalize=1
421
+ filters=256
422
+ size=1
423
+ stride=1
424
+ pad=1
425
+ activation=leaky
426
+
427
+ [convolutional]
428
+ batch_normalize=1
429
+ filters=512
430
+ size=3
431
+ stride=1
432
+ pad=1
433
+ activation=leaky
434
+
435
+ [shortcut]
436
+ from=-3
437
+ activation=linear
438
+
439
+ [convolutional]
440
+ batch_normalize=1
441
+ filters=256
442
+ size=1
443
+ stride=1
444
+ pad=1
445
+ activation=leaky
446
+
447
+ [convolutional]
448
+ batch_normalize=1
449
+ filters=512
450
+ size=3
451
+ stride=1
452
+ pad=1
453
+ activation=leaky
454
+
455
+ [shortcut]
456
+ from=-3
457
+ activation=linear
458
+
459
+ # Downsample
460
+
461
+ [convolutional]
462
+ batch_normalize=1
463
+ filters=1024
464
+ size=3
465
+ stride=2
466
+ pad=1
467
+ activation=leaky
468
+
469
+ [convolutional]
470
+ batch_normalize=1
471
+ filters=512
472
+ size=1
473
+ stride=1
474
+ pad=1
475
+ activation=leaky
476
+
477
+ [convolutional]
478
+ batch_normalize=1
479
+ filters=1024
480
+ size=3
481
+ stride=1
482
+ pad=1
483
+ activation=leaky
484
+
485
+ [shortcut]
486
+ from=-3
487
+ activation=linear
488
+
489
+ [convolutional]
490
+ batch_normalize=1
491
+ filters=512
492
+ size=1
493
+ stride=1
494
+ pad=1
495
+ activation=leaky
496
+
497
+ [convolutional]
498
+ batch_normalize=1
499
+ filters=1024
500
+ size=3
501
+ stride=1
502
+ pad=1
503
+ activation=leaky
504
+
505
+ [shortcut]
506
+ from=-3
507
+ activation=linear
508
+
509
+ [convolutional]
510
+ batch_normalize=1
511
+ filters=512
512
+ size=1
513
+ stride=1
514
+ pad=1
515
+ activation=leaky
516
+
517
+ [convolutional]
518
+ batch_normalize=1
519
+ filters=1024
520
+ size=3
521
+ stride=1
522
+ pad=1
523
+ activation=leaky
524
+
525
+ [shortcut]
526
+ from=-3
527
+ activation=linear
528
+
529
+ [convolutional]
530
+ batch_normalize=1
531
+ filters=512
532
+ size=1
533
+ stride=1
534
+ pad=1
535
+ activation=leaky
536
+
537
+ [convolutional]
538
+ batch_normalize=1
539
+ filters=1024
540
+ size=3
541
+ stride=1
542
+ pad=1
543
+ activation=leaky
544
+
545
+ [shortcut]
546
+ from=-3
547
+ activation=linear
548
+
549
+ ######################
550
+
551
+ [convolutional]
552
+ batch_normalize=1
553
+ filters=512
554
+ size=1
555
+ stride=1
556
+ pad=1
557
+ activation=leaky
558
+
559
+ [convolutional]
560
+ batch_normalize=1
561
+ size=3
562
+ stride=1
563
+ pad=1
564
+ filters=1024
565
+ activation=leaky
566
+
567
+ [convolutional]
568
+ batch_normalize=1
569
+ filters=512
570
+ size=1
571
+ stride=1
572
+ pad=1
573
+ activation=leaky
574
+
575
+ [convolutional]
576
+ batch_normalize=1
577
+ size=3
578
+ stride=1
579
+ pad=1
580
+ filters=1024
581
+ activation=leaky
582
+
583
+ [convolutional]
584
+ batch_normalize=1
585
+ filters=512
586
+ size=1
587
+ stride=1
588
+ pad=1
589
+ activation=leaky
590
+
591
+ [convolutional]
592
+ batch_normalize=1
593
+ size=3
594
+ stride=1
595
+ pad=1
596
+ filters=1024
597
+ activation=leaky
598
+
599
+ [convolutional]
600
+ size=1
601
+ stride=1
602
+ pad=1
603
+ filters=255
604
+ activation=linear
605
+
606
+
607
+ [yolo]
608
+ mask = 6,7,8
609
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
610
+ classes=80
611
+ num=9
612
+ jitter=.3
613
+ ignore_thresh = .7
614
+ truth_thresh = 1
615
+ random=1
616
+
617
+
618
+ [route]
619
+ layers = -4
620
+
621
+ [convolutional]
622
+ batch_normalize=1
623
+ filters=256
624
+ size=1
625
+ stride=1
626
+ pad=1
627
+ activation=leaky
628
+
629
+ [upsample]
630
+ stride=2
631
+
632
+ [route]
633
+ layers = -1, 61
634
+
635
+
636
+
637
+ [convolutional]
638
+ batch_normalize=1
639
+ filters=256
640
+ size=1
641
+ stride=1
642
+ pad=1
643
+ activation=leaky
644
+
645
+ [convolutional]
646
+ batch_normalize=1
647
+ size=3
648
+ stride=1
649
+ pad=1
650
+ filters=512
651
+ activation=leaky
652
+
653
+ [convolutional]
654
+ batch_normalize=1
655
+ filters=256
656
+ size=1
657
+ stride=1
658
+ pad=1
659
+ activation=leaky
660
+
661
+ [convolutional]
662
+ batch_normalize=1
663
+ size=3
664
+ stride=1
665
+ pad=1
666
+ filters=512
667
+ activation=leaky
668
+
669
+ [convolutional]
670
+ batch_normalize=1
671
+ filters=256
672
+ size=1
673
+ stride=1
674
+ pad=1
675
+ activation=leaky
676
+
677
+ [convolutional]
678
+ batch_normalize=1
679
+ size=3
680
+ stride=1
681
+ pad=1
682
+ filters=512
683
+ activation=leaky
684
+
685
+ [convolutional]
686
+ size=1
687
+ stride=1
688
+ pad=1
689
+ filters=255
690
+ activation=linear
691
+
692
+
693
+ [yolo]
694
+ mask = 3,4,5
695
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
696
+ classes=80
697
+ num=9
698
+ jitter=.3
699
+ ignore_thresh = .7
700
+ truth_thresh = 1
701
+ random=1
702
+
703
+
704
+
705
+ [route]
706
+ layers = -4
707
+
708
+ [convolutional]
709
+ batch_normalize=1
710
+ filters=128
711
+ size=1
712
+ stride=1
713
+ pad=1
714
+ activation=leaky
715
+
716
+ [upsample]
717
+ stride=2
718
+
719
+ [route]
720
+ layers = -1, 36
721
+
722
+
723
+
724
+ [convolutional]
725
+ batch_normalize=1
726
+ filters=128
727
+ size=1
728
+ stride=1
729
+ pad=1
730
+ activation=leaky
731
+
732
+ [convolutional]
733
+ batch_normalize=1
734
+ size=3
735
+ stride=1
736
+ pad=1
737
+ filters=256
738
+ activation=leaky
739
+
740
+ [convolutional]
741
+ batch_normalize=1
742
+ filters=128
743
+ size=1
744
+ stride=1
745
+ pad=1
746
+ activation=leaky
747
+
748
+ [convolutional]
749
+ batch_normalize=1
750
+ size=3
751
+ stride=1
752
+ pad=1
753
+ filters=256
754
+ activation=leaky
755
+
756
+ [convolutional]
757
+ batch_normalize=1
758
+ filters=128
759
+ size=1
760
+ stride=1
761
+ pad=1
762
+ activation=leaky
763
+
764
+ [convolutional]
765
+ batch_normalize=1
766
+ size=3
767
+ stride=1
768
+ pad=1
769
+ filters=256
770
+ activation=leaky
771
+
772
+ [convolutional]
773
+ size=1
774
+ stride=1
775
+ pad=1
776
+ filters=255
777
+ activation=linear
778
+
779
+
780
+ [yolo]
781
+ mask = 0,1,2
782
+ anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
783
+ classes=80
784
+ num=9
785
+ jitter=.3
786
+ ignore_thresh = .7
787
+ truth_thresh = 1
788
+ random=1
789
+
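
The three `[yolo]` sections at the end of this cfg are the detection heads for the stride-32, stride-16 and stride-8 feature maps; in OpenCV they surface as the unconnected output layers that `detect_objects_yolo` in yolov3.py forwards to. A minimal sketch, assuming `yolov3.cfg` and `yolov3.weights` from this commit are in the working directory:

```python
import cv2

net = cv2.dnn.readNetFromDarknet("yolov3.cfg", "yolov3.weights")
# the three [yolo] heads; names are typically ('yolo_82', 'yolo_94', 'yolo_106')
print(net.getUnconnectedOutLayersNames())
```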
yolov3.py ADDED
@@ -0,0 +1,134 @@
+ import cv2
+ import numpy as np
+ #import argparse
+ #import time
+
+
+
+ #video_path = 'D:/OfficeWork/VS_code_exp/exp/video_1.mp4'
+ #image_path = 'D:\OfficeWork/VS_code_exp/exp/test.jpg.jpg'
+
+
+ # Load YOLO
+ def load_yolo():
+     net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
+     classes = []
+     with open("coco.names", "r") as f:
+         classes = [line.strip() for line in f.readlines()]
+
+     output_layers = [layer_name for layer_name in net.getUnconnectedOutLayersNames()]
+     colors = np.random.uniform(0, 255, size=(len(classes), 3))
+     return net, classes, colors, output_layers
+
+ def load_image(img_path):
+     # image loading
+     img = cv2.imread(img_path)
+     img = cv2.resize(img, None, fx=0.4, fy=0.4)
+     height, width, channels = img.shape
+     return img, height, width, channels
+
+ def start_webcam():
+     cap = cv2.VideoCapture(0)
+
+     return cap
+
+
+ def display_blob(blob):
+     '''
+     Three images each for RED, GREEN, BLUE channel
+     '''
+     for b in blob:
+         for n, imgb in enumerate(b):
+             cv2.imshow(str(n), imgb)
+
+ def detect_objects_yolo(img, net, outputLayers):
+     blob = cv2.dnn.blobFromImage(img, scalefactor=0.00392, size=(320, 320), mean=(0, 0, 0), swapRB=True, crop=False)
+     net.setInput(blob)
+     outputs = net.forward(outputLayers)
+     #output=np.ascontiguousarray(list(outputs))
+     #print(outputs)
+     #for i, out in enumerate(outputs):
+     #    print(i, np.array(out).shape)
+     return blob, outputs
+
+ def get_box_dimensions_yolo(outputs, height, width):
+     boxes = []
+     confs = []
+     class_ids = []
+     for output in outputs:
+         for detect in output:
+             scores = detect[5:]
+             #print('detect', scores)
+             class_id = np.argmax(scores)
+             conf = scores[class_id]
+             if conf > 0.3:
+                 center_x = int(detect[0] * width)
+                 center_y = int(detect[1] * height)
+                 w = int(detect[2] * width)
+                 h = int(detect[3] * height)
+                 x = int(center_x - w / 2)
+                 y = int(center_y - h / 2)
+                 boxes.append([x, y, w, h])
+                 #print(boxes)
+                 confs.append(float(conf))
+                 class_ids.append(class_id)
+     return boxes, confs, class_ids
+
+ def draw_labels_yolo(boxes, confs, colors, class_ids, classes, img):
+     indexes = cv2.dnn.NMSBoxes(boxes, confs, 0.5, 0.4)
+     font = cv2.FONT_HERSHEY_PLAIN
+     for i in range(len(boxes)):
+         if i in indexes:
+             x, y, w, h = boxes[i]
+             label = str(classes[class_ids[i]])
+             color = colors[i]
+             cv2.rectangle(img, (x, y), (x + w, y + h), color, 5)
+             cv2.putText(img, label, (x, y - 5), font, 5, color, 5)
+     return img
+
+ def image_detect_yolo(img_path):
+     model, classes, colors, output_layers = load_yolo()
+     image, height, width, channels = load_image(img_path)
+     blob, outputs = detect_objects_yolo(image, model, output_layers)
+     #print(outputs)
+     boxes, confs, class_ids = get_box_dimensions_yolo(outputs, height, width)
+     image = draw_labels_yolo(boxes, confs, colors, class_ids, classes, image)
+     return image
+     '''while True:
+         key = cv2.waitKey(1)
+         if key == 27:
+             break'''
+
+ def webcam_detect():
+     model, classes, colors, output_layers = load_yolo()
+     cap = start_webcam()
+     while True:
+         _, frame = cap.read()
+         height, width, channels = frame.shape
+         blob, outputs = detect_objects_yolo(frame, model, output_layers)
+         boxes, confs, class_ids = get_box_dimensions_yolo(outputs, height, width)
+         draw_labels_yolo(boxes, confs, colors, class_ids, classes, frame)
+         key = cv2.waitKey(1)
+         if key == 27:
+             break
+     cap.release()
+
+
+ def start_video_yolo(video_path):
+     model, classes, colors, output_layers = load_yolo()
+     cap = cv2.VideoCapture(video_path)
+     while True:
+         _, frame = cap.read()
+         height, width, channels = frame.shape
+         blob, outputs = detect_objects_yolo(frame, model, output_layers)
+         boxes, confs, class_ids = get_box_dimensions_yolo(outputs, height, width)
+         frame = draw_labels_yolo(boxes, confs, colors, class_ids, classes, frame)
+         yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+         '''key = cv2.waitKey(1)
+         if key == 27:
+             break
+         cap.release()'''
+
+
+
+ cv2.destroyAllWindows()
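
A quick end-to-end check of the YOLOv3 path without launching the Gradio app, using only the functions defined above (a sketch; it assumes the weights, cfg, coco.names and `sample/dog.jpg` from this commit are in the working directory):

```python
import cv2
from yolov3 import load_yolo, load_image, detect_objects_yolo, get_box_dimensions_yolo, draw_labels_yolo

net, classes, colors, output_layers = load_yolo()
img, h, w, _ = load_image("sample/dog.jpg")
_, outputs = detect_objects_yolo(img, net, output_layers)
boxes, confs, class_ids = get_box_dimensions_yolo(outputs, h, w)
result = draw_labels_yolo(boxes, confs, colors, class_ids, classes, img)
cv2.imwrite("yolo_result.jpg", result)
```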
yolov3.weights ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523e4e69e1d015393a1b0a441cef1d9c7659e3eb2d7e15f793f060a21b32f297
3
+ size 248007048
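
The .mp4 samples, frozen_inference_graph.pb and yolov3.weights above are Git LFS pointers; after a clone without LFS they remain small text stubs and `cv2.dnn.readNet` will fail to load them. A small sketch to confirm the real binaries were fetched (file names taken from this commit; the 1 KB cutoff is an arbitrary heuristic):

```python
import os

for path in ["yolov3.weights", "frozen_inference_graph.pb",
             "sample/person.mp4", "sample/video_1.mp4"]:
    size = os.path.getsize(path)
    status = "still an LFS pointer?" if size < 1024 else f"{size} bytes"
    print(path, "->", status)
```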