jbraun19 and awacke1 committed
Commit 5c00143

Duplicate from awacke1/Webcam-Object-Recognition-Yolo-n-Coco

Co-authored-by: Aaron C Wacker <awacke1@users.noreply.huggingface.co>
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ yolov4.weights filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: 📷 Webcam Object Recognition Yolo Coco 🔍 Live Gradio
+ emoji: 📷Live
+ colorFrom: purple
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.16.2
+ app_file: app.py
+ pinned: false
+ duplicated_from: awacke1/Webcam-Object-Recognition-Yolo-n-Coco
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,21 @@
+ import tensorflow as tf
+ import cv2
+ import numpy as np
+ from glob import glob
+ from models import Yolov4
+ import gradio as gr
+ model = Yolov4(weight_path="yolov4.weights", class_name_path='coco_classes.txt')
+ def gradio_wrapper(img):
+     global model
+     #print(np.shape(img))
+     results = model.predict(img)
+     return results[0]
+ demo = gr.Interface(
+     gradio_wrapper,
+     #gr.Image(source="webcam", streaming=True, flip=True),
+     gr.Image(source="webcam", streaming=True),
+     "image",
+     live=True
+ )
+
+ demo.launch()
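Note: Yolov4.predict() returns a tuple of (annotated image, detections DataFrame), which is why the wrapper returns only results[0] for display. For a local smoke test without a webcam, the same path can be exercised on any RGB array; a minimal sketch, assuming yolov4.weights and coco_classes.txt are present as above:

import numpy as np
from models import Yolov4

model = Yolov4(weight_path="yolov4.weights", class_name_path="coco_classes.txt")
frame = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for a webcam frame
annotated, detections = model.predict(frame)     # (annotated image, DataFrame)
print(detections)                                # likely empty on a blank frame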
class_names/README.md ADDED
@@ -0,0 +1 @@
+ test
class_names/bccd_classes.txt ADDED
@@ -0,0 +1,3 @@
+ WBC
+ Platelets
+ RBC
class_names/coco_classes.txt ADDED
@@ -0,0 +1,80 @@
+ person
+ bicycle
+ car
+ motorbike
+ aeroplane
+ bus
+ train
+ truck
+ boat
+ traffic light
+ fire hydrant
+ stop sign
+ parking meter
+ bench
+ bird
+ cat
+ dog
+ horse
+ sheep
+ cow
+ elephant
+ bear
+ zebra
+ giraffe
+ backpack
+ umbrella
+ handbag
+ tie
+ suitcase
+ frisbee
+ skis
+ snowboard
+ sports ball
+ kite
+ baseball bat
+ baseball glove
+ skateboard
+ surfboard
+ tennis racket
+ bottle
+ wine glass
+ cup
+ fork
+ knife
+ spoon
+ bowl
+ banana
+ apple
+ sandwich
+ orange
+ broccoli
+ carrot
+ hot dog
+ pizza
+ donut
+ cake
+ chair
+ sofa
+ pottedplant
+ bed
+ diningtable
+ toilet
+ tvmonitor
+ laptop
+ mouse
+ remote
+ keyboard
+ cell phone
+ microwave
+ oven
+ toaster
+ sink
+ refrigerator
+ book
+ clock
+ vase
+ scissors
+ teddy bear
+ hair drier
+ toothbrush
coco_classes.txt ADDED
@@ -0,0 +1,80 @@
+ person
+ bicycle
+ car
+ motorbike
+ aeroplane
+ bus
+ train
+ truck
+ boat
+ traffic light
+ fire hydrant
+ stop sign
+ parking meter
+ bench
+ bird
+ cat
+ dog
+ horse
+ sheep
+ cow
+ elephant
+ bear
+ zebra
+ giraffe
+ backpack
+ umbrella
+ handbag
+ tie
+ suitcase
+ frisbee
+ skis
+ snowboard
+ sports ball
+ kite
+ baseball bat
+ baseball glove
+ skateboard
+ surfboard
+ tennis racket
+ bottle
+ wine glass
+ cup
+ fork
+ knife
+ spoon
+ bowl
+ banana
+ apple
+ sandwich
+ orange
+ broccoli
+ carrot
+ hot dog
+ pizza
+ donut
+ cake
+ chair
+ sofa
+ pottedplant
+ bed
+ diningtable
+ toilet
+ tvmonitor
+ laptop
+ mouse
+ remote
+ keyboard
+ cell phone
+ microwave
+ oven
+ toaster
+ sink
+ refrigerator
+ book
+ clock
+ vase
+ scissors
+ teddy bear
+ hair drier
+ toothbrush
config.py ADDED
@@ -0,0 +1,17 @@
+ yolo_config = {
+     # Basic
+     'img_size': (416, 416, 3),
+     'anchors': [12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401],
+     'strides': [8, 16, 32],
+     'xyscale': [1.2, 1.1, 1.05],
+
+     # Training
+     'iou_loss_thresh': 0.5,
+     'batch_size': 8,
+     'num_gpu': 1,  # 2,
+
+     # Inference
+     'max_boxes': 100,
+     'iou_threshold': 0.413,
+     'score_threshold': 0.3,
+ }
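The 18 anchor values are nine (width, height) pairs in pixels, three per output scale; models.py reshapes them to (3, 3, 2) so that row i holds the anchors used at stride strides[i]. A quick sanity check of that layout:

import numpy as np
from config import yolo_config

anchors = np.array(yolo_config['anchors']).reshape((3, 3, 2))
for stride, scale_anchors in zip(yolo_config['strides'], anchors):
    # three (w, h) anchor boxes per detection scale
    print(f"stride {stride}: anchors {scale_anchors.tolist()}")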
custom_callbacks.py ADDED
@@ -0,0 +1,15 @@
+ from tensorflow.keras import callbacks
+ import math
+
+
+ class CosineAnnealingScheduler(callbacks.LearningRateScheduler):
+     def __init__(self, epochs_per_cycle, lr_min, lr_max, verbose=0):
+         super(callbacks.LearningRateScheduler, self).__init__()
+         self.verbose = verbose
+         self.lr_min = lr_min
+         self.lr_max = lr_max
+         self.epochs_per_cycle = epochs_per_cycle
+
+     def schedule(self, epoch, lr):
+         return self.lr_min + (self.lr_max - self.lr_min) * \
+             (1 + math.cos(math.pi * (epoch % self.epochs_per_cycle) / self.epochs_per_cycle)) / 2
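Note that super(callbacks.LearningRateScheduler, self).__init__() deliberately skips LearningRateScheduler.__init__ (which would require a schedule argument); the schedule() method defined above plays that role instead. The callback sweeps the learning rate from lr_max down toward lr_min and restarts every epochs_per_cycle epochs (cosine annealing with warm restarts). A usage sketch, assuming a compiled Keras model and a training generator (model and train_data_gen are placeholders):

from custom_callbacks import CosineAnnealingScheduler

lr_schedule = CosineAnnealingScheduler(epochs_per_cycle=10, lr_min=1e-5, lr_max=1e-3, verbose=1)
# model and train_data_gen are assumed to exist, e.g. Yolov4().training_model and a DataGenerator
model.fit(train_data_gen, epochs=30, callbacks=[lr_schedule])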
custom_layers.py ADDED
@@ -0,0 +1,298 @@
+ import tensorflow as tf
+ from tensorflow.keras import layers, initializers, models
+
+
+ def conv(x, filters, kernel_size, downsampling=False, activation='leaky', batch_norm=True):
+     def mish(x):
+         return x * tf.math.tanh(tf.math.softplus(x))
+
+     if downsampling:
+         x = layers.ZeroPadding2D(padding=((1, 0), (1, 0)))(x)  # top & left padding
+         padding = 'valid'
+         strides = 2
+     else:
+         padding = 'same'
+         strides = 1
+     x = layers.Conv2D(filters,
+                       kernel_size,
+                       strides=strides,
+                       padding=padding,
+                       use_bias=not batch_norm,
+                       # kernel_regularizer=regularizers.l2(0.0005),
+                       kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01),
+                       # bias_initializer=initializers.Zeros()
+                       )(x)
+     if batch_norm:
+         x = layers.BatchNormalization()(x)
+     if activation == 'mish':
+         x = mish(x)
+     elif activation == 'leaky':
+         x = layers.LeakyReLU(alpha=0.1)(x)
+     return x
+
+
+ def residual_block(x, filters1, filters2, activation='leaky'):
+     """
+     :param x: input tensor
+     :param filters1: number of filters for the 1x1 conv
+     :param filters2: number of filters for the 3x3 conv
+     :param activation: default activation function: leaky relu
+     :return:
+     """
+     y = conv(x, filters1, kernel_size=1, activation=activation)
+     y = conv(y, filters2, kernel_size=3, activation=activation)
+     return layers.Add()([x, y])
+
+
+ def csp_block(x, residual_out, repeat, residual_bottleneck=False):
+     """
+     Cross Stage Partial Network (CSPNet)
+     transition_bottleneck_dims: 1x1 bottleneck
+     output_dims: 3x3
+     :param x: input tensor
+     :param residual_out: number of output filters for the residual blocks
+     :param repeat: number of residual blocks
+     :param residual_bottleneck: halve the 1x1 bottleneck width inside the residual blocks
+     :return:
+     """
+     route = x
+     route = conv(route, residual_out, 1, activation="mish")
+     x = conv(x, residual_out, 1, activation="mish")
+     for i in range(repeat):
+         x = residual_block(x,
+                            residual_out // 2 if residual_bottleneck else residual_out,
+                            residual_out,
+                            activation="mish")
+     x = conv(x, residual_out, 1, activation="mish")
+
+     x = layers.Concatenate()([x, route])
+     return x
+
+
+ def darknet53(x):
+     x = conv(x, 32, 3)
+     x = conv(x, 64, 3, downsampling=True)
+
+     for i in range(1):
+         x = residual_block(x, 32, 64)
+     x = conv(x, 128, 3, downsampling=True)
+
+     for i in range(2):
+         x = residual_block(x, 64, 128)
+     x = conv(x, 256, 3, downsampling=True)
+
+     for i in range(8):
+         x = residual_block(x, 128, 256)
+     route_1 = x
+     x = conv(x, 512, 3, downsampling=True)
+
+     for i in range(8):
+         x = residual_block(x, 256, 512)
+     route_2 = x
+     x = conv(x, 1024, 3, downsampling=True)
+
+     for i in range(4):
+         x = residual_block(x, 512, 1024)
+
+     return route_1, route_2, x
+
+
+ def cspdarknet53(input):
+     x = conv(input, 32, 3)
+     x = conv(x, 64, 3, downsampling=True)
+
+     x = csp_block(x, residual_out=64, repeat=1, residual_bottleneck=True)
+     x = conv(x, 64, 1, activation='mish')
+     x = conv(x, 128, 3, activation='mish', downsampling=True)
+
+     x = csp_block(x, residual_out=64, repeat=2)
+     x = conv(x, 128, 1, activation='mish')
+     x = conv(x, 256, 3, activation='mish', downsampling=True)
+
+     x = csp_block(x, residual_out=128, repeat=8)
+     x = conv(x, 256, 1, activation='mish')
+     route0 = x
+     x = conv(x, 512, 3, activation='mish', downsampling=True)
+
+     x = csp_block(x, residual_out=256, repeat=8)
+     x = conv(x, 512, 1, activation='mish')
+     route1 = x
+     x = conv(x, 1024, 3, activation='mish', downsampling=True)
+
+     x = csp_block(x, residual_out=512, repeat=4)
+
+     x = conv(x, 1024, 1, activation="mish")
+
+     # SPP block: max-pooled features at multiple receptive fields
+     x = conv(x, 512, 1)
+     x = conv(x, 1024, 3)
+     x = conv(x, 512, 1)
+
+     x = layers.Concatenate()([layers.MaxPooling2D(pool_size=13, strides=1, padding='same')(x),
+                               layers.MaxPooling2D(pool_size=9, strides=1, padding='same')(x),
+                               layers.MaxPooling2D(pool_size=5, strides=1, padding='same')(x),
+                               x
+                               ])
+     x = conv(x, 512, 1)
+     x = conv(x, 1024, 3)
+     route2 = conv(x, 512, 1)
+     return models.Model(input, [route0, route1, route2])
+
+
+ def yolov4_neck(x, num_classes):
+     backbone_model = cspdarknet53(x)
+     route0, route1, route2 = backbone_model.output
+
+     route_input = route2
+     x = conv(route2, 256, 1)
+     x = layers.UpSampling2D()(x)
+     route1 = conv(route1, 256, 1)
+     x = layers.Concatenate()([route1, x])
+
+     x = conv(x, 256, 1)
+     x = conv(x, 512, 3)
+     x = conv(x, 256, 1)
+     x = conv(x, 512, 3)
+     x = conv(x, 256, 1)
+
+     route1 = x
+     x = conv(x, 128, 1)
+     x = layers.UpSampling2D()(x)
+     route0 = conv(route0, 128, 1)
+     x = layers.Concatenate()([route0, x])
+
+     x = conv(x, 128, 1)
+     x = conv(x, 256, 3)
+     x = conv(x, 128, 1)
+     x = conv(x, 256, 3)
+     x = conv(x, 128, 1)
+
+     route0 = x
+     x = conv(x, 256, 3)
+     conv_sbbox = conv(x, 3 * (num_classes + 5), 1, activation=None, batch_norm=False)
+
+     x = conv(route0, 256, 3, downsampling=True)
+     x = layers.Concatenate()([x, route1])
+
+     x = conv(x, 256, 1)
+     x = conv(x, 512, 3)
+     x = conv(x, 256, 1)
+     x = conv(x, 512, 3)
+     x = conv(x, 256, 1)
+
+     route1 = x
+     x = conv(x, 512, 3)
+     conv_mbbox = conv(x, 3 * (num_classes + 5), 1, activation=None, batch_norm=False)
+
+     x = conv(route1, 512, 3, downsampling=True)
+     x = layers.Concatenate()([x, route_input])
+
+     x = conv(x, 512, 1)
+     x = conv(x, 1024, 3)
+     x = conv(x, 512, 1)
+     x = conv(x, 1024, 3)
+     x = conv(x, 512, 1)
+
+     x = conv(x, 1024, 3)
+     conv_lbbox = conv(x, 3 * (num_classes + 5), 1, activation=None, batch_norm=False)
+
+     return [conv_sbbox, conv_mbbox, conv_lbbox]
+
+
+ def yolov4_head(yolo_neck_outputs, classes, anchors, xyscale):
+     bbox0, object_probability0, class_probabilities0, pred_box0 = get_boxes(yolo_neck_outputs[0],
+                                                                             anchors=anchors[0, :, :], classes=classes,
+                                                                             grid_size=52, strides=8,
+                                                                             xyscale=xyscale[0])
+     bbox1, object_probability1, class_probabilities1, pred_box1 = get_boxes(yolo_neck_outputs[1],
+                                                                             anchors=anchors[1, :, :], classes=classes,
+                                                                             grid_size=26, strides=16,
+                                                                             xyscale=xyscale[1])
+     bbox2, object_probability2, class_probabilities2, pred_box2 = get_boxes(yolo_neck_outputs[2],
+                                                                             anchors=anchors[2, :, :], classes=classes,
+                                                                             grid_size=13, strides=32,
+                                                                             xyscale=xyscale[2])
+     x = [bbox0, object_probability0, class_probabilities0, pred_box0,
+          bbox1, object_probability1, class_probabilities1, pred_box1,
+          bbox2, object_probability2, class_probabilities2, pred_box2]
+
+     return x
+
+
+ def get_boxes(pred, anchors, classes, grid_size, strides, xyscale):
+     """
+     :param pred: raw head output for one scale
+     :param anchors: (w, h) anchor pairs for this scale
+     :param classes: number of classes
+     :param grid_size: output grid size for this scale (52/26/13)
+     :param strides: pixels per grid cell
+     :param xyscale: xy scaling factor for this scale
+     :return:
+     """
+     pred = tf.reshape(pred,
+                       (tf.shape(pred)[0],
+                        grid_size,
+                        grid_size,
+                        3,
+                        5 + classes))  # (batch_size, grid_size, grid_size, 3, 5+classes)
+     box_xy, box_wh, obj_prob, class_prob = tf.split(
+         pred, (2, 2, 1, classes), axis=-1
+     )  # (?, 52, 52, 3, 2) (?, 52, 52, 3, 2) (?, 52, 52, 3, 1) (?, 52, 52, 3, 80)
+
+     box_xy = tf.sigmoid(box_xy)  # (?, 52, 52, 3, 2)
+     obj_prob = tf.sigmoid(obj_prob)  # (?, 52, 52, 3, 1)
+     class_prob = tf.sigmoid(class_prob)  # (?, 52, 52, 3, 80)
+     pred_box_xywh = tf.concat((box_xy, box_wh), axis=-1)  # (?, 52, 52, 3, 4)
+
+     grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))  # (52, 52) (52, 52)
+     grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)  # (52, 52, 1, 2)
+     grid = tf.cast(grid, dtype=tf.float32)
+
+     box_xy = ((box_xy * xyscale) - 0.5 * (xyscale - 1) + grid) * strides  # (?, 52, 52, 3, 2)
+
+     box_wh = tf.exp(box_wh) * anchors  # (?, 52, 52, 3, 2)
+     box_x1y1 = box_xy - box_wh / 2  # (?, 52, 52, 3, 2)
+     box_x2y2 = box_xy + box_wh / 2  # (?, 52, 52, 3, 2)
+     pred_box_x1y1x2y2 = tf.concat([box_x1y1, box_x2y2], axis=-1)  # (?, 52, 52, 3, 4)
+     return pred_box_x1y1x2y2, obj_prob, class_prob, pred_box_xywh
+     # pred_box_x1y1x2y2: absolute xy values
+
+
+ def nms(model_outputs, input_shape, num_class, iou_threshold=0.413, score_threshold=0.3):
+     """
+     Apply Non-Maximum Suppression
+     ref: https://www.tensorflow.org/api_docs/python/tf/image/combined_non_max_suppression
+     :param model_outputs: yolo model outputs (flat list from yolov4_head)
+     :param input_shape: size of the input image
+     :return: nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
+     """
+     bs = tf.shape(model_outputs[0])[0]
+     boxes = tf.zeros((bs, 0, 4))
+     confidence = tf.zeros((bs, 0, 1))
+     class_probabilities = tf.zeros((bs, 0, num_class))
+
+     # each scale contributes 4 tensors; only boxes, objectness and class probs are used here
+     for output_idx in range(0, len(model_outputs), 4):
+         output_xy = model_outputs[output_idx]
+         output_conf = model_outputs[output_idx + 1]
+         output_classes = model_outputs[output_idx + 2]
+         boxes = tf.concat([boxes, tf.reshape(output_xy, (bs, -1, 4))], axis=1)
+         confidence = tf.concat([confidence, tf.reshape(output_conf, (bs, -1, 1))], axis=1)
+         class_probabilities = tf.concat([class_probabilities, tf.reshape(output_classes, (bs, -1, num_class))], axis=1)
+
+     scores = confidence * class_probabilities
+     boxes = tf.expand_dims(boxes, axis=-2)
+     boxes = boxes / input_shape[0]  # box normalization: relative to img size
+     print(f'nms iou: {iou_threshold} score: {score_threshold}')
+     (nmsed_boxes,  # [bs, max_detections, 4]
+      nmsed_scores,  # [bs, max_detections]
+      nmsed_classes,  # [bs, max_detections]
+      valid_detections  # [batch_size]
+      ) = tf.image.combined_non_max_suppression(
+         boxes=boxes,  # y1x1, y2x2 [0~1]
+         scores=scores,
+         max_output_size_per_class=100,
+         max_total_size=100,  # max_boxes: maximum nmsed_boxes in a single img
+         iou_threshold=iou_threshold,  # overlap above which two boxes count as duplicates
+         score_threshold=score_threshold,  # minimum confidence that counts as a valid detection
+     )
+     return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
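The nms() helper ultimately delegates to tf.image.combined_non_max_suppression with the thresholds from config.py. A self-contained sketch of that call with hand-made boxes instead of head outputs (two near-duplicate boxes plus one distinct box, single class):

import tensorflow as tf

# (y1, x1, y2, x2), normalized; shape (batch=1, num_boxes=3, q=1, 4)
boxes = tf.constant([[[[0.10, 0.10, 0.50, 0.50]],
                      [[0.12, 0.10, 0.50, 0.52]],
                      [[0.60, 0.60, 0.90, 0.90]]]])
scores = tf.constant([[[0.9], [0.8], [0.7]]])  # shape (1, 3, num_classes=1)
nmsed_boxes, nmsed_scores, nmsed_classes, valid = tf.image.combined_non_max_suppression(
    boxes=boxes, scores=scores,
    max_output_size_per_class=100, max_total_size=100,
    iou_threshold=0.413, score_threshold=0.3)
print(int(valid[0]))  # 2: the near-duplicate of the first box is suppressed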
loss.py ADDED
@@ -0,0 +1,212 @@
+ #!/usr/bin/env python
+ # coding: utf-8
+
+ import numpy as np
+ import math
+ import tensorflow.keras.backend as K
+ import tensorflow as tf
+
+
+ def xywh_to_x1y1x2y2(boxes):
+     return tf.concat([boxes[..., :2] - boxes[..., 2:] * 0.5, boxes[..., :2] + boxes[..., 2:] * 0.5], axis=-1)
+
+
+ # boxes: (x, y, w, h)
+ def bbox_iou(boxes1, boxes2):
+     boxes1_area = boxes1[..., 2] * boxes1[..., 3]  # w * h
+     boxes2_area = boxes2[..., 2] * boxes2[..., 3]
+
+     # (x, y, w, h) -> (x0, y0, x1, y1)
+     boxes1 = xywh_to_x1y1x2y2(boxes1)
+     boxes2 = xywh_to_x1y1x2y2(boxes2)
+
+     # coordinates of the intersection
+     top_left = tf.maximum(boxes1[..., :2], boxes2[..., :2])
+     bottom_right = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
+     intersection_xy = tf.maximum(bottom_right - top_left, 0.0)
+
+     intersection_area = intersection_xy[..., 0] * intersection_xy[..., 1]
+     union_area = boxes1_area + boxes2_area - intersection_area
+
+     return 1.0 * intersection_area / (union_area + tf.keras.backend.epsilon())
+
+
+ def bbox_giou(boxes1, boxes2):
+     boxes1_area = boxes1[..., 2] * boxes1[..., 3]  # w * h
+     boxes2_area = boxes2[..., 2] * boxes2[..., 3]
+
+     # (x, y, w, h) -> (x0, y0, x1, y1)
+     boxes1 = xywh_to_x1y1x2y2(boxes1)
+     boxes2 = xywh_to_x1y1x2y2(boxes2)
+
+     top_left = tf.maximum(boxes1[..., :2], boxes2[..., :2])
+     bottom_right = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
+
+     intersection_xy = tf.maximum(bottom_right - top_left, 0.0)
+     intersection_area = intersection_xy[..., 0] * intersection_xy[..., 1]
+
+     union_area = boxes1_area + boxes2_area - intersection_area
+
+     iou = 1.0 * intersection_area / (union_area + tf.keras.backend.epsilon())
+
+     enclose_top_left = tf.minimum(boxes1[..., :2], boxes2[..., :2])
+     enclose_bottom_right = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
+
+     enclose_xy = enclose_bottom_right - enclose_top_left
+     enclose_area = enclose_xy[..., 0] * enclose_xy[..., 1]
+
+     giou = iou - tf.math.divide_no_nan(enclose_area - union_area, enclose_area)
+
+     return giou
+
+
+ def bbox_ciou(boxes1, boxes2):
+     '''
+     ciou = iou - p2/c2 - av
+     :param boxes1: (8, 13, 13, 3, 4) pred_xywh
+     :param boxes2: (8, 13, 13, 3, 4) label_xywh
+     :return:
+     '''
+     boxes1_x0y0x1y1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
+                                  boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
+     boxes2_x0y0x1y1 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
+                                  boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
+     boxes1_x0y0x1y1 = tf.concat([tf.minimum(boxes1_x0y0x1y1[..., :2], boxes1_x0y0x1y1[..., 2:]),
+                                  tf.maximum(boxes1_x0y0x1y1[..., :2], boxes1_x0y0x1y1[..., 2:])], axis=-1)
+     boxes2_x0y0x1y1 = tf.concat([tf.minimum(boxes2_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., 2:]),
+                                  tf.maximum(boxes2_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., 2:])], axis=-1)
+
+     # areas
+     boxes1_area = (boxes1_x0y0x1y1[..., 2] - boxes1_x0y0x1y1[..., 0]) * (
+             boxes1_x0y0x1y1[..., 3] - boxes1_x0y0x1y1[..., 1])
+     boxes2_area = (boxes2_x0y0x1y1[..., 2] - boxes2_x0y0x1y1[..., 0]) * (
+             boxes2_x0y0x1y1[..., 3] - boxes2_x0y0x1y1[..., 1])
+
+     # top-left and bottom-right coords, shape: (8, 13, 13, 3, 2)
+     left_up = tf.maximum(boxes1_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., :2])
+     right_down = tf.minimum(boxes1_x0y0x1y1[..., 2:], boxes2_x0y0x1y1[..., 2:])
+
+     # intersection area and iou
+     inter_section = tf.maximum(right_down - left_up, 0.0)
+     inter_area = inter_section[..., 0] * inter_section[..., 1]
+     union_area = boxes1_area + boxes2_area - inter_area
+     iou = inter_area / (union_area + 1e-9)
+
+     # top-left and bottom-right coords of the enclosing rectangle, shape: (8, 13, 13, 3, 2)
+     enclose_left_up = tf.minimum(boxes1_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., :2])
+     enclose_right_down = tf.maximum(boxes1_x0y0x1y1[..., 2:], boxes2_x0y0x1y1[..., 2:])
+
+     # diagonal ** 2
+     enclose_wh = enclose_right_down - enclose_left_up
+     enclose_c2 = K.pow(enclose_wh[..., 0], 2) + K.pow(enclose_wh[..., 1], 2)
+
+     # squared center distance between the two rectangles
+     p2 = K.pow(boxes1[..., 0] - boxes2[..., 0], 2) + K.pow(boxes1[..., 1] - boxes2[..., 1], 2)
+
+     # aspect-ratio consistency term av
+     atan1 = tf.atan(boxes1[..., 2] / (boxes1[..., 3] + 1e-9))
+     atan2 = tf.atan(boxes2[..., 2] / (boxes2[..., 3] + 1e-9))
+     v = 4.0 * K.pow(atan1 - atan2, 2) / (math.pi ** 2)
+     a = v / (1 - iou + v)
+
+     ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
+     return ciou
+
+
+ def yolo_loss(args, num_classes, iou_loss_thresh, anchors):
+     conv_lbbox = args[2]  # (?, ?, ?, 3*(num_classes+5))
+     conv_mbbox = args[1]  # (?, ?, ?, 3*(num_classes+5))
+     conv_sbbox = args[0]  # (?, ?, ?, 3*(num_classes+5))
+     label_sbbox = args[3]  # (?, ?, ?, 3, num_classes+5)
+     label_mbbox = args[4]  # (?, ?, ?, 3, num_classes+5)
+     label_lbbox = args[5]  # (?, ?, ?, 3, num_classes+5)
+     true_bboxes = args[6]  # (?, 50, 4)
+     pred_sbbox = decode(conv_sbbox, anchors[0], 8, num_classes)
+     pred_mbbox = decode(conv_mbbox, anchors[1], 16, num_classes)
+     pred_lbbox = decode(conv_lbbox, anchors[2], 32, num_classes)
+     sbbox_ciou_loss, sbbox_conf_loss, sbbox_prob_loss = loss_layer(conv_sbbox, pred_sbbox, label_sbbox, true_bboxes, 8, num_classes, iou_loss_thresh)
+     mbbox_ciou_loss, mbbox_conf_loss, mbbox_prob_loss = loss_layer(conv_mbbox, pred_mbbox, label_mbbox, true_bboxes, 16, num_classes, iou_loss_thresh)
+     lbbox_ciou_loss, lbbox_conf_loss, lbbox_prob_loss = loss_layer(conv_lbbox, pred_lbbox, label_lbbox, true_bboxes, 32, num_classes, iou_loss_thresh)
+
+     ciou_loss = (lbbox_ciou_loss + sbbox_ciou_loss + mbbox_ciou_loss) * 3.54
+     conf_loss = (lbbox_conf_loss + sbbox_conf_loss + mbbox_conf_loss) * 64.3
+     prob_loss = (lbbox_prob_loss + sbbox_prob_loss + mbbox_prob_loss) * 1
+
+     return ciou_loss + conf_loss + prob_loss
+
+
+ def loss_layer(conv, pred, label, bboxes, stride, num_class, iou_loss_thresh):
+     conv_shape = tf.shape(conv)
+     batch_size = conv_shape[0]
+     output_size = conv_shape[1]
+     input_size = stride * output_size
+     conv = tf.reshape(conv, (batch_size, output_size, output_size,
+                              3, 5 + num_class))
+     conv_raw_prob = conv[:, :, :, :, 5:]
+     conv_raw_conf = conv[:, :, :, :, 4:5]
+
+     pred_xywh = pred[:, :, :, :, 0:4]
+     pred_conf = pred[:, :, :, :, 4:5]
+
+     label_xywh = label[:, :, :, :, 0:4]
+     respond_bbox = label[:, :, :, :, 4:5]
+     label_prob = label[:, :, :, :, 5:]
+
+     # Coordinate loss
+     ciou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)  # (8, 13, 13, 3, 1)
+     # ciou = tf.expand_dims(bbox_ciou(pred_xywh, label_xywh), axis=-1)  # (8, 13, 13, 3, 1)
+     input_size = tf.cast(input_size, tf.float32)
+
+     # loss weight of the gt bbox: 2 - (gt area / img area)
+     bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
+     ciou_loss = respond_bbox * bbox_loss_scale * (1 - ciou)  # iou loss for responding bboxes
+
+     # Classification loss for responding bboxes
+     prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)
+
+     expand_pred_xywh = pred_xywh[:, :, :, :, np.newaxis, :]  # (?, grid_h, grid_w, 3, 1, 4)
+     expand_bboxes = bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :]  # (?, 1, 1, 1, 70, 4)
+     iou = bbox_iou(expand_pred_xywh, expand_bboxes)  # IoU between each pred bbox and each gt: (?, grid_h, grid_w, 3, 70)
+     max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)  # max iou: (?, grid_h, grid_w, 3, 1)
+
+     # ignore bboxes that are not responding bboxes and whose max iou < threshold
+     respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < iou_loss_thresh, tf.float32)
+
+     # Confidence loss
+     conf_focal = tf.pow(respond_bbox - pred_conf, 2)
+
+     conf_loss = conf_focal * (
+             respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
+             +
+             respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
+     )
+
+     ciou_loss = tf.reduce_mean(tf.reduce_sum(ciou_loss, axis=[1, 2, 3, 4]))
+     conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
+     prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))
+
+     return ciou_loss, conf_loss, prob_loss
+
+
+ def decode(conv_output, anchors, stride, num_class):
+     conv_shape = tf.shape(conv_output)
+     batch_size = conv_shape[0]
+     output_size = conv_shape[1]
+     anchor_per_scale = len(anchors)
+     conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, anchor_per_scale, 5 + num_class))
+     conv_raw_dxdy = conv_output[:, :, :, :, 0:2]
+     conv_raw_dwdh = conv_output[:, :, :, :, 2:4]
+     conv_raw_conf = conv_output[:, :, :, :, 4:5]
+     conv_raw_prob = conv_output[:, :, :, :, 5:]
+     y = tf.tile(tf.range(output_size, dtype=tf.int32)[:, tf.newaxis], [1, output_size])
+     x = tf.tile(tf.range(output_size, dtype=tf.int32)[tf.newaxis, :], [output_size, 1])
+     xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
+     xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, anchor_per_scale, 1])
+     xy_grid = tf.cast(xy_grid, tf.float32)
+     pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * stride
+     pred_wh = (tf.exp(conv_raw_dwdh) * anchors)
+     pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
+     pred_conf = tf.sigmoid(conv_raw_conf)
+     pred_prob = tf.sigmoid(conv_raw_prob)
+     return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
+
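All the IoU variants above take boxes as (x_center, y_center, w, h) and broadcast over leading dimensions. A quick eager-mode numeric check of bbox_iou and bbox_giou on two axis-aligned boxes:

import tensorflow as tf
from loss import bbox_iou, bbox_giou

a = tf.constant([5.0, 5.0, 4.0, 4.0])  # 4x4 box centered at (5, 5)
b = tf.constant([6.0, 5.0, 4.0, 4.0])  # same box shifted right by 1
print(float(bbox_iou(a, b)))   # 0.6: intersection 12 / union 20
print(float(bbox_giou(a, b)))  # also 0.6 here: the enclosing box adds no empty area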
models.py ADDED
@@ -0,0 +1,530 @@
+ import numpy as np
+ import cv2
+ import os
+ import json
+ from tqdm import tqdm
+ from glob import glob
+ import matplotlib.pyplot as plt
+ import tensorflow as tf
+ from tensorflow.keras import layers, models, optimizers
+
+ from custom_layers import yolov4_neck, yolov4_head, nms
+ from utils import load_weights, get_detection_data, draw_bbox, voc_ap, draw_plot_func, read_txt_to_list
+ from config import yolo_config
+ from loss import yolo_loss
+
+
+ class Yolov4(object):
+     def __init__(self,
+                  weight_path=None,
+                  class_name_path='coco_classes.txt',
+                  config=yolo_config,
+                  ):
+         # NOTE: the module-level yolo_config is used below, not the config argument
+         assert config['img_size'][0] == config['img_size'][1], 'non-square images are not supported yet'
+         assert config['img_size'][0] % config['strides'][-1] == 0, 'img size must be a multiple of the last stride'
+         self.class_names = [line.strip() for line in open(class_name_path).readlines()]
+         self.img_size = yolo_config['img_size']
+         self.num_classes = len(self.class_names)
+         self.weight_path = weight_path
+         self.anchors = np.array(yolo_config['anchors']).reshape((3, 3, 2))
+         self.xyscale = yolo_config['xyscale']
+         self.strides = yolo_config['strides']
+         self.output_sizes = [self.img_size[0] // s for s in self.strides]
+         self.class_color = {name: list(np.random.random(size=3) * 255) for name in self.class_names}
+         # Training
+         self.max_boxes = yolo_config['max_boxes']
+         self.iou_loss_thresh = yolo_config['iou_loss_thresh']
+         self.config = yolo_config
+         assert self.num_classes > 0, 'no classes detected!'
+
+         tf.keras.backend.clear_session()
+         if yolo_config['num_gpu'] > 1:
+             mirrored_strategy = tf.distribute.MirroredStrategy()
+             with mirrored_strategy.scope():
+                 self.build_model(load_pretrained=True if self.weight_path else False)
+         else:
+             self.build_model(load_pretrained=True if self.weight_path else False)
+
+     def build_model(self, load_pretrained=True):
+         # core yolo model
+         input_layer = layers.Input(self.img_size)
+         yolov4_output = yolov4_neck(input_layer, self.num_classes)
+         self.yolo_model = models.Model(input_layer, yolov4_output)
+
+         # Build training model
+         y_true = [
+             layers.Input(name='input_2', shape=(52, 52, 3, (self.num_classes + 5))),  # labels for small boxes
+             layers.Input(name='input_3', shape=(26, 26, 3, (self.num_classes + 5))),  # labels for medium boxes
+             layers.Input(name='input_4', shape=(13, 13, 3, (self.num_classes + 5))),  # labels for large boxes
+             layers.Input(name='input_5', shape=(self.max_boxes, 4)),  # true bboxes
+         ]
+         loss_list = tf.keras.layers.Lambda(yolo_loss, name='yolo_loss',
+                                            arguments={'num_classes': self.num_classes,
+                                                       'iou_loss_thresh': self.iou_loss_thresh,
+                                                       'anchors': self.anchors})([*self.yolo_model.output, *y_true])
+         self.training_model = models.Model([self.yolo_model.input, *y_true], loss_list)
+
+         # Build inference model
+         yolov4_output = yolov4_head(yolov4_output, self.num_classes, self.anchors, self.xyscale)
+         # output: [boxes, scores, classes, valid_detections]
+         self.inference_model = models.Model(input_layer,
+                                             nms(yolov4_output, self.img_size, self.num_classes,
+                                                 iou_threshold=self.config['iou_threshold'],
+                                                 score_threshold=self.config['score_threshold']))
+
+         if load_pretrained and self.weight_path:
+             if self.weight_path.endswith('.weights'):
+                 load_weights(self.yolo_model, self.weight_path)
+                 print(f'loaded weights from {self.weight_path}')
+             elif self.weight_path.endswith('.h5'):
+                 self.training_model.load_weights(self.weight_path)
+                 print(f'loaded weights from {self.weight_path}')
+
+         self.training_model.compile(optimizer=optimizers.Adam(learning_rate=1e-3),
+                                     loss={'yolo_loss': lambda y_true, y_pred: y_pred})
+
+     def load_model(self, path):
+         self.yolo_model = models.load_model(path, compile=False)
+         yolov4_output = yolov4_head(self.yolo_model.output, self.num_classes, self.anchors, self.xyscale)
+         self.inference_model = models.Model(self.yolo_model.input,
+                                             nms(yolov4_output, self.img_size, self.num_classes))  # [boxes, scores, classes, valid_detections]
+
+     def save_model(self, path):
+         self.yolo_model.save(path)
+
+     def preprocess_img(self, img):
+         img = cv2.resize(img, self.img_size[:2])
+         img = img / 255.
+         return img
+
+     def fit(self, train_data_gen, epochs, val_data_gen=None, initial_epoch=0, callbacks=None):
+         self.training_model.fit(train_data_gen,
+                                 steps_per_epoch=len(train_data_gen),
+                                 validation_data=val_data_gen,
+                                 validation_steps=len(val_data_gen) if val_data_gen else None,
+                                 epochs=epochs,
+                                 callbacks=callbacks,
+                                 initial_epoch=initial_epoch)
+
+     # raw_img: RGB
+     def predict_img(self, raw_img, random_color=True, plot_img=True, figsize=(10, 10), show_text=True, return_output=True):
+         print('img shape: ', raw_img.shape)
+         img = self.preprocess_img(raw_img)
+         imgs = np.expand_dims(img, axis=0)
+         pred_output = self.inference_model.predict(imgs)
+         detections = get_detection_data(img=raw_img,
+                                         model_outputs=pred_output,
+                                         class_names=self.class_names)
+
+         output_img = draw_bbox(raw_img, detections, cmap=self.class_color, random_color=random_color, figsize=figsize,
+                                show_text=show_text, show_img=False)
+         if return_output:
+             return output_img, detections
+         else:
+             return detections
+
+     def predict(self, img_path, random_color=True, plot_img=True, figsize=(10, 10), show_text=True):
+         raw_img = img_path  # despite the name, this receives a raw RGB array (see app.py)
+         return self.predict_img(raw_img, random_color, plot_img, figsize, show_text)
+
+     def export_gt(self, annotation_path, gt_folder_path):
+         with open(annotation_path) as file:
+             for line in file:
+                 line = line.split(' ')
+                 filename = line[0].split(os.sep)[-1].split('.')[0]
+                 objs = line[1:]
+                 # export txt file
+                 with open(os.path.join(gt_folder_path, filename + '.txt'), 'w') as output_file:
+                     for obj in objs:
+                         x_min, y_min, x_max, y_max, class_id = [float(o) for o in obj.strip().split(',')]
+                         output_file.write(f'{self.class_names[int(class_id)]} {x_min} {y_min} {x_max} {y_max}\n')
+
+     def export_prediction(self, annotation_path, pred_folder_path, img_folder_path, bs=2):
+         with open(annotation_path) as file:
+             img_paths = [os.path.join(img_folder_path, line.split(' ')[0].split(os.sep)[-1]) for line in file]
+             for batch_idx in tqdm(range(0, len(img_paths), bs)):
+                 paths = img_paths[batch_idx:batch_idx + bs]
+                 # read and preprocess imgs
+                 imgs = np.zeros((len(paths), *self.img_size))
+                 raw_img_shapes = []
+                 for j, path in enumerate(paths):
+                     img = cv2.imread(path)
+                     raw_img_shapes.append(img.shape)
+                     img = self.preprocess_img(img)
+                     imgs[j] = img
+
+                 # process batch output
+                 b_boxes, b_scores, b_classes, b_valid_detections = self.inference_model.predict(imgs)
+                 for k in range(len(paths)):
+                     num_boxes = b_valid_detections[k]
+                     raw_img_shape = raw_img_shapes[k]
+                     boxes = b_boxes[k, :num_boxes]
+                     classes = b_classes[k, :num_boxes]
+                     scores = b_scores[k, :num_boxes]
+                     # scale normalized boxes back to the raw image size
+                     boxes[:, [0, 2]] = (boxes[:, [0, 2]] * raw_img_shape[1])  # w
+                     boxes[:, [1, 3]] = (boxes[:, [1, 3]] * raw_img_shape[0])  # h
+                     cls_names = [self.class_names[int(c)] for c in classes]
+
+                     img_path = paths[k]
+                     filename = img_path.split(os.sep)[-1].split('.')[0]
+                     output_path = os.path.join(pred_folder_path, filename + '.txt')
+                     with open(output_path, 'w') as pred_file:
+                         for box_idx in range(num_boxes):
+                             b = boxes[box_idx]
+                             pred_file.write(f'{cls_names[box_idx]} {scores[box_idx]} {b[0]} {b[1]} {b[2]} {b[3]}\n')
+
+     def eval_map(self, gt_folder_path, pred_folder_path, temp_json_folder_path, output_files_path):
+         # --- process ground truth ---
+         ground_truth_files_list = glob(gt_folder_path + '/*.txt')
+         assert len(ground_truth_files_list) > 0, 'no ground truth file'
+         ground_truth_files_list.sort()
+         # dictionaries with per-class counters
+         gt_counter_per_class = {}
+         counter_images_per_class = {}
+
+         gt_files = []
+         for txt_file in ground_truth_files_list:
+             file_id = txt_file.split(".txt", 1)[0]
+             file_id = os.path.basename(os.path.normpath(file_id))
+             # check that there is a corresponding detection-results file
+             temp_path = os.path.join(pred_folder_path, (file_id + ".txt"))
+             assert os.path.exists(temp_path), "Error. File not found: {}\n".format(temp_path)
+             lines_list = read_txt_to_list(txt_file)
+             # create ground-truth dictionary
+             bounding_boxes = []
+             already_seen_classes = []
+             for line in lines_list:
+                 class_name, left, top, right, bottom = line.split()
+                 bbox = left + " " + top + " " + right + " " + bottom
+                 bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False})
+                 # count that object
+                 if class_name in gt_counter_per_class:
+                     gt_counter_per_class[class_name] += 1
+                 else:
+                     gt_counter_per_class[class_name] = 1
+
+                 if class_name not in already_seen_classes:
+                     if class_name in counter_images_per_class:
+                         counter_images_per_class[class_name] += 1
+                     else:
+                         counter_images_per_class[class_name] = 1
+                     already_seen_classes.append(class_name)
+
+             # dump bounding_boxes into a ".json" file
+             new_temp_file = os.path.join(temp_json_folder_path, file_id + "_ground_truth.json")
+             gt_files.append(new_temp_file)
+             with open(new_temp_file, 'w') as outfile:
+                 json.dump(bounding_boxes, outfile)
+
+         gt_classes = sorted(gt_counter_per_class.keys())
+         n_classes = len(gt_classes)
+         print(gt_classes, gt_counter_per_class)
+
+         # --- process predictions ---
+         dr_files_list = sorted(glob(os.path.join(pred_folder_path, '*.txt')))
+
+         for class_index, class_name in enumerate(gt_classes):
+             bounding_boxes = []
+             for txt_file in dr_files_list:
+                 # on the first pass, check that all corresponding ground-truth files exist
+                 file_id = txt_file.split(".txt", 1)[0]
+                 file_id = os.path.basename(os.path.normpath(file_id))
+                 temp_path = os.path.join(gt_folder_path, (file_id + ".txt"))
+                 if class_index == 0:
+                     if not os.path.exists(temp_path):
+                         print(f"Error. File not found: {temp_path}\n")
+                 lines = read_txt_to_list(txt_file)
+                 for line in lines:
+                     try:
+                         tmp_class_name, confidence, left, top, right, bottom = line.split()
+                     except ValueError:
+                         print(f"Error: File {txt_file} is in the wrong format.\n"
+                               f"Expected: <class_name> <confidence> <left> <top> <right> <bottom>\n"
+                               f"Received: {line}\n")
+                         continue  # skip malformed lines rather than crash below
+                     if tmp_class_name == class_name:
+                         bbox = left + " " + top + " " + right + " " + bottom
+                         bounding_boxes.append({"confidence": confidence, "file_id": file_id, "bbox": bbox})
+             # sort detection-results by decreasing confidence
+             bounding_boxes.sort(key=lambda x: float(x['confidence']), reverse=True)
+             with open(temp_json_folder_path + "/" + class_name + "_dr.json", 'w') as outfile:
+                 json.dump(bounding_boxes, outfile)
+
+         # --- calculate the AP for each class ---
+         sum_AP = 0.0
+         ap_dictionary = {}
+         # open file to store the output
+         with open(output_files_path + "/output.txt", 'w') as output_file:
+             output_file.write("# AP and precision/recall per class\n")
+             count_true_positives = {}
+             for class_index, class_name in enumerate(gt_classes):
+                 count_true_positives[class_name] = 0
+                 # load detection-results of that class
+                 dr_file = temp_json_folder_path + "/" + class_name + "_dr.json"
+                 dr_data = json.load(open(dr_file))
+
+                 # assign detection-results to ground-truth objects
+                 nd = len(dr_data)
+                 tp = [0] * nd
+                 fp = [0] * nd
+                 for idx, detection in enumerate(dr_data):
+                     file_id = detection["file_id"]
+                     gt_file = temp_json_folder_path + "/" + file_id + "_ground_truth.json"
+                     ground_truth_data = json.load(open(gt_file))
+                     ovmax = -1
+                     gt_match = -1
+                     # load detected object bounding-box
+                     bb = [float(x) for x in detection["bbox"].split()]
+                     for obj in ground_truth_data:
+                         # look for a class_name match
+                         if obj["class_name"] == class_name:
+                             bbgt = [float(x) for x in obj["bbox"].split()]
+                             bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])]
+                             iw = bi[2] - bi[0] + 1
+                             ih = bi[3] - bi[1] + 1
+                             if iw > 0 and ih > 0:
+                                 # compute overlap (IoU) = area of intersection / area of union
+                                 ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + \
+                                      (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
+                                 ov = iw * ih / ua
+                                 if ov > ovmax:
+                                     ovmax = ov
+                                     gt_match = obj
+
+                     min_overlap = 0.5
+                     if ovmax >= min_overlap:
+                         if not bool(gt_match["used"]):
+                             # true positive
+                             tp[idx] = 1
+                             gt_match["used"] = True
+                             count_true_positives[class_name] += 1
+                             # update the ".json" file
+                             with open(gt_file, 'w') as f:
+                                 f.write(json.dumps(ground_truth_data))
+                         else:
+                             # false positive (duplicate detection of the same object)
+                             fp[idx] = 1
+                     else:
+                         fp[idx] = 1
+
+                 # compute cumulative precision/recall
+                 cumsum = 0
+                 for idx, val in enumerate(fp):
+                     fp[idx] += cumsum
+                     cumsum += val
+                 print('fp ', cumsum)
+                 cumsum = 0
+                 for idx, val in enumerate(tp):
+                     tp[idx] += cumsum
+                     cumsum += val
+                 print('tp ', cumsum)
+                 rec = tp[:]
+                 for idx, val in enumerate(tp):
+                     rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name]
+                 prec = tp[:]
+                 for idx, val in enumerate(tp):
+                     prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx])
+
+                 ap, mrec, mprec = voc_ap(rec[:], prec[:])
+                 sum_AP += ap
+                 text = "{0:.2f}%".format(ap * 100) + " = " + class_name + " AP "
+                 print(text)
+                 ap_dictionary[class_name] = ap
+
+                 n_images = counter_images_per_class[class_name]
+
+                 # draw the precision/recall curve
+                 plt.plot(rec, prec, '-o')
+                 # add a penultimate point (mrec[-2], 0.0),
+                 # since the last line segment (and respective area) do not affect the AP value
+                 area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]]
+                 area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]]
+                 plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r')
+                 fig = plt.gcf()  # gcf - get current figure
+                 fig.canvas.manager.set_window_title('AP ' + class_name)
+                 plt.title('class: ' + text)
+                 plt.xlabel('Recall')
+                 plt.ylabel('Precision')
+                 axes = plt.gca()  # gca - get current axes
+                 axes.set_xlim([0.0, 1.0])
+                 axes.set_ylim([0.0, 1.05])  # .05 to give some extra space
+                 plt.show()
+
+             output_file.write("\n# mAP of all classes\n")
+             mAP = sum_AP / n_classes
+             text = "mAP = {0:.2f}%".format(mAP * 100)
+             output_file.write(text + "\n")
+             print(text)
+
+             # count total detection-results across all files
+             det_counter_per_class = {}
+             for txt_file in dr_files_list:
+                 lines_list = read_txt_to_list(txt_file)
+                 for line in lines_list:
+                     class_name = line.split()[0]
+                     if class_name in det_counter_per_class:
+                         det_counter_per_class[class_name] += 1
+                     else:
+                         det_counter_per_class[class_name] = 1
+             dr_classes = list(det_counter_per_class.keys())
+
+             # plot the total number of occurrences of each class in the ground truth
+             window_title = "ground-truth-info"
+             plot_title = "ground-truth\n"
+             plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)"
+             x_label = "Number of objects per class"
+             output_path = output_files_path + "/ground-truth-info.png"
+             to_show = False
+             plot_color = 'forestgreen'
+             draw_plot_func(
+                 gt_counter_per_class,
+                 n_classes,
+                 window_title,
+                 plot_title,
+                 x_label,
+                 output_path,
+                 to_show,
+                 plot_color,
+                 '',
+             )
+
+             # finish counting true positives
+             for class_name in dr_classes:
+                 # a class present in the detection results but not in the ground truth has no true positives
+                 if class_name not in gt_classes:
+                     count_true_positives[class_name] = 0
+
+             # plot the total number of occurrences of each class in the "detection-results" folder
+             window_title = "detection-results-info"
+             plot_title = "detection-results\n"
+             plot_title += "(" + str(len(dr_files_list)) + " files and "
+             count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values()))
+             plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)"
+             x_label = "Number of objects per class"
+             output_path = output_files_path + "/detection-results-info.png"
+             to_show = False
+             plot_color = 'forestgreen'
+             true_p_bar = count_true_positives
+             draw_plot_func(
+                 det_counter_per_class,
+                 len(det_counter_per_class),
+                 window_title,
+                 plot_title,
+                 x_label,
+                 output_path,
+                 to_show,
+                 plot_color,
+                 true_p_bar
+             )
+
+             # draw the mAP plot (APs of all classes in decreasing order)
+             window_title = "mAP"
+             plot_title = "mAP = {0:.2f}%".format(mAP * 100)
+             x_label = "Average Precision"
+             output_path = output_files_path + "/mAP.png"
+             to_show = True
+             plot_color = 'royalblue'
+             draw_plot_func(
+                 ap_dictionary,
+                 n_classes,
+                 window_title,
+                 plot_title,
+                 x_label,
+                 output_path,
+                 to_show,
+                 plot_color,
+                 ""
+             )
+
+     def predict_raw(self, img_path):
+         raw_img = cv2.imread(img_path)
+         print('img shape: ', raw_img.shape)
+         img = self.preprocess_img(raw_img)
+         imgs = np.expand_dims(img, axis=0)
+         return self.yolo_model.predict(imgs)
+
+     def predict_nonms(self, img_path, iou_threshold=0.413, score_threshold=0.1):
+         raw_img = cv2.imread(img_path)
+         print('img shape: ', raw_img.shape)
+         img = self.preprocess_img(raw_img)
+         imgs = np.expand_dims(img, axis=0)
+         yolov4_output = self.yolo_model.predict(imgs)
+         output = yolov4_head(yolov4_output, self.num_classes, self.anchors, self.xyscale)
+         pred_output = nms(output, self.img_size, self.num_classes, iou_threshold, score_threshold)
+         pred_output = [p.numpy() for p in pred_output]
+         detections = get_detection_data(img=raw_img,
+                                         model_outputs=pred_output,
+                                         class_names=self.class_names)
+         draw_bbox(raw_img, detections, cmap=self.class_color, random_color=True)
+         return detections
+
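Typical inference use of the class: construct it with the Darknet weights and class file, then call predict on an RGB array. A hedged sketch ('street.jpg' is a placeholder path; the weights file must be downloaded separately):

import cv2
from models import Yolov4

model = Yolov4(weight_path='yolov4.weights', class_name_path='coco_classes.txt')
img = cv2.cvtColor(cv2.imread('street.jpg'), cv2.COLOR_BGR2RGB)  # predict_img expects RGB
annotated, detections = model.predict(img)
print(detections[['class_name', 'score', 'x1', 'y1', 'x2', 'y2']])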
requirements.txt ADDED
@@ -0,0 +1,96 @@
+ absl-py==1.3.0
+ aiohttp==3.8.3
+ aiosignal==1.3.1
+ anyio==3.6.2
+ astunparse==1.6.3
+ async-timeout==4.0.2
+ attrs==22.1.0
+ bcrypt==4.0.1
+ cachetools==5.2.0
+ certifi==2022.9.24
+ cffi==1.15.1
+ charset-normalizer==2.1.1
+ click==8.1.3
+ colorama==0.4.6
+ contourpy==1.0.6
+ cryptography==38.0.3
+ cycler==0.11.0
+ fastapi==0.87.0
+ ffmpy==0.3.0
+ flatbuffers==22.10.26
+ fonttools==4.38.0
+ frozenlist==1.3.3
+ fsspec==2022.11.0
+ gast==0.4.0
+ google-auth==2.14.1
+ google-auth-oauthlib==0.4.6
+ google-pasta==0.2.0
+ gradio==3.10.0
+ grpcio==1.50.0
+ h11==0.12.0
+ h5py==3.7.0
+ httpcore==0.15.0
+ httpx==0.23.1
+ idna==3.4
+ importlib-metadata==5.0.0
+ Jinja2==3.1.2
+ joblib==1.2.0
+ keras==2.11.0
+ kiwisolver==1.4.4
+ libclang==14.0.6
+ linkify-it-py==1.0.3
+ Markdown==3.4.1
+ markdown-it-py==2.1.0
+ MarkupSafe==2.1.1
+ matplotlib==3.6.2
+ mdit-py-plugins==0.3.1
+ mdurl==0.1.2
+ multidict==6.0.2
+ numpy==1.23.4
+ oauthlib==3.2.2
+ opencv-python==4.6.0.66
+ opt-einsum==3.3.0
+ orjson==3.8.1
+ packaging==21.3
+ pandas==1.5.1
+ paramiko==2.12.0
+ Pillow==9.3.0
+ protobuf==3.19.6
+ pyasn1==0.4.8
+ pyasn1-modules==0.2.8
+ pycparser==2.21
+ pycryptodome==3.15.0
+ pydantic==1.10.2
+ pydub==0.25.1
+ PyNaCl==1.5.0
+ pyparsing==3.0.9
+ python-dateutil==2.8.2
+ python-multipart==0.0.5
+ pytz==2022.6
+ PyYAML==6.0
+ requests==2.28.1
+ requests-oauthlib==1.3.1
+ rfc3986==1.5.0
+ rsa==4.9
+ scikit-learn==1.1.3
+ scipy==1.9.3
+ six==1.16.0
+ sniffio==1.3.0
+ starlette==0.21.0
+ tensorboard==2.11.0
+ tensorboard-data-server==0.6.1
+ tensorboard-plugin-wit==1.8.1
+ tensorflow==2.11.0
+ tensorflow-estimator==2.11.0
+ termcolor==2.1.0
+ threadpoolctl==3.1.0
+ tqdm==4.64.1
+ typing_extensions==4.4.0
+ uc-micro-py==1.0.1
+ urllib3==1.26.12
+ uvicorn==0.19.0
+ websockets==10.4
+ Werkzeug==2.2.2
+ wrapt==1.14.1
+ yarl==1.8.1
+ zipp==3.10.0
utils.py ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import pandas as pd
4
+ import operator
5
+ import matplotlib.pyplot as plt
6
+ import os
7
+ from sklearn.model_selection import train_test_split
8
+ from tensorflow.keras.utils import Sequence
9
+ from config import yolo_config
10
+
11
+
12
+ def load_weights(model, weights_file_path):
13
+ conv_layer_size = 110
14
+ conv_output_idxs = [93, 101, 109]
15
+ with open(weights_file_path, 'rb') as file:
16
+ major, minor, revision, seen, _ = np.fromfile(file, dtype=np.int32, count=5)
17
+
18
+ bn_idx = 0
19
+ for conv_idx in range(conv_layer_size):
20
+ conv_layer_name = f'conv2d_{conv_idx}' if conv_idx > 0 else 'conv2d'
21
+ bn_layer_name = f'batch_normalization_{bn_idx}' if bn_idx > 0 else 'batch_normalization'
22
+
23
+ conv_layer = model.get_layer(conv_layer_name)
24
+ filters = conv_layer.filters
25
+ kernel_size = conv_layer.kernel_size[0]
26
+ input_dims = conv_layer.input_shape[-1]
27
+
28
+ if conv_idx not in conv_output_idxs:
29
+ # darknet bn layer weights: [beta, gamma, mean, variance]
30
+ bn_weights = np.fromfile(file, dtype=np.float32, count=4 * filters)
31
+ # tf bn layer weights: [gamma, beta, mean, variance]
32
+ bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
33
+ bn_layer = model.get_layer(bn_layer_name)
34
+ bn_idx += 1
35
+ else:
36
+ conv_bias = np.fromfile(file, dtype=np.float32, count=filters)
37
+
38
+ # darknet shape: (out_dim, input_dims, height, width)
39
+ # tf shape: (height, width, input_dims, out_dim)
40
+ conv_shape = (filters, input_dims, kernel_size, kernel_size)
41
+ conv_weights = np.fromfile(file, dtype=np.float32, count=np.product(conv_shape))
42
+ conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
43
+
44
+ if conv_idx not in conv_output_idxs:
45
+ conv_layer.set_weights([conv_weights])
46
+ bn_layer.set_weights(bn_weights)
47
+ else:
48
+ conv_layer.set_weights([conv_weights, conv_bias])
49
+
50
+ if len(file.read()) == 0:
51
+ print('all weights read')
52
+ else:
53
+ print(f'failed to read all weights, # of unread weights: {len(file.read())}')
54
+
55
+
56
+ def get_detection_data(img, model_outputs, class_names):
57
+ """
58
+
59
+ :param img: target raw image
60
+ :param model_outputs: outputs from inference_model
61
+ :param class_names: list of object class names
62
+ :return:
63
+ """
64
+
65
+ num_bboxes = model_outputs[-1][0]
66
+ boxes, scores, classes = [output[0][:num_bboxes] for output in model_outputs[:-1]]
67
+
68
+ h, w = img.shape[:2]
69
+ df = pd.DataFrame(boxes, columns=['x1', 'y1', 'x2', 'y2'])
70
+ df[['x1', 'x2']] = (df[['x1', 'x2']] * w).astype('int64')
71
+ df[['y1', 'y2']] = (df[['y1', 'y2']] * h).astype('int64')
72
+ df['class_name'] = np.array(class_names)[classes.astype('int64')]
73
+ df['score'] = scores
74
+ df['w'] = df['x2'] - df['x1']
75
+ df['h'] = df['y2'] - df['y1']
76
+
77
+ print(f'# of bboxes: {num_bboxes}')
78
+ return df
79
+
80
+ def read_annotation_lines(annotation_path, test_size=None, random_seed=5566):
81
+ with open(annotation_path) as f:
82
+ lines = f.readlines()
83
+ if test_size:
84
+ return train_test_split(lines, test_size=test_size, random_state=random_seed)
85
+ else:
86
+ return lines
87
+
88
+ def draw_bbox(img, detections, cmap, random_color=True, figsize=(10, 10), show_img=True, show_text=True):
89
+ """
90
+ Draw bounding boxes on the img.
91
+ :param img: BGR img.
92
+ :param detections: pandas DataFrame containing detections
93
+ :param random_color: assign random color for each objects
94
+ :param cmap: object colormap
95
+ :param plot_img: if plot img with bboxes
96
+ :return: None
97
+ """
98
+ img = np.array(img)
99
+ scale = max(img.shape[0:2]) / 416
100
+ line_width = int(2 * scale)
101
+
102
+ for _, row in detections.iterrows():
103
+ x1, y1, x2, y2, cls, score, w, h = row.values
104
+ color = list(np.random.random(size=3) * 255) if random_color else cmap[cls]
105
+ cv2.rectangle(img, (x1, y1), (x2, y2), color, line_width)
106
+ if show_text:
107
+ text = f'{cls} {score:.2f}'
108
+ font = cv2.FONT_HERSHEY_DUPLEX
109
+ font_scale = max(0.3 * scale, 0.3)
110
+ thickness = max(int(1 * scale), 1)
111
+ (text_width, text_height) = cv2.getTextSize(text, font, fontScale=font_scale, thickness=thickness)[0]
112
+ cv2.rectangle(img, (x1 - line_width//2, y1 - text_height), (x1 + text_width, y1), color, cv2.FILLED)
113
+ cv2.putText(img, text, (x1, y1), font, font_scale, (255, 255, 255), thickness, cv2.LINE_AA)
114
+ if show_img:
115
+ plt.figure(figsize=figsize)
116
+ plt.imshow(img)
117
+ plt.show()
118
+ return img
119
+
120
+
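
Continuing the sketch above, drawing the fake detections back onto the image (cmap may be None whenever random_color=True, since it is never indexed in that case):

    annotated = draw_bbox(img, df, cmap=None, random_color=True,
                          show_img=False, show_text=True)
    # `annotated` is a copy of img with one labeled rectangle drawn on it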
121
+ class DataGenerator(Sequence):
122
+ """
123
+ Generates data for Keras
124
+ ref: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
125
+ """
126
+ def __init__(self,
127
+ annotation_lines,
128
+ class_name_path,
129
+ folder_path,
130
+ max_boxes=100,
131
+ shuffle=True):
132
+ self.annotation_lines = annotation_lines
133
+ self.class_name_path = class_name_path
134
+ self.num_classes = len([line.strip() for line in open(class_name_path).readlines()])
135
+ self.num_gpu = yolo_config['num_gpu']
136
+ self.batch_size = yolo_config['batch_size'] * self.num_gpu
137
+ self.target_img_size = yolo_config['img_size']
138
+ self.anchors = np.array(yolo_config['anchors']).reshape((9, 2))
139
+ self.shuffle = shuffle
140
+ self.indexes = np.arange(len(self.annotation_lines))
141
+ self.folder_path = folder_path
142
+ self.max_boxes = max_boxes
143
+ self.on_epoch_end()
144
+
145
+ def __len__(self):
146
+ 'number of batches per epoch'
147
+ return int(np.ceil(len(self.annotation_lines) / self.batch_size))
148
+
149
+ def __getitem__(self, index):
150
+ 'Generate one batch of data'
151
+
152
+ # Generate indexes of the batch
153
+ idxs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
154
+
155
+ # Find list of IDs
156
+ lines = [self.annotation_lines[i] for i in idxs]
157
+
158
+ # Generate data
159
+ X, y_tensor, y_bbox = self.__data_generation(lines)
160
+
161
+ return [X, *y_tensor, y_bbox], np.zeros(len(lines))
162
+
163
+ def on_epoch_end(self):
164
+ 'Updates indexes after each epoch'
165
+ if self.shuffle:
166
+ np.random.shuffle(self.indexes)
167
+
168
+ def __data_generation(self, annotation_lines):
169
+ """
170
+ Generates data containing batch_size samples
171
+ :param annotation_lines:
172
+ :return:
173
+ """
174
+
175
+ X = np.empty((len(annotation_lines), *self.target_img_size), dtype=np.float32)
176
+ y_bbox = np.empty((len(annotation_lines), self.max_boxes, 5), dtype=np.float32) # x1y1x2y2
177
+
178
+ for i, line in enumerate(annotation_lines):
179
+ img_data, box_data = self.get_data(line)
180
+ X[i] = img_data
181
+ y_bbox[i] = box_data
182
+
183
+ y_tensor, y_true_boxes_xywh = preprocess_true_boxes(y_bbox, self.target_img_size[:2], self.anchors, self.num_classes)
184
+
185
+ return X, y_tensor, y_true_boxes_xywh
186
+
187
+ def get_data(self, annotation_line):
188
+ line = annotation_line.split()
189
+ img_path = line[0]
190
+ img = cv2.imread(os.path.join(self.folder_path, img_path))[:, :, ::-1]
191
+ ih, iw = img.shape[:2]
192
+ h, w, c = self.target_img_size
193
+ boxes = np.array([np.array(list(map(float, box.split(',')))) for box in line[1:]], dtype=np.float32) # x1y1x2y2
194
+ scale_w, scale_h = w / iw, h / ih
195
+ img = cv2.resize(img, (w, h))
196
+ image_data = np.array(img) / 255.
197
+
198
+ # correct boxes coordinates
199
+ box_data = np.zeros((self.max_boxes, 5))
200
+ if len(boxes) > 0:
201
+ np.random.shuffle(boxes)
202
+ boxes = boxes[:self.max_boxes]
203
+ boxes[:, [0, 2]] = boxes[:, [0, 2]] * scale_w # + dx
204
+ boxes[:, [1, 3]] = boxes[:, [1, 3]] * scale_h # + dy
205
+ box_data[:len(boxes)] = boxes
206
+
207
+ return image_data, box_data
208
+
209
+
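
A usage sketch for the generator (the paths are placeholders, and the fit call is an assumption about how a training script would wire this up; batch size, image size, and anchors all come from config.yolo_config):

    lines = read_annotation_lines('dataset/txt/anno.txt')
    data_gen = DataGenerator(lines,
                             class_name_path='class_names/coco_classes.txt',
                             folder_path='dataset/img')
    # each batch is ([X, *y_tensor, y_bbox], zeros), matching __getitem__ above
    # training_model.fit(data_gen, epochs=10)  # hypothetical training model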
210
+ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
211
+ '''Preprocess true boxes to training input format
212
+
213
+ Parameters
214
+ ----------
215
+ true_boxes: array, shape=(bs, max boxes per img, 5)
216
+ Absolute x_min, y_min, x_max, y_max, class_id, in input_shape pixel coordinates.
217
+ input_shape: array-like, hw, multiples of 32
218
+ anchors: array, shape=(N, 2), (9, wh)
219
+ num_classes: int
220
+
221
+ Returns
222
+ -------
223
+ y_true: list of arrays, shaped like yolo_outputs; xywh are stored as absolute values (see the abs xy / abs wh assignments below)
224
+
225
+ '''
226
+
227
+ num_stages = 3 # default for YOLO; tiny YOLO uses 2
228
+ anchor_mask = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
229
+ bbox_per_grid = 3
230
+ true_boxes = np.array(true_boxes, dtype='float32')
231
+ true_boxes_abs = np.array(true_boxes, dtype='float32')
232
+ input_shape = np.array(input_shape, dtype='int32')
233
+ true_boxes_xy = (true_boxes_abs[..., 0:2] + true_boxes_abs[..., 2:4]) // 2 # (bs, max_boxes, 2)
234
+ true_boxes_wh = true_boxes_abs[..., 2:4] - true_boxes_abs[..., 0:2] # (bs, max_boxes, 2)
235
+
236
+ # Normalize x,y,w, h, relative to img size -> (0~1)
237
+ true_boxes[..., 0:2] = true_boxes_xy/input_shape[::-1] # xy
238
+ true_boxes[..., 2:4] = true_boxes_wh/input_shape[::-1] # wh
239
+
240
+ bs = true_boxes.shape[0]
241
+ grid_sizes = [input_shape//{0:8, 1:16, 2:32}[stage] for stage in range(num_stages)]
242
+ y_true = [np.zeros((bs,
243
+ grid_sizes[s][0],
244
+ grid_sizes[s][1],
245
+ bbox_per_grid,
246
+ 5+num_classes), dtype='float32')
247
+ for s in range(num_stages)]
248
+ # [(?, 52, 52, 3, 5+num_classes) (?, 26, 26, 3, 5+num_classes) (?, 13, 13, 3, 5+num_classes) ]
249
+ y_true_boxes_xywh = np.concatenate((true_boxes_xy, true_boxes_wh), axis=-1)
250
+ # Expand dim to apply broadcasting.
251
+ anchors = np.expand_dims(anchors, 0) # (1, 9 , 2)
252
+ anchor_maxes = anchors / 2. # (1, 9 , 2)
253
+ anchor_mins = -anchor_maxes # (1, 9 , 2)
254
+ valid_mask = true_boxes_wh[..., 0] > 0 # (bs, max_boxes)
255
+
256
+ for batch_idx in range(bs):
257
+ # Discard zero rows.
258
+ wh = true_boxes_wh[batch_idx, valid_mask[batch_idx]] # (# of bbox, 2)
259
+ num_boxes = len(wh)
260
+ if num_boxes == 0: continue
261
+ wh = np.expand_dims(wh, -2) # (# of bbox, 1, 2)
262
+ box_maxes = wh / 2. # (# of bbox, 1, 2)
263
+ box_mins = -box_maxes # (# of bbox, 1, 2)
264
+
265
+ # Compute IoU between each anchors and true boxes for responsibility assignment
266
+ intersect_mins = np.maximum(box_mins, anchor_mins) # (# of bbox, 9, 2)
267
+ intersect_maxes = np.minimum(box_maxes, anchor_maxes)
268
+ intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
269
+ intersect_area = np.prod(intersect_wh, axis=-1) # (# of bbox, 9)
270
+ box_area = wh[..., 0] * wh[..., 1] # (# of bbox, 1)
271
+ anchor_area = anchors[..., 0] * anchors[..., 1] # (1, 9)
272
+ iou = intersect_area / (box_area + anchor_area - intersect_area) # (# of bbox, 9)
273
+
274
+ # Find best anchor for each true box
275
+ best_anchors = np.argmax(iou, axis=-1) # (# of bbox,)
276
+ for box_idx in range(num_boxes):
277
+ best_anchor = best_anchors[box_idx]
278
+ for stage in range(num_stages):
279
+ if best_anchor in anchor_mask[stage]:
280
+ x_offset = true_boxes[batch_idx, box_idx, 0]*grid_sizes[stage][1]
281
+ y_offset = true_boxes[batch_idx, box_idx, 1]*grid_sizes[stage][0]
282
+ # Grid Index
283
+ grid_col = np.floor(x_offset).astype('int32')
284
+ grid_row = np.floor(y_offset).astype('int32')
285
+ anchor_idx = anchor_mask[stage].index(best_anchor)
286
+ class_idx = true_boxes[batch_idx, box_idx, 4].astype('int32')
287
+ # y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 0] = x_offset - grid_col # x
288
+ # y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 1] = y_offset - grid_row # y
289
+ # y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, :4] = true_boxes_abs[batch_idx, box_idx, :4] # abs xywh
290
+ y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, :2] = true_boxes_xy[batch_idx, box_idx, :] # abs xy
291
+ y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 2:4] = true_boxes_wh[batch_idx, box_idx, :] # abs wh
292
+ y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 4] = 1 # confidence
293
+
294
+ y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 5+class_idx] = 1 # one-hot encoding
295
+ # smooth
296
+ # onehot = np.zeros(num_classes, dtype=np.float)
297
+ # onehot[class_idx] = 1.0
298
+ # uniform_distribution = np.full(num_classes, 1.0 / num_classes)
299
+ # delta = 0.01
300
+ # smooth_onehot = onehot * (1 - delta) + delta * uniform_distribution
301
+ # y_true[stage][batch_idx, grid_row, grid_col, anchor_idx, 5:] = smooth_onehot
302
+
303
+ return y_true, y_true_boxes_xywh
304
+
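
A quick shape check with a single dummy box (assuming the usual 416x416 input and the nine anchors from config.yolo_config; with num_classes=80 the last axis is 5+80=85):

    import numpy as np

    boxes = np.zeros((1, 100, 5), dtype='float32')
    boxes[0, 0] = [50, 60, 150, 200, 0]  # x1, y1, x2, y2, class_id
    anchors = np.array(yolo_config['anchors']).reshape((9, 2))
    y_true, y_xywh = preprocess_true_boxes(boxes, (416, 416), anchors, num_classes=80)
    # y_true shapes: (1, 52, 52, 3, 85), (1, 26, 26, 3, 85), (1, 13, 13, 3, 85)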
305
+ """
306
+ Calculate the AP given the recall and precision array
307
+ 1st) We compute a version of the measured precision/recall curve with
308
+ precision monotonically decreasing
309
+ 2nd) We compute the AP as the area under this curve by numerical integration.
310
+ """
311
+ def voc_ap(rec, prec):
312
+ """
313
+ --- Official matlab code VOC2012---
314
+ mrec=[0 ; rec ; 1];
315
+ mpre=[0 ; prec ; 0];
316
+ for i=numel(mpre)-1:-1:1
317
+ mpre(i)=max(mpre(i),mpre(i+1));
318
+ end
319
+ i=find(mrec(2:end)~=mrec(1:end-1))+1;
320
+ ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
321
+ """
322
+ rec.insert(0, 0.0) # insert 0.0 at beginning of list
323
+ rec.append(1.0) # insert 1.0 at end of list
324
+ mrec = rec[:]
325
+ prec.insert(0, 0.0) # insert 0.0 at beginning of list
326
+ prec.append(0.0) # insert 0.0 at end of list
327
+ mpre = prec[:]
328
+ """
329
+ This part makes the precision monotonically decreasing
330
+ (goes from the end to the beginning)
331
+ matlab: for i=numel(mpre)-1:-1:1
332
+ mpre(i)=max(mpre(i),mpre(i+1));
333
+ """
334
+ # matlab indexes start in 1 but python in 0, so I have to do:
335
+ # range(start=(len(mpre) - 2), end=0, step=-1)
336
+ # also the python function range excludes the end, resulting in:
337
+ # range(start=(len(mpre) - 2), end=-1, step=-1)
338
+ for i in range(len(mpre)-2, -1, -1):
339
+ mpre[i] = max(mpre[i], mpre[i+1])
340
+ """
341
+ This part creates a list of indexes where the recall changes
342
+ matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1;
343
+ """
344
+ i_list = []
345
+ for i in range(1, len(mrec)):
346
+ if mrec[i] != mrec[i-1]:
347
+ i_list.append(i) # if it was matlab would be i + 1
348
+ """
349
+ The Average Precision (AP) is the area under the curve
350
+ (numerical integration)
351
+ matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
352
+ """
353
+ ap = 0.0
354
+ for i in i_list:
355
+ ap += ((mrec[i]-mrec[i-1])*mpre[i])
356
+ return ap, mrec, mpre
357
+
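
A toy check (note that voc_ap mutates the rec and prec lists it is given, via insert/append):

    rec = [0.5, 1.0]
    prec = [1.0, 0.5]
    ap, mrec, mpre = voc_ap(rec, prec)
    # ap == 0.75: recall 0..0.5 at precision 1.0, recall 0.5..1.0 at precision 0.5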
358
+ """
359
+ Draw plot using Matplotlib
360
+ """
361
+ def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, true_p_bar):
362
+ # sort the dictionary by decreasing value, into a list of tuples
363
+ sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1))
364
+ print(sorted_dic_by_value)
365
+ # unpacking the list of tuples into two lists
366
+ sorted_keys, sorted_values = zip(*sorted_dic_by_value)
367
+ #
368
+ if true_p_bar != "":
369
+ """
370
+ Special case to draw in:
371
+ - green -> TP: True Positives (object detected and matches ground-truth)
372
+ - red -> FP: False Positives (object detected but does not match ground-truth)
373
+ - pink -> FN: False Negatives (object not detected but present in the ground-truth)
374
+ """
375
+ fp_sorted = []
376
+ tp_sorted = []
377
+ for key in sorted_keys:
378
+ fp_sorted.append(dictionary[key] - true_p_bar[key])
379
+ tp_sorted.append(true_p_bar[key])
380
+ plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive')
381
+ plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', left=fp_sorted)
382
+ # add legend
383
+ plt.legend(loc='lower right')
384
+ """
385
+ Write number on side of bar
386
+ """
387
+ fig = plt.gcf() # gcf - get current figure
388
+ axes = plt.gca()
389
+ r = fig.canvas.get_renderer()
390
+ for i, val in enumerate(sorted_values):
391
+ fp_val = fp_sorted[i]
392
+ tp_val = tp_sorted[i]
393
+ fp_str_val = " " + str(fp_val)
394
+ tp_str_val = fp_str_val + " " + str(tp_val)
395
+ # trick to paint multicolor with offset:
396
+ # first paint everything and then repaint the first number
397
+ t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold')
398
+ plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold')
399
+ if i == (len(sorted_values)-1): # largest bar
400
+ adjust_axes(r, t, fig, axes)
401
+ else:
402
+ plt.barh(range(n_classes), sorted_values, color=plot_color)
403
+ """
404
+ Write number on side of bar
405
+ """
406
+ fig = plt.gcf() # gcf - get current figure
407
+ axes = plt.gca()
408
+ r = fig.canvas.get_renderer()
409
+ for i, val in enumerate(sorted_values):
410
+ str_val = " " + str(val) # add a space before
411
+ if val < 1.0:
412
+ str_val = " {0:.2f}".format(val)
413
+ t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold')
414
+ # re-set axes to show number inside the figure
415
+ if i == (len(sorted_values)-1): # largest bar
416
+ adjust_axes(r, t, fig, axes)
417
+ # set window title
418
+ fig.canvas.manager.set_window_title(window_title) # canvas.set_window_title was removed in Matplotlib 3.6
419
+ # write classes in y axis
420
+ tick_font_size = 12
421
+ plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)
422
+ """
423
+ Re-scale height accordingly
424
+ """
425
+ init_height = fig.get_figheight()
426
+ # compute the needed height in points and inches
427
+ dpi = fig.dpi
428
+ height_pt = n_classes * (tick_font_size * 1.4) # 1.4 (some spacing)
429
+ height_in = height_pt / dpi
430
+ # compute the required figure height
431
+ top_margin = 0.15 # in percentage of the figure height
432
+ bottom_margin = 0.05 # in percentage of the figure height
433
+ figure_height = height_in / (1 - top_margin - bottom_margin)
434
+ # set new height
435
+ if figure_height > init_height:
436
+ fig.set_figheight(figure_height)
437
+
438
+ # set plot title
439
+ plt.title(plot_title, fontsize=14)
440
+ # set axis titles
441
+ # plt.xlabel('classes')
442
+ plt.xlabel(x_label, fontsize='large')
443
+ # adjust size of window
444
+ fig.tight_layout()
445
+ # save the plot
446
+ fig.savefig(output_path)
447
+ # show image
448
+ if to_show:
449
+ plt.show()
450
+ # close the plot
451
+ # plt.close()
452
+
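
For example, plotting a per-class ground-truth count chart (values and output path are made up):

    counts = {'person': 12, 'car': 7, 'dog': 3}
    draw_plot_func(counts, n_classes=len(counts),
                   window_title='Ground truth', plot_title='Objects per class',
                   x_label='count', output_path='ground_truth.png',
                   to_show=False, plot_color='royalblue', true_p_bar="")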
453
+ """
454
+ Plot - adjust axes
455
+ """
456
+ def adjust_axes(r, t, fig, axes):
457
+ # get text width for re-scaling
458
+ bb = t.get_window_extent(renderer=r)
459
+ text_width_inches = bb.width / fig.dpi
460
+ # get axis width in inches
461
+ current_fig_width = fig.get_figwidth()
462
+ new_fig_width = current_fig_width + text_width_inches
463
+ proportion = new_fig_width / current_fig_width
464
+ # get axis limit
465
+ x_lim = axes.get_xlim()
466
+ axes.set_xlim([x_lim[0], x_lim[1]*proportion])
467
+
468
+
469
+ def read_txt_to_list(path):
470
+ # open txt file lines to a list
471
+ with open(path) as f:
472
+ content = f.readlines()
473
+ # remove whitespace characters like `\n` at the end of each line
474
+ content = [x.strip() for x in content]
475
+ return content
xml_to_txt.py ADDED
@@ -0,0 +1,42 @@
1
+ import xml.etree.ElementTree as ET
2
+ import os
3
+ from glob import glob
4
+
5
+ XML_PATH = './dataset/xml'
6
+ CLASSES_PATH = './class_names/classes.txt'
7
+ TXT_PATH = './dataset/txt/anno.txt'
8
+
9
+
10
+ '''loads the classes'''
11
+ def get_classes(classes_path):
12
+ with open(classes_path) as f:
13
+ class_names = f.readlines()
14
+ class_names = [c.strip() for c in class_names]
15
+ return class_names
16
+
17
+
18
+ classes = get_classes(CLASSES_PATH)
19
+ assert len(classes) > 0, 'no class names detected!'
20
+ print(f'num classes: {len(classes)}')
21
+
22
+ # output file
23
+ list_file = open(TXT_PATH, 'w')
24
+
25
+ for path in glob(os.path.join(XML_PATH, '*.xml')):
26
27
+ # Parse the .xml file (ET.parse accepts a path directly, so no file handle is left open)
28
+ tree = ET.parse(path)
30
+ root = tree.getroot()
31
+ # Write object information to .txt file
32
+ file_name = root.find('filename').text
33
+ print(file_name)
34
+ list_file.write(file_name)
35
+ for obj in root.iter('object'):
36
+ cls = obj.find('name').text
37
+ cls_id = classes.index(cls)
38
+ xmlbox = obj.find('bndbox')
39
+ b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
40
+ list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
41
+ list_file.write('\n')
42
+ list_file.close()
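
Each line of the resulting anno.txt is an image file name followed by space-separated x1,y1,x2,y2,class_id boxes, which is the format read_annotation_lines and DataGenerator.get_data in utils.py consume, e.g. (values made up):

    image_001.jpg 48,240,195,371,0 8,12,352,498,1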
yolov4.weights ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8a4f6c62188738d86dc6898d82724ec0964d0eb9d2ae0f0a9d53d65d108d562
3
+ size 257717640