File size: 5,111 Bytes
0c2c19f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd1ea7f
0c2c19f
 
c12cb7e
0c2c19f
 
 
 
 
 
 
 
c12cb7e
 
 
 
 
 
 
 
dd4f531
 
c12cb7e
 
 
 
0c2c19f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd1ea7f
 
0c2c19f
 
 
 
 
 
 
 
 
 
c12cb7e
0c2c19f
 
bd1ea7f
b2de2a8
bd1ea7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c12cb7e
bd1ea7f
 
 
 
 
 
 
c12cb7e
 
bd1ea7f
c12cb7e
bd1ea7f
 
c12cb7e
 
bd1ea7f
c12cb7e
bd1ea7f
 
 
c12cb7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd1ea7f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license

import os
import sys
from pathlib import Path
import cv2

# Absolute path of this module, and the directory that contains it.
FILE = Path(__file__).resolve()
ROOT = FILE.parent
# Make modules that live next to this file importable; skip if already listed.
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))

# From here on ROOT is expressed relative to the current working directory.
ROOT = Path(os.path.relpath(ROOT, os.getcwd()))

import torch
from yolov5.utils.torch_utils import select_device, time_sync
from yolov5.utils.plots import Annotator, colors, save_one_box
from yolov5.utils.general import (check_img_size,
                        increment_path, non_max_suppression, scale_coords, xyxy2xywh)
from yolov5.utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, pil_to_cv
from yolov5.models.common import DetectMultiBackend
import torchvision
import numpy as np

# Deterministic preprocessing applied to a face crop before age inference:
# array -> PIL image -> float tensor -> per-channel normalisation -> 224x224.
# NOTE(review): the mean/std values look like the usual ImageNet statistics;
# confirm they match what the age model was trained with.
test_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        torchvision.transforms.Resize(size=(224, 224)),
    ]
)


# Randomised variant of the face preprocessing, used for test-time
# augmentation. The random operations run on the tensor produced by ToTensor;
# normalisation and the final 224x224 resize come last, in that order.
test_random_transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.RandomRotation(degrees=(-15, 15)),
        torchvision.transforms.RandomGrayscale(p=0.4),
        torchvision.transforms.RandomPerspective(distortion_scale=0.4, p=0.4),
        torchvision.transforms.RandomAdjustSharpness(sharpness_factor=2),
        torchvision.transforms.RandomAffine(
            degrees=0,
            translate=None,
            scale=(0.9, 1.0),
        ),
        torchvision.transforms.RandomHorizontalFlip(),
        torchvision.transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        torchvision.transforms.Resize(size=(224, 224)),
    ]
)

def load_yolo_model(weights, device="cpu", imgsz=None):
    """Load a YOLOv5 detector through ``DetectMultiBackend`` and warm it up.

    Args:
        weights: Weights path(s) forwarded unchanged to ``DetectMultiBackend``.
        device: Device specification forwarded to ``select_device``.
        imgsz: Two-element inference size. ``None`` (the default) means
            ``[1280, 1280]``. The value is passed through ``check_img_size``
            together with the model stride before the warm-up run.

    Returns:
        The tuple ``(model, stride, names, pt, jit, onnx, engine)`` where every
        element after ``model`` is read from the attribute of the same name on
        the loaded model.
    """
    if imgsz is None:
        # Fresh list per call instead of a mutable default shared across calls.
        imgsz = [1280, 1280]

    # Load model
    device = select_device(device)
    model = DetectMultiBackend(weights, device=device, dnn=False, data=ROOT / 'data/coco128.yaml')
    stride, names, pt, jit, onnx, engine = model.stride, model.names, model.pt, model.jit, model.onnx, model.engine
    imgsz = check_img_size(imgsz, s=stride)  # check image size

    # Half precision is effectively disabled: ``half`` starts as False, so the
    # AND-assignment below can never turn it on and the model stays in FP32.
    half = False
    half &= (pt or jit or onnx or engine) and device.type != 'cpu'  # FP16 supported on limited backends with CUDA
    if pt or jit:
        if half:
            model.model.half()
        else:
            model.model.float()
    model.warmup(imgsz=(1, 3, *imgsz), half=half)

    return model, stride, names, pt, jit, onnx, engine


def _estimate_age(age_model, face_rgb, with_random_augs=False, n_random_augs=12):
    """Return the mean of the age model's outputs for one RGB face crop.

    The crop is always evaluated once through ``test_transforms``. When
    ``with_random_augs`` is true it is additionally evaluated
    ``n_random_augs`` times through ``test_random_transforms`` and all
    outputs are averaged.
    """
    pipelines = [test_transforms]
    if with_random_augs:
        pipelines.extend([test_random_transforms] * n_random_augs)

    ages = []
    with torch.no_grad():
        for pipeline in pipelines:
            batch = pipeline(face_rgb).unsqueeze_(0)  # add the batch dimension
            ages.append(age_model(batch)[0].item())
    return float(np.mean(np.array(ages), axis=0))


def predict(
        age_model,
        model,  # model.pt path(s)
        stride,
        source=None,  # PIL Image
        imgsz=(640, 640),  # inference size (height, width)
        conf_thres=0.5,  # confidence threshold
        iou_thres=0.45,  # NMS IOU threshold
        max_det=1000,  # maximum detections per image
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        classes=None,  # filter by class: --class 0, or --class 0 2 3
        agnostic_nms=False,  # class-agnostic NMS
        augment=False,  # augmented inference
        visualize=False,  # visualize features
        half=False,  # use FP16 half-precision inference
        with_random_augs=False
        ):
    """Detect boxes in ``source`` and estimate an age for every detected crop.

    Args:
        age_model: Callable applied to a ``(1, C, 224, 224)`` tensor built by
            ``test_transforms`` / ``test_random_transforms``; element ``[0]``
            of its output is read with ``.item()``.
        model: Detector called as ``model(im, augment=..., visualize=...)``.
        stride: Stride forwarded to ``pil_to_cv``.
        source: Input image handed to ``pil_to_cv``.
        imgsz: Inference size; only ``imgsz[0]`` is forwarded to ``pil_to_cv``.
        conf_thres, iou_thres, classes, agnostic_nms, max_det: Forwarded to
            ``non_max_suppression``.
        device: Device the detector input tensor is moved to. The face crops
            given to ``age_model`` are NOT moved to this device.
        augment: Forwarded to the detector call.
        visualize: Accepted for interface compatibility but ignored; the
            detector is always called with ``visualize=False``.
        half: Convert the detector input to FP16 instead of FP32.
        with_random_augs: Also average the age over 12 randomly augmented
            versions of each crop.

    Returns:
        A list with one dict per kept detection, holding the keys ``"class"``
        (the mean age truncated to an int and rendered as a string),
        ``"xmin"``, ``"ymin"``, ``"xmax"``, ``"ymax"`` and ``"conf"``.
        Detections whose box has zero area after integer truncation are
        skipped, because there is no crop to run the age model on.
    """
    # NOTE(review): pil_to_cv presumably returns the network-ready array and
    # the original BGR image as a numpy array -- confirm against its source.
    im, im0 = pil_to_cv(source, img_size=imgsz[0], stride=stride)

    im = torch.from_numpy(im).to(device)
    im = im.half() if half else im.float()  # uint8 to fp16/32
    im /= 255  # 0 - 255 to 0.0 - 1.0
    if len(im.shape) == 3:
        im = im[None]  # expand for batch dim

    # Inference. No gradients are needed, so avoid building an autograd graph.
    visualize = False
    with torch.no_grad():
        pred = model(im, augment=augment, visualize=visualize)

    # NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)

    # Process predictions
    preds = []
    for det in pred:  # per image
        if not len(det):
            continue

        # Rescale boxes from img_size to im0 size. ``im`` is never rebound
        # below, so its shape stays that of the detector input.
        det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

        for *xyxy, conf, _ in reversed(det):
            xmin, ymin, xmax, ymax = (int(v) for v in xyxy)

            face = im0[ymin:ymax, xmin:xmax]
            if face.size == 0:
                # Degenerate box: cv2.cvtColor would fail on an empty crop.
                continue
            face_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)

            age = _estimate_age(age_model, face_rgb, with_random_augs)

            preds.append({
                "class": str(int(age)),
                "xmin": xmin,
                "ymin": ymin,
                "xmax": xmax,
                "ymax": ymax,
                "conf": float(conf),
            })

    return preds