Spaces:

ozyman
/

fasd

Runtime error

App Files Files Community

ozyman committed on Apr 3, 2023

Commit

73cb092

1 Parent(s): a3d3525

refactor, add thresh, save fix

Browse files

Files changed (1) hide show

app.py +107 -80

app.py CHANGED Viewed

@@ -36,8 +36,9 @@ app_version = 'ddn1'
 device = torch.device("cpu")
 labels = ['Live', 'Spoof']
 # Cut-off for the DeePixBiS score (mean of the predicted mask): a score
 # >= this value is classified 'Real', otherwise 'Spoof'.
-pix_threshhold = 0.45
 # Cut-off for the raw DSDG map score: a score >= this value is classified
 # 'Real', otherwise 'Spoof'.
-dsdg_threshold = 0.003
 examples = [
     ['examples/1_1_21_2_33_scene_fake.jpg'],
     ['examples/frame150_real.jpg'],
@@ -78,7 +79,7 @@ class Normaliztion_valtest(object):
         return image_x
-def prepare_data(images, boxes, depths):
     transform = transforms.Compose([Normaliztion_valtest()])
     files_total = 1
     image_x = np.zeros((files_total, 256, 256, 3))
@@ -86,10 +87,10 @@ def prepare_data(images, boxes, depths):
     for i, (image, bbox, depth_img) in enumerate(
             zip(images, boxes, depths)):
-        x, y, w, h = bbox
         depth_img = cv.cvtColor(depth_img, cv.COLOR_RGB2GRAY)
-        image = image[y:y + h, x:x + w]
-        depth_img = depth_img[y:y + h, x:x + w]
         image_x[i, :, :, :] = cv.resize(image, (256, 256))
         # transform to binary mask --> threshold = 0
@@ -100,89 +101,110 @@ def prepare_data(images, boxes, depths):
     depth_x = torch.from_numpy(depth_x.astype(float)).float()
     return image_x, depth_x
 def find_largest_face(faces):
     # Return the (x, y, w, h) entry of `faces` with the largest w * h area.
     # Returns None when `faces` is empty or no entry has a positive area.
     largest_face = None
     largest_area = 0
-    for (x, y, w, h) in faces:
         area = w * h
         if area > largest_area:
             largest_area = area
-            largest_face = (x, y, w, h)
     return largest_face
-def inference(img):
     # Detect the largest face in `img` and run both anti-spoofing models
     # (DeePixBiS and DSDG) on it.
     # With a face, returns (DeePixBiS image, DeePixBiS confidences,
     # DSDG image, DSDG confidences, DeePixBiS class, DSDG class), where the
     # classes are encoded as 1 for 'Real' and 0 for 'Spoof'.
     # NOTE(review): the `img is None` branch returns 7 values while the two
     # other return statements yield 6 — confirm the arity the caller expects.
     if img is None:
-        return None, {}, None, None, {}, None, None
     grey = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
     faces = faceClassifier.detectMultiScale(
         grey, scaleFactor=1.1, minNeighbors=4)
-    face = find_largest_face(faces)
     if face is not None:
         x, y, w, h = face
         # Convert the (x, y, w, h) box to corner coordinates.
         x2 = x + w
         y2 = y + h
         # --- DeePixBiS: score is the mean of the predicted mask ---
-        faceRegion = img[y:y2, x:x2]
-        faceRegion = tfms(faceRegion)
-        faceRegion = faceRegion.unsqueeze(0)
-        # if model_name == 'DeePixBiS':
-        mask, binary = deepix_model.forward(faceRegion)
-        res_deepix = torch.mean(mask).item()
-        cls_deepix = 'Real' if res_deepix >= pix_threshhold else 'Spoof'
-        confidences_deepix = {'Real confidence': res_deepix}
         # Box/text colour: (0, 255, 0) for 'Real', (255, 0, 0) otherwise.
-        color_deepix = (0, 255, 0) if cls_deepix == 'Real' else (255, 0, 0)
         # Draw on a copy so the input image is left untouched.
-        img_deepix = cv.rectangle(img.copy(), (x, y), (x2, y2), color_deepix, 2)
-        cv.putText(img_deepix, cls_deepix, (x, y2 + 30),
-                    cv.FONT_HERSHEY_COMPLEX, 1, color_deepix)
-        # else:
         # --- DSDG: needs a depth image built from the 3DDFA reconstruction ---
-        dense_flag = True
         # The box handed to tddfa carries a fifth element set to 1
         # (presumably a detection confidence — TODO confirm).
-        box = [x, y, x2, y2, 1]
-        param_lst, roi_box_lst = tddfa(img, [box])
-        ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)
-        depth_img = depth(img, ver_lst, tddfa.tri, with_bg_flag=False)
-        with torch.no_grad():
-            map_score_list = []
             # prepare_data receives the (x, y, w, h) form of the box here.
-            image_x, map_x = prepare_data([img], [list(face)], [depth_img])
-            # get the inputs
-            image_x = image_x.unsqueeze(0)
-            map_x = map_x.unsqueeze(0)
-            inputs = image_x.to(device)
-            test_maps = map_x.to(device)
             # NOTE(review): zeroing optimizer gradients during inference under
             # no_grad looks unnecessary — confirm before removing.
-            optimizer.zero_grad()
-            map_score = 0.0
             # Average, over dimension 1 of `inputs`, the ratio of the summed
             # model output `mu` to the summed mask in `test_maps`.
-            for frame_t in range(inputs.shape[1]):
                 # `map_x` is rebound to a model output here; the mask used
                 # below was already captured in `test_maps`.
-                mu, logvar, map_x, x_concat, x_Block1, x_Block2, x_Block3, x_input = cdcn_model(inputs[:, frame_t, :, :, :])
-                score_norm = torch.sum(mu) / torch.sum(test_maps[:, frame_t, :, :])
-                map_score += score_norm
-            map_score = map_score / inputs.shape[1]
-            map_score_list.append(map_score)
-        res_dsdg = map_score_list[0].item()
         # Scores above 10 are reset to 0.0 and therefore fall on the
         # 'Spoof' side of the threshold.
-        if res_dsdg > 10:
-            res_dsdg = 0.0
-        cls_dsdg = 'Real' if res_dsdg >= dsdg_threshold else 'Spoof'
         # Scaling by 300 happens after classification, so it only affects
         # the reported confidence value.
-        res_dsdg = res_dsdg * 300
-        confidences_dsdg = {'Real confidence': res_dsdg}
-        color_dsdg = (0, 255, 0) if cls_dsdg == 'Real' else (255, 0, 0)
-        img_dsdg = cv.rectangle(img.copy(), (x, y), (x2, y2), color_dsdg, 2)
-        cv.putText(img_dsdg, cls_dsdg, (x, y2 + 30),
-                    cv.FONT_HERSHEY_COMPLEX, 1, color_dsdg)
         # Encode both class strings as integers: 1 for 'Real', 0 otherwise.
-        cls_deepix, cls_dsdg = [1 if cls_ == 'Real' else 0 for cls_ in [cls_deepix, cls_dsdg]]
-        return img_deepix, confidences_deepix, img_dsdg, confidences_dsdg, cls_deepix, cls_dsdg
     else:
         # No face found: echo the input image with empty confidences and
         # no class values.
-        return img, {}, img, {}, None, None
 def upload_to_s3(image_array, app_version, *labels):
@@ -199,12 +221,12 @@ def upload_to_s3(image_array, app_version, *labels):
     s3 = boto3.client('s3')
     # Encode labels and app version in image file name
-    encoded_labels = '_'.join([str(label) for label in labels])
     random_string = str(uuid.uuid4()).split('-')[-1]
     image_name = f"{folder}/{app_version}/{encoded_labels}_{random_string}.jpg"
     # Save image as JPEG
-    image = Image.fromarray(np.uint8(image_array * 255))
     image_bytes = io.BytesIO()
     image.save(image_bytes, format='JPEG')
     image_bytes.seek(0)
@@ -222,25 +244,30 @@ demo = gr.Blocks()
 with demo:
     # UI layout: webcam input + Run button | model outputs | flagging controls.
     with gr.Row():
         with gr.Column():
-            input_img = gr.Image(source='webcam', shape=None, type='numpy')
             btn_run = gr.Button(value="Run")
         with gr.Column():
             outputs=[
                 gr.Image(label='DeePixBiS', type='numpy'),
                 gr.Label(num_top_classes=2, label='DeePixBiS'),
                 gr.Image(label='DSDG', type='numpy'),
-                gr.Label(num_top_classes=2, label='DSDG')]
         with gr.Column():
             # type='index' makes the radio yield the position of the choice.
             # NOTE(review): with this order 'Real' is index 0, whereas
             # inference encodes 'Real' as 1 — confirm the intended mapping.
             radio = gr.Radio(
-                ["Real", "Spoof", "None"], label="True label", type='index')
             flag = gr.Button(value="Flag")
             status = gr.Textbox()
             # NOTE(review): `labels` is used here before it is rebound to the
             # gr.Number components below, so at this point it still refers to
             # the module-level list ['Live', 'Spoof'].
-            example_block = gr.Examples(examples, [input_img], outputs+labels)
     # Hidden numeric fields that carry the two predicted classes to the
     # flagging callback; they start at -1.
-    labels = [gr.Number(visible=False, value=-1), gr.Number(visible=False, value=-1)]
-    btn_run.click(inference, [input_img], outputs+labels)
     app_version_block = gr.Textbox(value=app_version, visible=False)
     # Flagging uploads the input image together with the app version, the
     # user-selected radio index and the two hidden class values.
-    flag.click(upload_to_s3, [input_img, app_version_block, radio]+labels, [status], show_progress=True)
 if __name__ == '__main__':

 device = torch.device("cpu")
 labels = ['Live', 'Spoof']
 # Cut-off for the DeePixBiS score (mean of the predicted mask): a score
 # >= this value is classified 'Real'.
+PIX_THRESHOLD = 0.45
 # Default value of the 'DSDG threshold' slider. dsdg_model_inference divides
 # the slider value by 30000 before comparing it with the raw DSDG score,
 # so 80 corresponds to a raw cut-off of about 0.00267.
+DSDG_THRESHOLD = 80
 # Faces whose bounding box is narrower than this many pixels are not scored
 # by the DSDG model; they are reported as too small instead.
+MIN_FACE_WIDTH_THRESHOLD = 210
 examples = [
     ['examples/1_1_21_2_33_scene_fake.jpg'],
     ['examples/frame150_real.jpg'],
         return image_x
+def prepare_data_dsdg(images, boxes, depths):
     transform = transforms.Compose([Normaliztion_valtest()])
     files_total = 1
     image_x = np.zeros((files_total, 256, 256, 3))
     for i, (image, bbox, depth_img) in enumerate(
             zip(images, boxes, depths)):
+        x, y, x2, y2 = bbox
         depth_img = cv.cvtColor(depth_img, cv.COLOR_RGB2GRAY)
+        image = image[y:y2, x:x2]
+        depth_img = depth_img[y:y2, x:x2]
         image_x[i, :, :, :] = cv.resize(image, (256, 256))
         # transform to binary mask --> threshold = 0
     depth_x = torch.from_numpy(depth_x.astype(float)).float()
     return image_x, depth_x
 def find_largest_face(faces):
     # `faces` is an iterable of (x, y, w, h) boxes. The entry with the
     # largest w * h area is returned unchanged (the same object that was in
     # `faces`); None is returned when `faces` is empty or no entry has a
     # positive area.
+    # find the largest face in the list
     largest_face = None
     largest_area = 0
+    for face in faces:
+        x, y, w, h = face
         area = w * h
         if area > largest_area:
             largest_area = area
+            largest_face = face
     return largest_face
+def extract_face(img):
     # Detect faces in `img` with the cascade classifier and return the
     # largest one as an (x, y, w, h) box.
     # Returns None when `img` is None or when no face is detected.
+    face = None
     if img is None:
+        return face
     # Detection runs on a greyscale copy; the RGB2GRAY conversion implies
     # `img` is expected in RGB channel order.
     grey = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
     faces = faceClassifier.detectMultiScale(
         grey, scaleFactor=1.1, minNeighbors=4)
     # len() is used for the emptiness test instead of plain truthiness
     # (presumably because `faces` may be a NumPy array — TODO confirm).
+    if len(faces):
+        face = find_largest_face(faces)
+    return face
+def deepix_model_inference(img, bbox):
     # Score the face inside `bbox` with the DeePixBiS model.
     # `bbox` is given as corner coordinates (x, y, x2, y2).
     # Returns (annotated copy of `img`, {'Real confidence': score}, class),
     # where class is 1 for 'Real' and 0 for 'Spoof'.
+    x, y, x2, y2 = bbox
     # Crop the face, apply the `tfms` transform and add a leading dimension.
+    faceRegion = img[y:y2, x:x2]
+    faceRegion = tfms(faceRegion)
+    faceRegion = faceRegion.unsqueeze(0)
     # NOTE(review): unlike the DSDG path, this forward pass is not wrapped in
     # torch.no_grad() — confirm whether gradient tracking is intended here.
+    mask, binary = deepix_model.forward(faceRegion)
     # The score is the mean of the predicted mask.
+    res_deepix = torch.mean(mask).item()
+    cls_deepix = 'Real' if res_deepix >= PIX_THRESHOLD else 'Spoof'
+    confidences_deepix = {'Real confidence': res_deepix}
     # Box/text colour: (0, 255, 0) for 'Real', (255, 0, 0) otherwise.
+    color_deepix = (0, 255, 0) if cls_deepix == 'Real' else (255, 0, 0)
     # Draw on a copy so the caller's image is left untouched; the label text
     # is placed 30 pixels below the bottom edge of the box.
+    img_deepix = cv.rectangle(img.copy(), (x, y), (x2, y2), color_deepix, 2)
+    cv.putText(img_deepix, cls_deepix, (x, y2 + 30),
+                cv.FONT_HERSHEY_COMPLEX, 1, color_deepix)
     # Replace the class string with its integer encoding before returning.
+    cls_deepix = 1 if cls_deepix == 'Real' else 0
+    return img_deepix, confidences_deepix, cls_deepix
+def dsdg_model_inference(img, bbox, dsdg_thresh):
     # Score the face inside `bbox` with the DSDG model.
     # `bbox` is given as corner coordinates (x, y, x2, y2); `dsdg_thresh` is
     # the slider value, rescaled below before being compared with the score.
     # Returns (annotated copy of `img`, confidences dict, class), where class
     # is 1 for 'Real', 0 for 'Spoof' and 2 when the face is too narrow to be
     # scored (in which case the confidences dict is empty).
+    dsdg_thresh = dsdg_thresh / 30000
+    dense_flag = True
+    x, y, x2, y2 = bbox
+    w = x2 - x
+    h = y2 - y
     # Faces narrower than MIN_FACE_WIDTH_THRESHOLD are not scored: the box
     # and a 'Small res' caption are drawn in black and class 2 is returned.
+    if w < MIN_FACE_WIDTH_THRESHOLD:
+        color_dsdg = (0, 0, 0)
+        text = f'Small res ({w}*{h})'
+        img_dsdg = cv.rectangle(img.copy(), (x, y), (x2, y2), color_dsdg, 2)
+        cv.putText(img_dsdg, text, (x, y2 + 30),
+                   cv.FONT_HERSHEY_COMPLEX, 1, color_dsdg)
+        cls_dsdg = 2
+        return img_dsdg, {}, cls_dsdg
     # The box handed to tddfa carries a fifth element set to 1
     # (presumably a detection confidence — TODO confirm).
+    bbox_conf = list(bbox)
+    bbox_conf.append(1)
     # Build a depth image of the face from the 3DDFA reconstruction.
+    param_lst, roi_box_lst = tddfa(img, [bbox_conf])
+    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)
+    depth_img = depth(img, ver_lst, tddfa.tri, with_bg_flag=False)
+    with torch.no_grad():
+        map_score_list = []
+        image_x, map_x = prepare_data_dsdg([img], [list(bbox)], [depth_img])
+        # get the inputs
+        image_x = image_x.unsqueeze(0)
+        map_x = map_x.unsqueeze(0)
+        inputs = image_x.to(device)
+        test_maps = map_x.to(device)
         # NOTE(review): zeroing optimizer gradients during inference under
         # no_grad looks unnecessary — confirm before removing.
+        optimizer.zero_grad()
+        map_score = 0.0
         # Average, over dimension 1 of `inputs`, the ratio of the summed
         # model output `mu` to the summed mask in `test_maps`.
+        for frame_t in range(inputs.shape[1]):
             # `map_x` is rebound to a model output here; the mask used below
             # was already captured in `test_maps`.
+            mu, logvar, map_x, x_concat, x_Block1, x_Block2, x_Block3, x_input = cdcn_model(inputs[:, frame_t, :, :, :])
             # NOTE(review): the denominator is zero if the mask sums to
             # zero — confirm whether that can occur.
+            score_norm = torch.sum(mu) / torch.sum(test_maps[:, frame_t, :, :])
+            map_score += score_norm
+        map_score = map_score / inputs.shape[1]
+        map_score_list.append(map_score)
+    res_dsdg = map_score_list[0].item()
     # Scores above 10 are reset to 0.0 and therefore fall on the 'Spoof'
     # side of any positive threshold.
+    if res_dsdg > 10:
+        res_dsdg = 0.0
+    cls_dsdg = 'Real' if res_dsdg >= dsdg_thresh else 'Spoof'
     # The caption shows the class together with the face size in pixels.
+    text = f'{cls_dsdg} {w}*{h}'
     # Scaling by 300 happens after classification, so it only affects the
     # reported confidence value.
+    res_dsdg = res_dsdg * 300
+    confidences_dsdg = {'Real confidence': res_dsdg}
     # Box/text colour: (0, 255, 0) for 'Real', (255, 0, 0) otherwise.
+    color_dsdg = (0, 255, 0) if cls_dsdg == 'Real' else (255, 0, 0)
+    img_dsdg = cv.rectangle(img.copy(), (x, y), (x2, y2), color_dsdg, 2)
+    cv.putText(img_dsdg, text, (x, y2 + 30),
+                cv.FONT_HERSHEY_COMPLEX, 1, color_dsdg)
     # Replace the class string with its integer encoding before returning.
+    cls_dsdg = 1 if cls_dsdg == 'Real' else 0
+    return img_dsdg, confidences_dsdg, cls_dsdg
+def inference(img, dsdg_thresh):
     # Entry point for the Run button: find the largest face in `img` and run
     # both anti-spoofing models on it.
     # Returns (DeePixBiS image, DeePixBiS confidences, DeePixBiS class,
     # DSDG image, DSDG confidences, DSDG class).
+    face = extract_face(img)
     if face is not None:
         x, y, w, h = face
         # Both model helpers expect corner coordinates, not width/height.
         x2 = x + w
         y2 = y + h
+        bbox = (x, y, x2, y2)
+        img_deepix, confidences_deepix, cls_deepix = deepix_model_inference(img, bbox)
+        img_dsdg, confidences_dsdg, cls_dsdg = dsdg_model_inference(img, bbox, dsdg_thresh)
+        return img_deepix, confidences_deepix, cls_deepix, img_dsdg, confidences_dsdg, cls_dsdg
     else:
         # No face (or no image): echo the input with empty confidences.
         # NOTE(review): the class values are None here; if they reach
         # upload_to_s3 unchanged, its int(label) conversion would raise —
         # confirm how the hidden Number fields pass None through.
+        return img, {}, None, img, {}, None
 def upload_to_s3(image_array, app_version, *labels):
     s3 = boto3.client('s3')
     # Encode labels and app version in image file name
+    encoded_labels = '_'.join([str(int(label)) for label in labels])
     random_string = str(uuid.uuid4()).split('-')[-1]
     image_name = f"{folder}/{app_version}/{encoded_labels}_{random_string}.jpg"
     # Save image as JPEG
+    image = Image.fromarray(image_array)
     image_bytes = io.BytesIO()
     image.save(image_bytes, format='JPEG')
     image_bytes.seek(0)
 with demo:
     # UI layout: webcam input, threshold slider and Run button | model
     # outputs | flagging controls.
     with gr.Row():
         with gr.Column():
+            input_img = gr.Image(source='webcam', shape=None, type='numpy', streaming=False)
             # The slider value is passed to inference as `dsdg_thresh`.
+            dsdg_thresh = gr.Slider(value=DSDG_THRESHOLD, label='DSDG threshold')
             btn_run = gr.Button(value="Run")
         with gr.Column():
             # Order matches the tuple returned by inference: image,
             # confidences and class for DeePixBiS, then the same three for
             # DSDG. The class values go into hidden Number fields that start
             # at -1.
             outputs=[
                 gr.Image(label='DeePixBiS', type='numpy'),
                 gr.Label(num_top_classes=2, label='DeePixBiS'),
+                gr.Number(visible=False, value=-1),
                 gr.Image(label='DSDG', type='numpy'),
+                gr.Label(num_top_classes=2, label='DSDG'),
+                gr.Number(visible=False, value=-1)]
         with gr.Column():
             # type='index' makes the radio yield the position of the choice,
             # so 'Spoof' is 0 and 'Real' is 1, matching the class encoding
             # used by the model helpers; 'None' is 2.
             radio = gr.Radio(
+                ["Spoof", "Real", "None"], label="True label", type='index')
             flag = gr.Button(value="Flag")
             status = gr.Textbox()
+            example_block = gr.Examples(examples, [input_img], outputs)
+    btn_run.click(inference, [input_img, dsdg_thresh], outputs)
     app_version_block = gr.Textbox(value=app_version, visible=False)
     # Flagging uploads the input image together with the app version, the
     # user-selected radio index and the two hidden class values
     # (outputs[2] for DeePixBiS, outputs[5] for DSDG).
+    flag.click(
+        upload_to_s3,
+        [input_img, app_version_block, radio]+[outputs[2], outputs[5]],
+        [status], show_progress=True)
 if __name__ == '__main__':