Jacob Logas committed
Commit 1173b78 · 0 Parent(s):

first commit

.gitignore ADDED
@@ -0,0 +1,162 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
.pre-commit-config.yaml ADDED
@@ -0,0 +1,16 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v2.3.0
4
+ hooks:
5
+ - id: check-yaml
6
+ - id: end-of-file-fixer
7
+ - id: trailing-whitespace
8
+ - repo: https://github.com/astral-sh/ruff-pre-commit
9
+ # Ruff version.
10
+ rev: v0.5.2
11
+ hooks:
12
+ # Run the linter.
13
+ - id: ruff
14
+ args: [ --fix ]
15
+ # Run the formatter.
16
+ - id: ruff-format
README.md ADDED
@@ -0,0 +1,10 @@
1
+ ---
2
+ title: LowKey
3
+ emoji: 😒
4
+ colorFrom: pink
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 4.38.1
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
align/__init__.py ADDED
File without changes
align/align_trans.py ADDED
@@ -0,0 +1,295 @@
1
+ import numpy as np
2
+ import cv2
3
+ from align.matlab_cp2tform import get_similarity_transform_for_cv2
4
+
5
+
6
+ # reference facial points, a list of coordinates (x,y)
7
+ REFERENCE_FACIAL_POINTS = [ # default reference facial points for crop_size = (112, 112); should adjust REFERENCE_FACIAL_POINTS accordingly for other crop_size
8
+ [30.29459953, 51.69630051],
9
+ [65.53179932, 51.50139999],
10
+ [48.02519989, 71.73660278],
11
+ [33.54930115, 92.3655014],
12
+ [62.72990036, 92.20410156],
13
+ ]
14
+
15
+ DEFAULT_CROP_SIZE = (96, 112)
16
+
17
+
18
+ class FaceWarpException(Exception):
19
+ def __str__(self):
20
+ return "In File {}:{}".format(__file__, super.__str__(self))
21
+
22
+
23
+ def get_reference_facial_points(
24
+ output_size=None,
25
+ inner_padding_factor=0.0,
26
+ outer_padding=(0, 0),
27
+ default_square=False,
28
+ ):
29
+ """
30
+ Function:
31
+ ----------
32
+ get reference 5 key points according to crop settings:
33
+ 0. Set default crop_size:
34
+ if default_square:
35
+ crop_size = (112, 112)
36
+ else:
37
+ crop_size = (96, 112)
38
+ 1. Pad the crop_size by inner_padding_factor in each side;
39
+ 2. Resize crop_size into (output_size - outer_padding*2),
40
+ pad into output_size with outer_padding;
41
+ 3. Output reference_5point;
42
+ Parameters:
43
+ ----------
44
+ @output_size: (w, h) or None
45
+ size of aligned face image
46
+ @inner_padding_factor: (w_factor, h_factor)
47
+ padding factor for inner (w, h)
48
+ @outer_padding: (w_pad, h_pad)
49
+ each row is a pair of coordinates (x, y)
50
+ @default_square: True or False
51
+ if True:
52
+ default crop_size = (112, 112)
53
+ else:
54
+ default crop_size = (96, 112);
55
+ !!! make sure, if output_size is not None:
56
+ (output_size - outer_padding)
57
+ = some_scale * (default crop_size * (1.0 + inner_padding_factor))
58
+ Returns:
59
+ ----------
60
+ @reference_5point: 5x2 np.array
61
+ each row is a pair of transformed coordinates (x, y)
62
+ """
63
+ # print('\n===> get_reference_facial_points():')
64
+
65
+ # print('---> Params:')
66
+ # print(' output_size: ', output_size)
67
+ # print(' inner_padding_factor: ', inner_padding_factor)
68
+ # print(' outer_padding:', outer_padding)
69
+ # print(' default_square: ', default_square)
70
+
71
+ tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
72
+ tmp_crop_size = np.array(DEFAULT_CROP_SIZE)
73
+
74
+ # 0) make the inner region a square
75
+ if default_square:
76
+ size_diff = max(tmp_crop_size) - tmp_crop_size
77
+ tmp_5pts += size_diff / 2
78
+ tmp_crop_size += size_diff
79
+
80
+ # print('---> default:')
81
+ # print(' crop_size = ', tmp_crop_size)
82
+ # print(' reference_5pts = ', tmp_5pts)
83
+
84
+ if (
85
+ output_size
86
+ and output_size[0] == tmp_crop_size[0]
87
+ and output_size[1] == tmp_crop_size[1]
88
+ ):
89
+ # print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size))
90
+ return tmp_5pts
91
+
92
+ if inner_padding_factor == 0 and outer_padding == (0, 0):
93
+ if output_size is None:
94
+ # print('No paddings to do: return default reference points')
95
+ return tmp_5pts
96
+ else:
97
+ raise FaceWarpException(
98
+ "No paddings to do, output_size must be None or {}".format(
99
+ tmp_crop_size
100
+ )
101
+ )
102
+
103
+ # check output size
104
+ if not (0 <= inner_padding_factor <= 1.0):
105
+ raise FaceWarpException("Not (0 <= inner_padding_factor <= 1.0)")
106
+
107
+ if (
108
+ inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0
109
+ ) and output_size is None:
110
+ output_size = (tmp_crop_size * (1 + inner_padding_factor * 2)).astype(np.int32)
111
+ output_size += np.array(outer_padding)
112
+ # print(' deduced from paddings, output_size = ', output_size)
113
+
114
+ if not (outer_padding[0] < output_size[0] and outer_padding[1] < output_size[1]):
115
+ raise FaceWarpException(
116
+ "Not (outer_padding[0] < output_size[0]"
117
+ "and outer_padding[1] < output_size[1])"
118
+ )
119
+
120
+ # 1) pad the inner region according inner_padding_factor
121
+ # print('---> STEP1: pad the inner region according inner_padding_factor')
122
+ if inner_padding_factor > 0:
123
+ size_diff = tmp_crop_size * inner_padding_factor * 2
124
+ tmp_5pts += size_diff / 2
125
+ tmp_crop_size += np.round(size_diff).astype(np.int32)
126
+
127
+ # print(' crop_size = ', tmp_crop_size)
128
+ # print(' reference_5pts = ', tmp_5pts)
129
+
130
+ # 2) resize the padded inner region
131
+ # print('---> STEP2: resize the padded inner region')
132
+ size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
133
+ # print(' crop_size = ', tmp_crop_size)
134
+ # print(' size_bf_outer_pad = ', size_bf_outer_pad)
135
+
136
+ if (
137
+ size_bf_outer_pad[0] * tmp_crop_size[1]
138
+ != size_bf_outer_pad[1] * tmp_crop_size[0]
139
+ ):
140
+ raise FaceWarpException(
141
+ "Must have (output_size - outer_padding)"
142
+ "= some_scale * (crop_size * (1.0 + inner_padding_factor)"
143
+ )
144
+
145
+ scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
146
+ # print(' resize scale_factor = ', scale_factor)
147
+ tmp_5pts = tmp_5pts * scale_factor
148
+ # size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
149
+ # tmp_5pts = tmp_5pts + size_diff / 2
150
+ tmp_crop_size = size_bf_outer_pad
151
+ # print(' crop_size = ', tmp_crop_size)
152
+ # print(' reference_5pts = ', tmp_5pts)
153
+
154
+ # 3) add outer_padding to make output_size
155
+ reference_5point = tmp_5pts + np.array(outer_padding)
156
+ tmp_crop_size = output_size
157
+ # print('---> STEP3: add outer_padding to make output_size')
158
+ # print(' crop_size = ', tmp_crop_size)
159
+ # print(' reference_5pts = ', tmp_5pts)
160
+
161
+ # print('===> end get_reference_facial_points\n')
162
+
163
+ return reference_5point
164
+
165
+
166
+ def get_affine_transform_matrix(src_pts, dst_pts):
167
+ """
168
+ Function:
169
+ ----------
170
+ get affine transform matrix 'tfm' from src_pts to dst_pts
171
+ Parameters:
172
+ ----------
173
+ @src_pts: Kx2 np.array
174
+ source points matrix, each row is a pair of coordinates (x, y)
175
+ @dst_pts: Kx2 np.array
176
+ destination points matrix, each row is a pair of coordinates (x, y)
177
+ Returns:
178
+ ----------
179
+ @tfm: 2x3 np.array
180
+ transform matrix from src_pts to dst_pts
181
+ """
182
+
183
+ tfm = np.float32([[1, 0, 0], [0, 1, 0]])
184
+ n_pts = src_pts.shape[0]
185
+ ones = np.ones((n_pts, 1), src_pts.dtype)
186
+ src_pts_ = np.hstack([src_pts, ones])
187
+ dst_pts_ = np.hstack([dst_pts, ones])
188
+
189
+ # #print(('src_pts_:\n' + str(src_pts_))
190
+ # #print(('dst_pts_:\n' + str(dst_pts_))
191
+
192
+ A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_, rcond=None)
193
+
194
+ # #print(('np.linalg.lstsq return A: \n' + str(A))
195
+ # #print(('np.linalg.lstsq return res: \n' + str(res))
196
+ # #print(('np.linalg.lstsq return rank: \n' + str(rank))
197
+ # #print(('np.linalg.lstsq return s: \n' + str(s))
198
+
199
+ if rank == 3:
200
+ tfm = np.float32([[A[0, 0], A[1, 0], A[2, 0]], [A[0, 1], A[1, 1], A[2, 1]]])
201
+ elif rank == 2:
202
+ tfm = np.float32([[A[0, 0], A[1, 0], 0], [A[0, 1], A[1, 1], 0]])
203
+
204
+ return tfm
205
+
206
+
207
+ def warp_and_crop_face(
208
+ src_img, facial_pts, reference_pts=None, crop_size=(96, 112), align_type="similarity"
209
+ ):
210
+ """
211
+ Function:
212
+ ----------
213
+ warp src_img to the reference facial points and crop the aligned face
214
+ Parameters:
215
+ ----------
216
+ @src_img: HxWx3 np.array
217
+ input image
218
+ @facial_pts: could be
219
+ 1)a list of K coordinates (x,y)
220
+ or
221
+ 2) Kx2 or 2xK np.array
222
+ each row or col is a pair of coordinates (x, y)
223
+ @reference_pts: could be
224
+ 1) a list of K coordinates (x,y)
225
+ or
226
+ 2) Kx2 or 2xK np.array
227
+ each row or col is a pair of coordinates (x, y)
228
+ or
229
+ 3) None
230
+ if None, use default reference facial points
231
+ @crop_size: (w, h)
232
+ output face image size
233
+ @align_type: transform type, could be one of
234
+ 1) 'similarity': use similarity transform
235
+ 2) 'cv2_affine': use the first 3 points to do affine transform,
236
+ by calling cv2.getAffineTransform()
237
+ 3) 'affine': use all points to do affine transform
238
+ Returns:
239
+ ----------
240
+ @face_img: output face image with size (w, h) = @crop_size
241
+ """
242
+
243
+ if reference_pts is None:
244
+ if crop_size[0] == 96 and crop_size[1] == 112:
245
+ reference_pts = REFERENCE_FACIAL_POINTS
246
+ else:
247
+ default_square = False
248
+ inner_padding_factor = 0
249
+ outer_padding = (0, 0)
250
+ output_size = crop_size
251
+
252
+ reference_pts = get_reference_facial_points(
253
+ output_size, inner_padding_factor, outer_padding, default_square
254
+ )
255
+
256
+ ref_pts = np.float32(reference_pts)
257
+ ref_pts_shp = ref_pts.shape
258
+ if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
259
+ raise FaceWarpException("reference_pts.shape must be (K,2) or (2,K) and K>2")
260
+
261
+ if ref_pts_shp[0] == 2:
262
+ ref_pts = ref_pts.T
263
+
264
+ src_pts = np.float32(facial_pts)
265
+ src_pts_shp = src_pts.shape
266
+ if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
267
+ raise FaceWarpException("facial_pts.shape must be (K,2) or (2,K) and K>2")
268
+
269
+ if src_pts_shp[0] == 2:
270
+ src_pts = src_pts.T
271
+
272
+ # #print('--->src_pts:\n', src_pts
273
+ # #print('--->ref_pts\n', ref_pts
274
+
275
+ if src_pts.shape != ref_pts.shape:
276
+ raise FaceWarpException("facial_pts and reference_pts must have the same shape")
277
+
278
+ if align_type == "cv2_affine":
279
+ tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
280
+ # #print(('cv2.getAffineTransform() returns tfm=\n' + str(tfm))
281
+ elif align_type == "affine":
282
+ tfm = get_affine_transform_matrix(src_pts, ref_pts)
283
+ # #print(('get_affine_transform_matrix() returns tfm=\n' + str(tfm))
284
+ else:
285
+ tfm, tfm_inv = get_similarity_transform_for_cv2(src_pts, ref_pts)
286
+ # #print(('get_similarity_transform_for_cv2() returns tfm=\n' + str(tfm))
287
+
288
+ # #print('--->Transform matrix: '
289
+ # #print(('type(tfm):' + str(type(tfm)))
290
+ # #print(('tfm.dtype:' + str(tfm.dtype))
291
+ # #print( tfm
292
+
293
+ face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))
294
+
295
+ return face_img, tfm
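A minimal usage sketch for the helpers above: take five detected landmarks and produce an aligned 112x112 crop. The landmark coordinates and `face.jpg` path are made-up placeholders; in the real pipeline the five points come from the MTCNN detector in `align/detector.py`.

```python
import cv2
import numpy as np

from align.align_trans import get_reference_facial_points, warp_and_crop_face

# Placeholder input image and five (x, y) landmarks: eyes, nose tip, mouth corners.
img = cv2.imread("face.jpg")  # hypothetical path
facial5points = [[214.0, 238.0], [290.0, 241.0], [252.0, 287.0], [221.0, 328.0], [284.0, 330.0]]

crop_size = 112
# Square 112x112 reference points; the scale factor is 1.0 because crop_size == 112.
reference = get_reference_facial_points(default_square=True) * (crop_size / 112.0)

# The default align_type uses the similarity transform from align/matlab_cp2tform.py.
aligned, tfm = warp_and_crop_face(
    np.array(img), facial5points, reference_pts=reference, crop_size=(crop_size, crop_size)
)
cv2.imwrite("face_aligned.jpg", aligned)
print(tfm)  # the 2x3 matrix passed to cv2.warpAffine
```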
align/box_utils.py ADDED
@@ -0,0 +1,239 @@
1
+ import numpy as np
2
+ from PIL import Image
3
+
4
+
5
+ def nms(boxes, overlap_threshold=0.5, mode="union"):
6
+ """Non-maximum suppression.
7
+
8
+ Arguments:
9
+ boxes: a float numpy array of shape [n, 5],
10
+ where each row is (xmin, ymin, xmax, ymax, score).
11
+ overlap_threshold: a float number.
12
+ mode: 'union' or 'min'.
13
+
14
+ Returns:
15
+ list with indices of the selected boxes
16
+ """
17
+
18
+ # if there are no boxes, return the empty list
19
+ if len(boxes) == 0:
20
+ return []
21
+
22
+ # list of picked indices
23
+ pick = []
24
+
25
+ # grab the coordinates of the bounding boxes
26
+ x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
27
+
28
+ area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)
29
+ ids = np.argsort(score) # in increasing order
30
+
31
+ while len(ids) > 0:
32
+ # grab index of the largest value
33
+ last = len(ids) - 1
34
+ i = ids[last]
35
+ pick.append(i)
36
+
37
+ # compute intersections
38
+ # of the box with the largest score
39
+ # with the rest of boxes
40
+
41
+ # left top corner of intersection boxes
42
+ ix1 = np.maximum(x1[i], x1[ids[:last]])
43
+ iy1 = np.maximum(y1[i], y1[ids[:last]])
44
+
45
+ # right bottom corner of intersection boxes
46
+ ix2 = np.minimum(x2[i], x2[ids[:last]])
47
+ iy2 = np.minimum(y2[i], y2[ids[:last]])
48
+
49
+ # width and height of intersection boxes
50
+ w = np.maximum(0.0, ix2 - ix1 + 1.0)
51
+ h = np.maximum(0.0, iy2 - iy1 + 1.0)
52
+
53
+ # intersections' areas
54
+ inter = w * h
55
+ if mode == "min":
56
+ overlap = inter / np.minimum(area[i], area[ids[:last]])
57
+ elif mode == "union":
58
+ # intersection over union (IoU)
59
+ overlap = inter / (area[i] + area[ids[:last]] - inter)
60
+
61
+ # delete all boxes where overlap is too big
62
+ ids = np.delete(
63
+ ids, np.concatenate([[last], np.where(overlap > overlap_threshold)[0]])
64
+ )
65
+
66
+ return pick
67
+
68
+
69
+ def convert_to_square(bboxes):
70
+ """Convert bounding boxes to a square form.
71
+
72
+ Arguments:
73
+ bboxes: a float numpy array of shape [n, 5].
74
+
75
+ Returns:
76
+ a float numpy array of shape [n, 5],
77
+ squared bounding boxes.
78
+ """
79
+
80
+ square_bboxes = np.zeros_like(bboxes)
81
+ x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
82
+ h = y2 - y1 + 1.0
83
+ w = x2 - x1 + 1.0
84
+ max_side = np.maximum(h, w)
85
+ square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5
86
+ square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5
87
+ square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0
88
+ square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0
89
+ return square_bboxes
90
+
91
+
92
+ def calibrate_box(bboxes, offsets):
93
+ """Transform bounding boxes to be more like true bounding boxes.
94
+ 'offsets' is one of the outputs of the nets.
95
+
96
+ Arguments:
97
+ bboxes: a float numpy array of shape [n, 5].
98
+ offsets: a float numpy array of shape [n, 4].
99
+
100
+ Returns:
101
+ a float numpy array of shape [n, 5].
102
+ """
103
+ x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
104
+ w = x2 - x1 + 1.0
105
+ h = y2 - y1 + 1.0
106
+ w = np.expand_dims(w, 1)
107
+ h = np.expand_dims(h, 1)
108
+
109
+ # this is what happening here:
110
+ # tx1, ty1, tx2, ty2 = [offsets[:, i] for i in range(4)]
111
+ # x1_true = x1 + tx1*w
112
+ # y1_true = y1 + ty1*h
113
+ # x2_true = x2 + tx2*w
114
+ # y2_true = y2 + ty2*h
115
+ # below is just more compact form of this
116
+
117
+ # are offsets always such that
118
+ # x1 < x2 and y1 < y2 ?
119
+
120
+ translation = np.hstack([w, h, w, h]) * offsets
121
+ bboxes[:, 0:4] = bboxes[:, 0:4] + translation
122
+ return bboxes
123
+
124
+
125
+ def get_image_boxes(bounding_boxes, img, size=24):
126
+ """Cut out boxes from the image.
127
+
128
+ Arguments:
129
+ bounding_boxes: a float numpy array of shape [n, 5].
130
+ img: an instance of PIL.Image.
131
+ size: an integer, size of cutouts.
132
+
133
+ Returns:
134
+ a float numpy array of shape [n, 3, size, size].
135
+ """
136
+
137
+ num_boxes = len(bounding_boxes)
138
+ width, height = img.size
139
+
140
+ [dy, edy, dx, edx, y, ey, x, ex, w, h] = correct_bboxes(
141
+ bounding_boxes, width, height
142
+ )
143
+ img_boxes = np.zeros((num_boxes, 3, size, size), "float32")
144
+
145
+ for i in range(num_boxes):
146
+ img_box = np.zeros((h[i], w[i], 3), "uint8")
147
+
148
+ img_array = np.asarray(img, "uint8")
149
+ img_box[dy[i] : (edy[i] + 1), dx[i] : (edx[i] + 1), :] = img_array[
150
+ y[i] : (ey[i] + 1), x[i] : (ex[i] + 1), :
151
+ ]
152
+
153
+ # resize
154
+ img_box = Image.fromarray(img_box)
155
+ img_box = img_box.resize((size, size), Image.BILINEAR)
156
+ img_box = np.asarray(img_box, "float32")
157
+
158
+ img_boxes[i, :, :, :] = _preprocess(img_box)
159
+
160
+ return img_boxes
161
+
162
+
163
+ def correct_bboxes(bboxes, width, height):
164
+ """Crop boxes that are too big and get coordinates
165
+ with respect to cutouts.
166
+
167
+ Arguments:
168
+ bboxes: a float numpy array of shape [n, 5],
169
+ where each row is (xmin, ymin, xmax, ymax, score).
170
+ width: a float number.
171
+ height: a float number.
172
+
173
+ Returns:
174
+ dy, dx, edy, edx: a int numpy arrays of shape [n],
175
+ coordinates of the boxes with respect to the cutouts.
176
+ y, x, ey, ex: int numpy arrays of shape [n],
177
+ corrected ymin, xmin, ymax, xmax.
178
+ h, w: int numpy arrays of shape [n],
179
+ just heights and widths of boxes.
180
+
181
+ in the following order:
182
+ [dy, edy, dx, edx, y, ey, x, ex, w, h].
183
+ """
184
+
185
+ x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)]
186
+ w, h = x2 - x1 + 1.0, y2 - y1 + 1.0
187
+ num_boxes = bboxes.shape[0]
188
+
189
+ # 'e' stands for end
190
+ # (x, y) -> (ex, ey)
191
+ x, y, ex, ey = x1, y1, x2, y2
192
+
193
+ # we need to cut out a box from the image.
194
+ # (x, y, ex, ey) are corrected coordinates of the box
195
+ # in the image.
196
+ # (dx, dy, edx, edy) are coordinates of the box in the cutout
197
+ # from the image.
198
+ dx, dy = np.zeros((num_boxes,)), np.zeros((num_boxes,))
199
+ edx, edy = w.copy() - 1.0, h.copy() - 1.0
200
+
201
+ # if box's bottom right corner is too far right
202
+ ind = np.where(ex > width - 1.0)[0]
203
+ edx[ind] = w[ind] + width - 2.0 - ex[ind]
204
+ ex[ind] = width - 1.0
205
+
206
+ # if box's bottom right corner is too low
207
+ ind = np.where(ey > height - 1.0)[0]
208
+ edy[ind] = h[ind] + height - 2.0 - ey[ind]
209
+ ey[ind] = height - 1.0
210
+
211
+ # if box's top left corner is too far left
212
+ ind = np.where(x < 0.0)[0]
213
+ dx[ind] = 0.0 - x[ind]
214
+ x[ind] = 0.0
215
+
216
+ # if box's top left corner is too high
217
+ ind = np.where(y < 0.0)[0]
218
+ dy[ind] = 0.0 - y[ind]
219
+ y[ind] = 0.0
220
+
221
+ return_list = [dy, edy, dx, edx, y, ey, x, ex, w, h]
222
+ return_list = [i.astype("int32") for i in return_list]
223
+
224
+ return return_list
225
+
226
+
227
+ def _preprocess(img):
228
+ """Preprocessing step before feeding the network.
229
+
230
+ Arguments:
231
+ img: a float numpy array of shape [h, w, c].
232
+
233
+ Returns:
234
+ a float numpy array of shape [1, c, h, w].
235
+ """
236
+ img = img.transpose((2, 0, 1))
237
+ img = np.expand_dims(img, 0)
238
+ img = (img - 127.5) * 0.0078125
239
+ return img
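A small self-contained sketch of `nms` and `convert_to_square` on toy boxes, illustrating the expected `[n, 5]` row layout `(xmin, ymin, xmax, ymax, score)`; the numbers are arbitrary.

```python
import numpy as np

from align.box_utils import convert_to_square, nms

# Three toy boxes: the first two overlap heavily, the third is separate.
boxes = np.array(
    [
        [10.0, 10.0, 60.0, 60.0, 0.90],
        [12.0, 12.0, 62.0, 62.0, 0.80],
        [100.0, 100.0, 140.0, 160.0, 0.70],
    ]
)

keep = nms(boxes, overlap_threshold=0.5, mode="union")
print(keep)  # [0, 2]: the lower-scoring duplicate of box 0 is suppressed

# Squared versions of the kept boxes (note the score column is left at 0 by convert_to_square).
print(convert_to_square(boxes[keep]))
```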
align/detector.py ADDED
@@ -0,0 +1,133 @@
1
+ import numpy as np
2
+ import torch
3
+ from torch.autograd import Variable
4
+ import sys
5
+
6
+ sys.path.append("./")
7
+ from align.get_nets import PNet, RNet, ONet
8
+ from align.box_utils import nms, calibrate_box, get_image_boxes, convert_to_square
9
+ from align.first_stage import run_first_stage
10
+
11
+
12
+ def detect_faces(
13
+ image,
14
+ min_face_size=20.0,
15
+ thresholds=[0.6, 0.7, 0.8],
16
+ nms_thresholds=[0.7, 0.7, 0.7],
17
+ ):
18
+ """
19
+ Arguments:
20
+ image: an instance of PIL.Image.
21
+ min_face_size: a float number.
22
+ thresholds: a list of length 3.
23
+ nms_thresholds: a list of length 3.
24
+
25
+ Returns:
26
+ two float numpy arrays of shapes [n_boxes, 4] and [n_boxes, 10],
27
+ bounding boxes and facial landmarks.
28
+ """
29
+ # LOAD MODELS
30
+ pnet = PNet()
31
+ rnet = RNet()
32
+ onet = ONet()
33
+ onet.eval()
34
+
35
+ # BUILD AN IMAGE PYRAMID
36
+ width, height = image.size
37
+ min_length = min(height, width)
38
+
39
+ min_detection_size = 12
40
+ factor = 0.707 # sqrt(0.5)
41
+
42
+ # scales for scaling the image
43
+ scales = []
44
+
45
+ # scales the image so that
46
+ # minimum size that we can detect equals to
47
+ # minimum face size that we want to detect
48
+ m = min_detection_size / min_face_size
49
+ min_length *= m
50
+
51
+ factor_count = 0
52
+ while min_length > min_detection_size:
53
+ scales.append(m * factor**factor_count)
54
+ min_length *= factor
55
+ factor_count += 1
56
+
57
+ # STAGE 1
58
+
59
+ # it will be returned
60
+ bounding_boxes = []
61
+
62
+ # run P-Net on different scales
63
+ for s in scales:
64
+ boxes = run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
65
+ bounding_boxes.append(boxes)
66
+
67
+ # collect boxes (and offsets, and scores) from different scales
68
+ bounding_boxes = [i for i in bounding_boxes if i is not None]
69
+ bounding_boxes = np.vstack(bounding_boxes)
70
+
71
+ keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
72
+ bounding_boxes = bounding_boxes[keep]
73
+
74
+ # use offsets predicted by pnet to transform bounding boxes
75
+ bounding_boxes = calibrate_box(bounding_boxes[:, 0:5], bounding_boxes[:, 5:])
76
+ # shape [n_boxes, 5]
77
+
78
+ bounding_boxes = convert_to_square(bounding_boxes)
79
+ bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
80
+
81
+ # STAGE 2
82
+
83
+ img_boxes = get_image_boxes(bounding_boxes, image, size=24)
84
+ img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True)
85
+ output = rnet(img_boxes)
86
+ offsets = output[0].data.numpy() # shape [n_boxes, 4]
87
+ probs = output[1].data.numpy() # shape [n_boxes, 2]
88
+
89
+ keep = np.where(probs[:, 1] > thresholds[1])[0]
90
+ bounding_boxes = bounding_boxes[keep]
91
+ bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
92
+ offsets = offsets[keep]
93
+
94
+ keep = nms(bounding_boxes, nms_thresholds[1])
95
+ bounding_boxes = bounding_boxes[keep]
96
+ bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
97
+ bounding_boxes = convert_to_square(bounding_boxes)
98
+ bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
99
+
100
+ # STAGE 3
101
+
102
+ img_boxes = get_image_boxes(bounding_boxes, image, size=48)
103
+ if len(img_boxes) == 0:
104
+ return [], []
105
+ img_boxes = Variable(torch.FloatTensor(img_boxes), volatile=True)
106
+ output = onet(img_boxes)
107
+ landmarks = output[0].data.numpy() # shape [n_boxes, 10]
108
+ offsets = output[1].data.numpy() # shape [n_boxes, 4]
109
+ probs = output[2].data.numpy() # shape [n_boxes, 2]
110
+
111
+ keep = np.where(probs[:, 1] > thresholds[2])[0]
112
+ bounding_boxes = bounding_boxes[keep]
113
+ bounding_boxes[:, 4] = probs[keep, 1].reshape((-1,))
114
+ offsets = offsets[keep]
115
+ landmarks = landmarks[keep]
116
+
117
+ # compute landmark points
118
+ width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
119
+ height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
120
+ xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
121
+ landmarks[:, 0:5] = (
122
+ np.expand_dims(xmin, 1) + np.expand_dims(width, 1) * landmarks[:, 0:5]
123
+ )
124
+ landmarks[:, 5:10] = (
125
+ np.expand_dims(ymin, 1) + np.expand_dims(height, 1) * landmarks[:, 5:10]
126
+ )
127
+
128
+ bounding_boxes = calibrate_box(bounding_boxes, offsets)
129
+ keep = nms(bounding_boxes, nms_thresholds[2], mode="min")
130
+ bounding_boxes = bounding_boxes[keep]
131
+ landmarks = landmarks[keep]
132
+
133
+ return bounding_boxes, landmarks
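A usage sketch for `detect_faces`. It assumes the P-Net/R-Net/O-Net weight files (`align/pnet.npy`, `align/rnet.npy`, `align/onet.npy`) are present, that the script runs from the repository root, and that `photo.jpg` (a placeholder path) contains at least one detectable face.

```python
from PIL import Image

from align.detector import detect_faces

img = Image.open("photo.jpg").convert("RGB")  # placeholder path

# bounding_boxes: [n, 5] rows of (xmin, ymin, xmax, ymax, score)
# landmarks:      [n, 10] rows of five x-coordinates followed by five y-coordinates
bounding_boxes, landmarks = detect_faces(img, min_face_size=20.0)

for box, lm in zip(bounding_boxes, landmarks):
    print("face at", box[:4].astype(int), "score", round(float(box[4]), 3))
    facial5points = [[lm[j], lm[j + 5]] for j in range(5)]  # (x, y) pairs, as consumed in app.py
    print("landmarks:", facial5points)
```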
align/first_stage.py ADDED
@@ -0,0 +1,100 @@
1
+ import torch
2
+ from torch.autograd import Variable
3
+ import math
4
+ from PIL import Image
5
+ import numpy as np
6
+ from align.box_utils import nms, _preprocess
7
+
8
+
9
+ def run_first_stage(image, net, scale, threshold):
10
+ """Run P-Net, generate bounding boxes, and do NMS.
11
+
12
+ Arguments:
13
+ image: an instance of PIL.Image.
14
+ net: an instance of pytorch's nn.Module, P-Net.
15
+ scale: a float number,
16
+ scale width and height of the image by this number.
17
+ threshold: a float number,
18
+ threshold on the probability of a face when generating
19
+ bounding boxes from predictions of the net.
20
+
21
+ Returns:
22
+ a float numpy array of shape [n_boxes, 9],
23
+ bounding boxes with scores and offsets (4 + 1 + 4).
24
+ """
25
+
26
+ # scale the image and convert it to a float array
27
+ width, height = image.size
28
+ sw, sh = math.ceil(width * scale), math.ceil(height * scale)
29
+ img = image.resize((sw, sh), Image.BILINEAR)
30
+ img = np.asarray(img, "float32")
31
+
32
+ img = Variable(torch.FloatTensor(_preprocess(img)), volatile=True)
33
+ output = net(img)
34
+ probs = output[1].data.numpy()[0, 1, :, :]
35
+ offsets = output[0].data.numpy()
36
+ # probs: probability of a face at each sliding window
37
+ # offsets: transformations to true bounding boxes
38
+
39
+ boxes = _generate_bboxes(probs, offsets, scale, threshold)
40
+ if len(boxes) == 0:
41
+ return None
42
+
43
+ keep = nms(boxes[:, 0:5], overlap_threshold=0.5)
44
+ return boxes[keep]
45
+
46
+
47
+ def _generate_bboxes(probs, offsets, scale, threshold):
48
+ """Generate bounding boxes at places
49
+ where there is probably a face.
50
+
51
+ Arguments:
52
+ probs: a float numpy array of shape [n, m].
53
+ offsets: a float numpy array of shape [1, 4, n, m].
54
+ scale: a float number,
55
+ width and height of the image were scaled by this number.
56
+ threshold: a float number.
57
+
58
+ Returns:
59
+ a float numpy array of shape [n_boxes, 9]
60
+ """
61
+
62
+ # applying P-Net is equivalent, in some sense, to
63
+ # moving 12x12 window with stride 2
64
+ stride = 2
65
+ cell_size = 12
66
+
67
+ # indices of boxes where there is probably a face
68
+ inds = np.where(probs > threshold)
69
+
70
+ if inds[0].size == 0:
71
+ return np.array([])
72
+
73
+ # transformations of bounding boxes
74
+ tx1, ty1, tx2, ty2 = [offsets[0, i, inds[0], inds[1]] for i in range(4)]
75
+ # they are defined as:
76
+ # w = x2 - x1 + 1
77
+ # h = y2 - y1 + 1
78
+ # x1_true = x1 + tx1*w
79
+ # x2_true = x2 + tx2*w
80
+ # y1_true = y1 + ty1*h
81
+ # y2_true = y2 + ty2*h
82
+
83
+ offsets = np.array([tx1, ty1, tx2, ty2])
84
+ score = probs[inds[0], inds[1]]
85
+
86
+ # P-Net is applied to scaled images
87
+ # so we need to rescale bounding boxes back
88
+ bounding_boxes = np.vstack(
89
+ [
90
+ np.round((stride * inds[1] + 1.0) / scale),
91
+ np.round((stride * inds[0] + 1.0) / scale),
92
+ np.round((stride * inds[1] + 1.0 + cell_size) / scale),
93
+ np.round((stride * inds[0] + 1.0 + cell_size) / scale),
94
+ score,
95
+ offsets,
96
+ ]
97
+ )
98
+ # why one is added?
99
+
100
+ return bounding_boxes.T
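The scale pyramid that `detect_faces` feeds into `run_first_stage` can be reproduced in isolation. The sketch below mirrors the loop in `align/detector.py` for a hypothetical 640x480 image and the default `min_face_size=20`.

```python
# P-Net sees 12x12 windows, so scale the image until a min_face_size face shrinks to 12 px.
min_detection_size = 12
min_face_size = 20.0
factor = 0.707  # ~sqrt(0.5): each pyramid level halves the image area

width, height = 640, 480  # hypothetical image size
m = min_detection_size / min_face_size
min_length = min(width, height) * m

scales = []
while min_length > min_detection_size:
    scales.append(m * factor ** len(scales))
    min_length *= factor

print(scales)  # one entry per pyramid level passed to run_first_stage
```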
align/get_nets.py ADDED
@@ -0,0 +1,165 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from collections import OrderedDict
5
+ import numpy as np
6
+
7
+
8
+ class Flatten(nn.Module):
9
+ def __init__(self):
10
+ super(Flatten, self).__init__()
11
+
12
+ def forward(self, x):
13
+ """
14
+ Arguments:
15
+ x: a float tensor with shape [batch_size, c, h, w].
16
+ Returns:
17
+ a float tensor with shape [batch_size, c*h*w].
18
+ """
19
+
20
+ # without this pretrained model isn't working
21
+ x = x.transpose(3, 2).contiguous()
22
+
23
+ return x.view(x.size(0), -1)
24
+
25
+
26
+ class PNet(nn.Module):
27
+ def __init__(self):
28
+ super(PNet, self).__init__()
29
+
30
+ # suppose we have input with size HxW, then
31
+ # after first layer: H - 2,
32
+ # after pool: ceil((H - 2)/2),
33
+ # after second conv: ceil((H - 2)/2) - 2,
34
+ # after last conv: ceil((H - 2)/2) - 4,
35
+ # and the same for W
36
+
37
+ self.features = nn.Sequential(
38
+ OrderedDict(
39
+ [
40
+ ("conv1", nn.Conv2d(3, 10, 3, 1)),
41
+ ("prelu1", nn.PReLU(10)),
42
+ ("pool1", nn.MaxPool2d(2, 2, ceil_mode=True)),
43
+ ("conv2", nn.Conv2d(10, 16, 3, 1)),
44
+ ("prelu2", nn.PReLU(16)),
45
+ ("conv3", nn.Conv2d(16, 32, 3, 1)),
46
+ ("prelu3", nn.PReLU(32)),
47
+ ]
48
+ )
49
+ )
50
+
51
+ self.conv4_1 = nn.Conv2d(32, 2, 1, 1)
52
+ self.conv4_2 = nn.Conv2d(32, 4, 1, 1)
53
+
54
+ weights = np.load("align/pnet.npy", allow_pickle=True)[()]
55
+ for n, p in self.named_parameters():
56
+ p.data = torch.FloatTensor(weights[n])
57
+
58
+ def forward(self, x):
59
+ """
60
+ Arguments:
61
+ x: a float tensor with shape [batch_size, 3, h, w].
62
+ Returns:
63
+ b: a float tensor with shape [batch_size, 4, h', w'].
64
+ a: a float tensor with shape [batch_size, 2, h', w'].
65
+ """
66
+ x = self.features(x)
67
+ a = self.conv4_1(x)
68
+ b = self.conv4_2(x)
69
+ a = F.softmax(a, dim=1)
70
+ return b, a
71
+
72
+
73
+ class RNet(nn.Module):
74
+ def __init__(self):
75
+ super(RNet, self).__init__()
76
+
77
+ self.features = nn.Sequential(
78
+ OrderedDict(
79
+ [
80
+ ("conv1", nn.Conv2d(3, 28, 3, 1)),
81
+ ("prelu1", nn.PReLU(28)),
82
+ ("pool1", nn.MaxPool2d(3, 2, ceil_mode=True)),
83
+ ("conv2", nn.Conv2d(28, 48, 3, 1)),
84
+ ("prelu2", nn.PReLU(48)),
85
+ ("pool2", nn.MaxPool2d(3, 2, ceil_mode=True)),
86
+ ("conv3", nn.Conv2d(48, 64, 2, 1)),
87
+ ("prelu3", nn.PReLU(64)),
88
+ ("flatten", Flatten()),
89
+ ("conv4", nn.Linear(576, 128)),
90
+ ("prelu4", nn.PReLU(128)),
91
+ ]
92
+ )
93
+ )
94
+
95
+ self.conv5_1 = nn.Linear(128, 2)
96
+ self.conv5_2 = nn.Linear(128, 4)
97
+
98
+ weights = np.load("align/rnet.npy", allow_pickle=True)[()]
99
+ for n, p in self.named_parameters():
100
+ p.data = torch.FloatTensor(weights[n])
101
+
102
+ def forward(self, x):
103
+ """
104
+ Arguments:
105
+ x: a float tensor with shape [batch_size, 3, h, w].
106
+ Returns:
107
+ b: a float tensor with shape [batch_size, 4].
108
+ a: a float tensor with shape [batch_size, 2].
109
+ """
110
+ x = self.features(x)
111
+ a = self.conv5_1(x)
112
+ b = self.conv5_2(x)
113
+ a = F.softmax(a, dim=1)
114
+ return b, a
115
+
116
+
117
+ class ONet(nn.Module):
118
+ def __init__(self):
119
+ super(ONet, self).__init__()
120
+
121
+ self.features = nn.Sequential(
122
+ OrderedDict(
123
+ [
124
+ ("conv1", nn.Conv2d(3, 32, 3, 1)),
125
+ ("prelu1", nn.PReLU(32)),
126
+ ("pool1", nn.MaxPool2d(3, 2, ceil_mode=True)),
127
+ ("conv2", nn.Conv2d(32, 64, 3, 1)),
128
+ ("prelu2", nn.PReLU(64)),
129
+ ("pool2", nn.MaxPool2d(3, 2, ceil_mode=True)),
130
+ ("conv3", nn.Conv2d(64, 64, 3, 1)),
131
+ ("prelu3", nn.PReLU(64)),
132
+ ("pool3", nn.MaxPool2d(2, 2, ceil_mode=True)),
133
+ ("conv4", nn.Conv2d(64, 128, 2, 1)),
134
+ ("prelu4", nn.PReLU(128)),
135
+ ("flatten", Flatten()),
136
+ ("conv5", nn.Linear(1152, 256)),
137
+ ("drop5", nn.Dropout(0.25)),
138
+ ("prelu5", nn.PReLU(256)),
139
+ ]
140
+ )
141
+ )
142
+
143
+ self.conv6_1 = nn.Linear(256, 2)
144
+ self.conv6_2 = nn.Linear(256, 4)
145
+ self.conv6_3 = nn.Linear(256, 10)
146
+
147
+ weights = np.load("align/onet.npy", allow_pickle=True)[()]
148
+ for n, p in self.named_parameters():
149
+ p.data = torch.FloatTensor(weights[n])
150
+
151
+ def forward(self, x):
152
+ """
153
+ Arguments:
154
+ x: a float tensor with shape [batch_size, 3, h, w].
155
+ Returns:
156
+ c: a float tensor with shape [batch_size, 10].
157
+ b: a float tensor with shape [batch_size, 4].
158
+ a: a float tensor with shape [batch_size, 2].
159
+ """
160
+ x = self.features(x)
161
+ a = self.conv6_1(x)
162
+ b = self.conv6_2(x)
163
+ c = self.conv6_3(x)
164
+ a = F.softmax(a, dim=1)
165
+ return c, b, a
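A quick shape check for P-Net, assuming `align/pnet.npy` is present and the code runs from the repository root (the constructor loads the weights eagerly). The dummy input size is arbitrary.

```python
import torch

from align.get_nets import PNet

pnet = PNet().eval()

with torch.no_grad():  # inference only
    x = torch.zeros(1, 3, 120, 160)  # dummy batch, H=120, W=160
    offsets, probs = pnet(x)

# Per the size comment in PNet.__init__: H' = ceil((120 - 2)/2) - 4 = 55, W' = 75.
print(offsets.shape)  # torch.Size([1, 4, 55, 75])
print(probs.shape)    # torch.Size([1, 2, 55, 75])
```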
align/matlab_cp2tform.py ADDED
@@ -0,0 +1,329 @@
1
+ import numpy as np
2
+ from numpy.linalg import inv, norm, lstsq
3
+ from numpy.linalg import matrix_rank as rank
4
+
5
+
6
+ class MatlabCp2tformException(Exception):
7
+ def __str__(self):
8
+ return "In File {}:{}".format(__file__, super.__str__(self))
9
+
10
+
11
+ def tformfwd(trans, uv):
12
+ """
13
+ Function:
14
+ ----------
15
+ apply affine transform 'trans' to uv
16
+
17
+ Parameters:
18
+ ----------
19
+ @trans: 3x3 np.array
20
+ transform matrix
21
+ @uv: Kx2 np.array
22
+ each row is a pair of coordinates (x, y)
23
+
24
+ Returns:
25
+ ----------
26
+ @xy: Kx2 np.array
27
+ each row is a pair of transformed coordinates (x, y)
28
+ """
29
+ uv = np.hstack((uv, np.ones((uv.shape[0], 1))))
30
+ xy = np.dot(uv, trans)
31
+ xy = xy[:, 0:-1]
32
+ return xy
33
+
34
+
35
+ def tforminv(trans, uv):
36
+ """
37
+ Function:
38
+ ----------
39
+ apply the inverse of affine transform 'trans' to uv
40
+
41
+ Parameters:
42
+ ----------
43
+ @trans: 3x3 np.array
44
+ transform matrix
45
+ @uv: Kx2 np.array
46
+ each row is a pair of coordinates (x, y)
47
+
48
+ Returns:
49
+ ----------
50
+ @xy: Kx2 np.array
51
+ each row is a pair of inverse-transformed coordinates (x, y)
52
+ """
53
+ Tinv = inv(trans)
54
+ xy = tformfwd(Tinv, uv)
55
+ return xy
56
+
57
+
58
+ def findNonreflectiveSimilarity(uv, xy, options=None):
59
+ options = {"K": 2}
60
+
61
+ K = options["K"]
62
+ M = xy.shape[0]
63
+ x = xy[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
64
+ y = xy[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
65
+ # print('--->x, y:\n', x, y
66
+
67
+ tmp1 = np.hstack((x, y, np.ones((M, 1)), np.zeros((M, 1))))
68
+ tmp2 = np.hstack((y, -x, np.zeros((M, 1)), np.ones((M, 1))))
69
+ X = np.vstack((tmp1, tmp2))
70
+ # print('--->X.shape: ', X.shape
71
+ # print('X:\n', X
72
+
73
+ u = uv[:, 0].reshape((-1, 1)) # use reshape to keep a column vector
74
+ v = uv[:, 1].reshape((-1, 1)) # use reshape to keep a column vector
75
+ U = np.vstack((u, v))
76
+ # print('--->U.shape: ', U.shape
77
+ # print('U:\n', U
78
+
79
+ # We know that X * r = U
80
+ if rank(X) >= 2 * K:
81
+ r, _, _, _ = lstsq(X, U, rcond=None)
82
+ r = np.squeeze(r)
83
+ else:
84
+ raise Exception("cp2tform: two Unique Points Req")
85
+
86
+ # print('--->r:\n', r
87
+
88
+ sc = r[0]
89
+ ss = r[1]
90
+ tx = r[2]
91
+ ty = r[3]
92
+
93
+ Tinv = np.array([[sc, -ss, 0], [ss, sc, 0], [tx, ty, 1]])
94
+
95
+ # print('--->Tinv:\n', Tinv
96
+
97
+ T = inv(Tinv)
98
+ # print('--->T:\n', T
99
+
100
+ T[:, 2] = np.array([0, 0, 1])
101
+
102
+ return T, Tinv
103
+
104
+
105
+ def findSimilarity(uv, xy, options=None):
106
+ options = {"K": 2}
107
+
108
+ # uv = np.array(uv)
109
+ # xy = np.array(xy)
110
+
111
+ # Solve for trans1
112
+ trans1, trans1_inv = findNonreflectiveSimilarity(uv, xy, options)
113
+
114
+ # Solve for trans2
115
+
116
+ # manually reflect the xy data across the Y-axis
117
+ xyR = xy.copy()  # copy so the reflection below does not modify xy in place
118
+ xyR[:, 0] = -1 * xyR[:, 0]
119
+
120
+ trans2r, trans2r_inv = findNonreflectiveSimilarity(uv, xyR, options)
121
+
122
+ # manually reflect the tform to undo the reflection done on xyR
123
+ TreflectY = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
124
+
125
+ trans2 = np.dot(trans2r, TreflectY)
126
+
127
+ # Figure out if trans1 or trans2 is better
128
+ xy1 = tformfwd(trans1, uv)
129
+ norm1 = norm(xy1 - xy)
130
+
131
+ xy2 = tformfwd(trans2, uv)
132
+ norm2 = norm(xy2 - xy)
133
+
134
+ if norm1 <= norm2:
135
+ return trans1, trans1_inv
136
+ else:
137
+ trans2_inv = inv(trans2)
138
+ return trans2, trans2_inv
139
+
140
+
141
+ def get_similarity_transform(src_pts, dst_pts, reflective=True):
142
+ """
143
+ Function:
144
+ ----------
145
+ Find Similarity Transform Matrix 'trans':
146
+ u = src_pts[:, 0]
147
+ v = src_pts[:, 1]
148
+ x = dst_pts[:, 0]
149
+ y = dst_pts[:, 1]
150
+ [x, y, 1] = [u, v, 1] * trans
151
+
152
+ Parameters:
153
+ ----------
154
+ @src_pts: Kx2 np.array
155
+ source points, each row is a pair of coordinates (x, y)
156
+ @dst_pts: Kx2 np.array
157
+ destination points, each row is a pair of transformed
158
+ coordinates (x, y)
159
+ @reflective: True or False
160
+ if True:
161
+ use reflective similarity transform
162
+ else:
163
+ use non-reflective similarity transform
164
+
165
+ Returns:
166
+ ----------
167
+ @trans: 3x3 np.array
168
+ transform matrix from uv to xy
169
+ trans_inv: 3x3 np.array
170
+ inverse of trans, transform matrix from xy to uv
171
+ """
172
+
173
+ if reflective:
174
+ trans, trans_inv = findSimilarity(src_pts, dst_pts)
175
+ else:
176
+ trans, trans_inv = findNonreflectiveSimilarity(src_pts, dst_pts)
177
+
178
+ return trans, trans_inv
179
+
180
+
181
+ def cvt_tform_mat_for_cv2(trans):
182
+ """
183
+ Function:
184
+ ----------
185
+ Convert Transform Matrix 'trans' into 'cv2_trans' which could be
186
+ directly used by cv2.warpAffine():
187
+ u = src_pts[:, 0]
188
+ v = src_pts[:, 1]
189
+ x = dst_pts[:, 0]
190
+ y = dst_pts[:, 1]
191
+ [x, y].T = cv_trans * [u, v, 1].T
192
+
193
+ Parameters:
194
+ ----------
195
+ @trans: 3x3 np.array
196
+ transform matrix from uv to xy
197
+
198
+ Returns:
199
+ ----------
200
+ @cv2_trans: 2x3 np.array
201
+ transform matrix from src_pts to dst_pts, could be directly used
202
+ for cv2.warpAffine()
203
+ """
204
+ cv2_trans = trans[:, 0:2].T
205
+
206
+ return cv2_trans
207
+
208
+
209
+ def get_similarity_transform_for_cv2(src_pts, dst_pts, reflective=True):
210
+ """
211
+ Function:
212
+ ----------
213
+ Find Similarity Transform Matrix 'cv2_trans' which could be
214
+ directly used by cv2.warpAffine():
215
+ u = src_pts[:, 0]
216
+ v = src_pts[:, 1]
217
+ x = dst_pts[:, 0]
218
+ y = dst_pts[:, 1]
219
+ [x, y].T = cv_trans * [u, v, 1].T
220
+
221
+ Parameters:
222
+ ----------
223
+ @src_pts: Kx2 np.array
224
+ source points, each row is a pair of coordinates (x, y)
225
+ @dst_pts: Kx2 np.array
226
+ destination points, each row is a pair of transformed
227
+ coordinates (x, y)
228
+ reflective: True or False
229
+ if True:
230
+ use reflective similarity transform
231
+ else:
232
+ use non-reflective similarity transform
233
+
234
+ Returns:
235
+ ----------
236
+ @cv2_trans: 2x3 np.array
237
+ transform matrix from src_pts to dst_pts, could be directly used
238
+ for cv2.warpAffine()
239
+ """
240
+ trans, trans_inv = get_similarity_transform(src_pts, dst_pts, reflective)
241
+ cv2_trans = cvt_tform_mat_for_cv2(trans)
242
+ cv2_trans_inv = cvt_tform_mat_for_cv2(trans_inv)
243
+
244
+ return cv2_trans, cv2_trans_inv
245
+
246
+
247
+ if __name__ == "__main__":
248
+ """
249
+ u = [0, 6, -2]
250
+ v = [0, 3, 5]
251
+ x = [-1, 0, 4]
252
+ y = [-1, -10, 4]
253
+
254
+ # In Matlab, run:
255
+ #
256
+ # uv = [u'; v'];
257
+ # xy = [x'; y'];
258
+ # tform_sim=cp2tform(uv,xy,'similarity');
259
+ #
260
+ # trans = tform_sim.tdata.T
261
+ # ans =
262
+ # -0.0764 -1.6190 0
263
+ # 1.6190 -0.0764 0
264
+ # -3.2156 0.0290 1.0000
265
+ # trans_inv = tform_sim.tdata.Tinv
266
+ # ans =
267
+ #
268
+ # -0.0291 0.6163 0
269
+ # -0.6163 -0.0291 0
270
+ # -0.0756 1.9826 1.0000
271
+ # xy_m=tformfwd(tform_sim, u,v)
272
+ #
273
+ # xy_m =
274
+ #
275
+ # -3.2156 0.0290
276
+ # 1.1833 -9.9143
277
+ # 5.0323 2.8853
278
+ # uv_m=tforminv(tform_sim, x,y)
279
+ #
280
+ # uv_m =
281
+ #
282
+ # 0.5698 1.3953
283
+ # 6.0872 2.2733
284
+ # -2.6570 4.3314
285
+ """
286
+ u = [0, 6, -2]
287
+ v = [0, 3, 5]
288
+ x = [-1, 0, 4]
289
+ y = [-1, -10, 4]
290
+
291
+ uv = np.array((u, v)).T
292
+ xy = np.array((x, y)).T
293
+
294
+ print("\n--->uv:")
295
+ print(uv)
296
+ print("\n--->xy:")
297
+ print(xy)
298
+
299
+ trans, trans_inv = get_similarity_transform(uv, xy)
300
+
301
+ print("\n--->trans matrix:")
302
+ print(trans)
303
+
304
+ print("\n--->trans_inv matrix:")
305
+ print(trans_inv)
306
+
307
+ print("\n---> apply transform to uv")
308
+ print("\nxy_m = uv_augmented * trans")
309
+ uv_aug = np.hstack((uv, np.ones((uv.shape[0], 1))))
310
+ xy_m = np.dot(uv_aug, trans)
311
+ print(xy_m)
312
+
313
+ print("\nxy_m = tformfwd(trans, uv)")
314
+ xy_m = tformfwd(trans, uv)
315
+ print(xy_m)
316
+
317
+ print("\n---> apply inverse transform to xy")
318
+ print("\nuv_m = xy_augmented * trans_inv")
319
+ xy_aug = np.hstack((xy, np.ones((xy.shape[0], 1))))
320
+ uv_m = np.dot(xy_aug, trans_inv)
321
+ print(uv_m)
322
+
323
+ print("\nuv_m = tformfwd(trans_inv, xy)")
324
+ uv_m = tformfwd(trans_inv, xy)
325
+ print(uv_m)
326
+
327
+ uv_m = tforminv(trans, xy)
328
+ print("\nuv_m = tforminv(trans, xy)")
329
+ print(uv_m)
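A short sketch tying `get_similarity_transform_for_cv2` to `cv2.warpAffine`, which is how `warp_and_crop_face` in `align/align_trans.py` consumes it. The source points are arbitrary stand-ins for detected landmarks; the destination points are the canonical 96x112 reference points.

```python
import cv2
import numpy as np

from align.matlab_cp2tform import get_similarity_transform_for_cv2

src_pts = np.array([[35.0, 40.0], [72.0, 38.0], [54.0, 62.0], [38.0, 80.0], [70.0, 78.0]])
dst_pts = np.array(
    [
        [30.2946, 51.6963],
        [65.5318, 51.5014],
        [48.0252, 71.7366],
        [33.5493, 92.3655],
        [62.7299, 92.2041],
    ]
)

cv2_trans, cv2_trans_inv = get_similarity_transform_for_cv2(src_pts, dst_pts)

img = np.zeros((112, 96, 3), dtype=np.uint8)  # placeholder image
aligned = cv2.warpAffine(img, cv2_trans, (96, 112))
print(cv2_trans)  # 2x3 matrix: [x, y].T = cv2_trans @ [u, v, 1].T
```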
app.py ADDED
@@ -0,0 +1,116 @@
1
+ import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ import numpy as np
5
+ from util.feature_extraction_utils import normalize_transforms
6
+ from util.attack_utils import Attack
7
+ from util.prepare_utils import prepare_models, prepare_dir_vec, get_ensemble
8
+ from align.detector import detect_faces
9
+ from align.align_trans import get_reference_facial_points, warp_and_crop_face
10
+ import torchvision.transforms as transforms
11
+
12
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
13
+ print(device)
14
+ to_tensor = transforms.ToTensor()
15
+
16
+ eps = 0.05
17
+ n_iters = 50
18
+ input_size = [112, 112]
19
+ attack_type = "lpips"
20
+ c_tv = None
21
+ c_sim = 0.05
22
+ lr = 0.0025
23
+ net_type = "alex"
24
+ noise_size = 0.005
25
+ n_starts = 1
26
+ kernel_size_gf = 7
27
+ sigma_gf = 3
28
+ combination = True
29
+ using_subspace = False
30
+ V_reduction_root = "./"
31
+ model_backbones = ["IR_152", "IR_152", "ResNet_152", "ResNet_152"]
32
+ model_roots = [
33
+ "models/Backbone_IR_152_Arcface_Epoch_112.pth",
34
+ "models/Backbone_IR_152_Cosface_Epoch_70.pth",
35
+ "models/Backbone_ResNet_152_Arcface_Epoch_65.pth",
36
+ "models/Backbone_ResNet_152_Cosface_Epoch_68.pth",
37
+ ]
38
+ direction = 1
39
+ crop_size = 112
40
+ scale = crop_size / 112.0
41
+
42
+ models_attack, V_reduction, dim = prepare_models(
43
+ model_backbones,
44
+ input_size,
45
+ model_roots,
46
+ kernel_size_gf,
47
+ sigma_gf,
48
+ combination,
49
+ using_subspace,
50
+ V_reduction_root,
51
+ )
52
+
53
+
54
+ def protect(img):
55
+ img = Image.fromarray(img)
56
+ reference = get_reference_facial_points(default_square=True) * scale
57
+ h, w, c = np.array(img).shape
58
+
59
+ _, landmarks = detect_faces(img)
60
+ facial5points = [[landmarks[0][j], landmarks[0][j + 5]] for j in range(5)]
61
+
62
+ _, tfm = warp_and_crop_face(
63
+ np.array(img), facial5points, reference, crop_size=(crop_size, crop_size)
64
+ )
65
+
66
+ # pytorch transform
67
+ theta = normalize_transforms(tfm, w, h)
68
+ tensor_img = to_tensor(img).unsqueeze(0).to(device)
69
+
70
+ V_reduction = None
71
+ dim = 512
72
+
73
+ # Find gradient direction vector
74
+ dir_vec_extractor = get_ensemble(
75
+ models=models_attack,
76
+ sigma_gf=None,
77
+ kernel_size_gf=None,
78
+ combination=False,
79
+ V_reduction=V_reduction,
80
+ warp=True,
81
+ theta_warp=theta,
82
+ )
83
+ dir_vec = prepare_dir_vec(dir_vec_extractor, tensor_img, dim, combination)
84
+
85
+ img_attacked = tensor_img.clone()
86
+ attack = Attack(
87
+ models_attack,
88
+ dim,
89
+ attack_type,
90
+ eps,
91
+ c_sim,
92
+ net_type,
93
+ lr,
94
+ n_iters,
95
+ noise_size,
96
+ n_starts,
97
+ c_tv,
98
+ sigma_gf,
99
+ kernel_size_gf,
100
+ combination,
101
+ warp=True,
102
+ theta_warp=theta,
103
+ V_reduction=V_reduction,
104
+ )
105
+ img_attacked = attack.execute(tensor_img, dir_vec, direction).detach().cpu()
106
+
107
+ img_attacked_pil = transforms.ToPILImage()(img_attacked[0])
108
+ return img_attacked_pil
109
+
110
+
111
+ gr.Interface(
112
+ fn=protect,
113
+ inputs=gr.components.Image(shape=(512, 512)),
114
+ outputs=gr.components.Image(type="pil"),
115
+ allow_flagging="never",
116
+ ).launch(show_error=True, quiet=False, share=True)
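`protect` can in principle be reused outside the web UI. The sketch below assumes the `gr.Interface(...).launch(...)` call has been moved under an `if __name__ == "__main__":` guard (as written, it runs at import time), that the four checkpoints listed in `model_roots` exist under `models/`, and that `person.jpg` (a placeholder path) contains a detectable face.

```python
import numpy as np
from PIL import Image

from app import protect  # heavy import: builds the four-model ensemble

img = np.array(Image.open("person.jpg").convert("RGB"))  # placeholder path
protected = protect(img)  # PIL image with the adversarial perturbation applied
protected.save("person_protected.jpg")
```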
backbone/__init__.py ADDED
File without changes
backbone/model_irse.py ADDED
@@ -0,0 +1,260 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.nn import (
4
+ Linear,
5
+ Conv2d,
6
+ BatchNorm1d,
7
+ BatchNorm2d,
8
+ PReLU,
9
+ ReLU,
10
+ Sigmoid,
11
+ Dropout,
12
+ MaxPool2d,
13
+ AdaptiveAvgPool2d,
14
+ Sequential,
15
+ Module,
16
+ )
17
+ from collections import namedtuple
18
+
19
+
20
+ # Support: ['IR_50', 'IR_101', 'IR_152', 'IR_SE_50', 'IR_SE_101', 'IR_SE_152']
21
+
22
+
23
+ class Flatten(Module):
24
+ def forward(self, input):
25
+ return input.view(input.size(0), -1)
26
+
27
+
28
+ def l2_norm(input, axis=1):
29
+ norm = torch.norm(input, 2, axis, True)
30
+ output = torch.div(input, norm)
31
+
32
+ return output
33
+
34
+
35
+ class SEModule(Module):
36
+ def __init__(self, channels, reduction):
37
+ super(SEModule, self).__init__()
38
+ self.avg_pool = AdaptiveAvgPool2d(1)
39
+ self.fc1 = Conv2d(
40
+ channels, channels // reduction, kernel_size=1, padding=0, bias=False
41
+ )
42
+
43
+ nn.init.xavier_uniform_(self.fc1.weight.data)
44
+
45
+ self.relu = ReLU(inplace=True)
46
+ self.fc2 = Conv2d(
47
+ channels // reduction, channels, kernel_size=1, padding=0, bias=False
48
+ )
49
+
50
+ self.sigmoid = Sigmoid()
51
+
52
+ def forward(self, x):
53
+ module_input = x
54
+ x = self.avg_pool(x)
55
+ x = self.fc1(x)
56
+ x = self.relu(x)
57
+ x = self.fc2(x)
58
+ x = self.sigmoid(x)
59
+
60
+ return module_input * x
61
+
62
+
63
+ class bottleneck_IR(Module):
64
+ def __init__(self, in_channel, depth, stride):
65
+ super(bottleneck_IR, self).__init__()
66
+ if in_channel == depth:
67
+ self.shortcut_layer = MaxPool2d(1, stride)
68
+ else:
69
+ self.shortcut_layer = Sequential(
70
+ Conv2d(in_channel, depth, (1, 1), stride, bias=False),
71
+ BatchNorm2d(depth),
72
+ )
73
+ self.res_layer = Sequential(
74
+ BatchNorm2d(in_channel),
75
+ Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
76
+ PReLU(depth),
77
+ Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
78
+ BatchNorm2d(depth),
79
+ )
80
+
81
+ def forward(self, x):
82
+ shortcut = self.shortcut_layer(x)
83
+ res = self.res_layer(x)
84
+
85
+ return res + shortcut
86
+
87
+
88
+ class bottleneck_IR_SE(Module):
89
+ def __init__(self, in_channel, depth, stride):
90
+ super(bottleneck_IR_SE, self).__init__()
91
+ if in_channel == depth:
92
+ self.shortcut_layer = MaxPool2d(1, stride)
93
+ else:
94
+ self.shortcut_layer = Sequential(
95
+ Conv2d(in_channel, depth, (1, 1), stride, bias=False),
96
+ BatchNorm2d(depth),
97
+ )
98
+ self.res_layer = Sequential(
99
+ BatchNorm2d(in_channel),
100
+ Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
101
+ PReLU(depth),
102
+ Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
103
+ BatchNorm2d(depth),
104
+ SEModule(depth, 16),
105
+ )
106
+
107
+ def forward(self, x):
108
+ shortcut = self.shortcut_layer(x)
109
+ res = self.res_layer(x)
110
+
111
+ return res + shortcut
112
+
113
+
114
+ class Bottleneck(namedtuple("Block", ["in_channel", "depth", "stride"])):
115
+ """A named tuple describing a ResNet block."""
116
+
117
+
118
+ def get_block(in_channel, depth, num_units, stride=2):
119
+ return [Bottleneck(in_channel, depth, stride)] + [
120
+ Bottleneck(depth, depth, 1) for i in range(num_units - 1)
121
+ ]
122
+
123
+
124
+ def get_blocks(num_layers):
125
+ if num_layers == 50:
126
+ blocks = [
127
+ get_block(in_channel=64, depth=64, num_units=3),
128
+ get_block(in_channel=64, depth=128, num_units=4),
129
+ get_block(in_channel=128, depth=256, num_units=14),
130
+ get_block(in_channel=256, depth=512, num_units=3),
131
+ ]
132
+ elif num_layers == 100:
133
+ blocks = [
134
+ get_block(in_channel=64, depth=64, num_units=3),
135
+ get_block(in_channel=64, depth=128, num_units=13),
136
+ get_block(in_channel=128, depth=256, num_units=30),
137
+ get_block(in_channel=256, depth=512, num_units=3),
138
+ ]
139
+ elif num_layers == 152:
140
+ blocks = [
141
+ get_block(in_channel=64, depth=64, num_units=3),
142
+ get_block(in_channel=64, depth=128, num_units=8),
143
+ get_block(in_channel=128, depth=256, num_units=36),
144
+ get_block(in_channel=256, depth=512, num_units=3),
145
+ ]
146
+
147
+ return blocks
148
+
149
+
150
+ class Backbone(Module):
151
+ def __init__(self, input_size, num_layers, mode="ir"):
152
+ super(Backbone, self).__init__()
153
+ assert input_size[0] in [
154
+ 112,
155
+ 224,
156
+ ], "input_size should be [112, 112] or [224, 224]"
157
+ assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
158
+ assert mode in ["ir", "ir_se"], "mode should be ir or ir_se"
159
+ blocks = get_blocks(num_layers)
160
+ if mode == "ir":
161
+ unit_module = bottleneck_IR
162
+ elif mode == "ir_se":
163
+ unit_module = bottleneck_IR_SE
164
+ self.input_layer = Sequential(
165
+ Conv2d(3, 64, (3, 3), 1, 1, bias=False), BatchNorm2d(64), PReLU(64)
166
+ )
167
+ if input_size[0] == 112:
168
+ self.output_layer = Sequential(
169
+ BatchNorm2d(512),
170
+ Dropout(),
171
+ Flatten(),
172
+ Linear(512 * 7 * 7, 512),
173
+ BatchNorm1d(512),
174
+ )
175
+ else:
176
+ self.output_layer = Sequential(
177
+ BatchNorm2d(512),
178
+ Dropout(),
179
+ Flatten(),
180
+ Linear(512 * 14 * 14, 512),
181
+ BatchNorm1d(512),
182
+ )
183
+
184
+ modules = []
185
+ for block in blocks:
186
+ for bottleneck in block:
187
+ modules.append(
188
+ unit_module(
189
+ bottleneck.in_channel, bottleneck.depth, bottleneck.stride
190
+ )
191
+ )
192
+ self.body = Sequential(*modules)
193
+
194
+ self._initialize_weights()
195
+
196
+ def forward(self, x):
197
+ x = self.input_layer(x)
198
+ x = self.body(x)
199
+ x = self.output_layer(x)
200
+
201
+ return x
202
+
203
+ def _initialize_weights(self):
204
+ for m in self.modules():
205
+ if isinstance(m, nn.Conv2d):
206
+ nn.init.xavier_uniform_(m.weight.data)
207
+ if m.bias is not None:
208
+ m.bias.data.zero_()
209
+ elif isinstance(m, nn.BatchNorm2d):
210
+ m.weight.data.fill_(1)
211
+ m.bias.data.zero_()
212
+ elif isinstance(m, nn.BatchNorm1d):
213
+ m.weight.data.fill_(1)
214
+ m.bias.data.zero_()
215
+ elif isinstance(m, nn.Linear):
216
+ nn.init.xavier_uniform_(m.weight.data)
217
+ if m.bias is not None:
218
+ m.bias.data.zero_()
219
+
220
+
221
+ def IR_50(input_size):
222
+ """Constructs a ir-50 model."""
223
+ model = Backbone(input_size, 50, "ir")
224
+
225
+ return model
226
+
227
+
228
+ def IR_101(input_size):
229
+ """Constructs a ir-101 model."""
230
+ model = Backbone(input_size, 100, "ir")
231
+
232
+ return model
233
+
234
+
235
+ def IR_152(input_size):
236
+ """Constructs a ir-152 model."""
237
+ model = Backbone(input_size, 152, "ir")
238
+
239
+ return model
240
+
241
+
242
+ def IR_SE_50(input_size):
243
+ """Constructs a ir_se-50 model."""
244
+ model = Backbone(input_size, 50, "ir_se")
245
+
246
+ return model
247
+
248
+
249
+ def IR_SE_101(input_size):
250
+ """Constructs a ir_se-101 model."""
251
+ model = Backbone(input_size, 100, "ir_se")
252
+
253
+ return model
254
+
255
+
256
+ def IR_SE_152(input_size):
257
+ """Constructs a ir_se-152 model."""
258
+ model = Backbone(input_size, 152, "ir_se")
259
+
260
+ return model
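For orientation, a minimal usage sketch of the factory functions above; the dummy batch, its size, and the expected output shape are illustrative assumptions, not part of the committed file:

import torch
from backbone.model_irse import IR_50

# Build the IR-50 backbone for 112x112 aligned face crops and run a dummy batch.
model = IR_50([112, 112])
model.eval()  # use running BatchNorm statistics, disable Dropout
with torch.no_grad():
    dummy = torch.randn(2, 3, 112, 112)
    embeddings = model(dummy)
print(embeddings.shape)  # torch.Size([2, 512])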
backbone/model_resnet.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ from torch.nn import (
3
+ Linear,
4
+ Conv2d,
5
+ BatchNorm1d,
6
+ BatchNorm2d,
7
+ ReLU,
8
+ Dropout,
9
+ MaxPool2d,
10
+ Sequential,
11
+ Module,
12
+ )
13
+
14
+
15
+ # Support: ['ResNet_50', 'ResNet_101', 'ResNet_152']
16
+
17
+
18
+ def conv3x3(in_planes, out_planes, stride=1):
19
+ """3x3 convolution with padding"""
20
+
21
+ return Conv2d(
22
+ in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False
23
+ )
24
+
25
+
26
+ def conv1x1(in_planes, out_planes, stride=1):
27
+ """1x1 convolution"""
28
+
29
+ return Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
30
+
31
+
32
+ class BasicBlock(Module):
33
+ expansion = 1
34
+
35
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
36
+ super(BasicBlock, self).__init__()
37
+ self.conv1 = conv3x3(inplanes, planes, stride)
38
+ self.bn1 = BatchNorm2d(planes)
39
+ self.relu = ReLU(inplace=True)
40
+ self.conv2 = conv3x3(planes, planes)
41
+ self.bn2 = BatchNorm2d(planes)
42
+ self.downsample = downsample
43
+ self.stride = stride
44
+
45
+ def forward(self, x):
46
+ identity = x
47
+
48
+ out = self.conv1(x)
49
+ out = self.bn1(out)
50
+ out = self.relu(out)
51
+
52
+ out = self.conv2(out)
53
+ out = self.bn2(out)
54
+
55
+ if self.downsample is not None:
56
+ identity = self.downsample(x)
57
+
58
+ out += identity
59
+ out = self.relu(out)
60
+
61
+ return out
62
+
63
+
64
+ class Bottleneck(Module):
65
+ expansion = 4
66
+
67
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
68
+ super(Bottleneck, self).__init__()
69
+ self.conv1 = conv1x1(inplanes, planes)
70
+ self.bn1 = BatchNorm2d(planes)
71
+ self.conv2 = conv3x3(planes, planes, stride)
72
+ self.bn2 = BatchNorm2d(planes)
73
+ self.conv3 = conv1x1(planes, planes * self.expansion)
74
+ self.bn3 = BatchNorm2d(planes * self.expansion)
75
+ self.relu = ReLU(inplace=True)
76
+ self.downsample = downsample
77
+ self.stride = stride
78
+
79
+ def forward(self, x):
80
+ identity = x
81
+
82
+ out = self.conv1(x)
83
+ out = self.bn1(out)
84
+ out = self.relu(out)
85
+
86
+ out = self.conv2(out)
87
+ out = self.bn2(out)
88
+ out = self.relu(out)
89
+
90
+ out = self.conv3(out)
91
+ out = self.bn3(out)
92
+
93
+ if self.downsample is not None:
94
+ identity = self.downsample(x)
95
+
96
+ out += identity
97
+ out = self.relu(out)
98
+
99
+ return out
100
+
101
+
102
+ class ResNet(Module):
103
+ def __init__(self, input_size, block, layers, zero_init_residual=True):
104
+ super(ResNet, self).__init__()
105
+ assert input_size[0] in [
106
+ 112,
107
+ 224,
108
+ ], "input_size should be [112, 112] or [224, 224]"
109
+ self.inplanes = 64
110
+ self.conv1 = Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
111
+ self.bn1 = BatchNorm2d(64)
112
+ self.relu = ReLU(inplace=True)
113
+ self.maxpool = MaxPool2d(kernel_size=3, stride=2, padding=1)
114
+ self.layer1 = self._make_layer(block, 64, layers[0])
115
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
116
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
117
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
118
+
119
+ self.bn_o1 = BatchNorm2d(2048)
120
+ self.dropout = Dropout()
121
+ if input_size[0] == 112:
122
+ self.fc = Linear(2048 * 4 * 4, 512)
123
+ else:
124
+ self.fc = Linear(2048 * 8 * 8, 512)
125
+ self.bn_o2 = BatchNorm1d(512)
126
+
127
+ for m in self.modules():
128
+ if isinstance(m, Conv2d):
129
+ nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
130
+ elif isinstance(m, BatchNorm2d):
131
+ nn.init.constant_(m.weight, 1)
132
+ nn.init.constant_(m.bias, 0)
133
+
134
+ # Zero-initialize the last BN in each residual branch,
135
+ # so that the residual branch starts with zeros, and each residual block behaves like an identity.
136
+ # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
137
+ if zero_init_residual:
138
+ for m in self.modules():
139
+ if isinstance(m, Bottleneck):
140
+ nn.init.constant_(m.bn3.weight, 0)
141
+ elif isinstance(m, BasicBlock):
142
+ nn.init.constant_(m.bn2.weight, 0)
143
+
144
+ def _make_layer(self, block, planes, blocks, stride=1):
145
+ downsample = None
146
+ if stride != 1 or self.inplanes != planes * block.expansion:
147
+ downsample = Sequential(
148
+ conv1x1(self.inplanes, planes * block.expansion, stride),
149
+ BatchNorm2d(planes * block.expansion),
150
+ )
151
+
152
+ layers = []
153
+ layers.append(block(self.inplanes, planes, stride, downsample))
154
+ self.inplanes = planes * block.expansion
155
+ for _ in range(1, blocks):
156
+ layers.append(block(self.inplanes, planes))
157
+
158
+ return Sequential(*layers)
159
+
160
+ def forward(self, x):
161
+ x = self.conv1(x)
162
+ x = self.bn1(x)
163
+ x = self.relu(x)
164
+ x = self.maxpool(x)
165
+
166
+ x = self.layer1(x)
167
+ x = self.layer2(x)
168
+ x = self.layer3(x)
169
+ x = self.layer4(x)
170
+
171
+ x = self.bn_o1(x)
172
+ x = self.dropout(x)
173
+ x = x.view(x.size(0), -1)
174
+ x = self.fc(x)
175
+ x = self.bn_o2(x)
176
+
177
+ return x
178
+
179
+
180
+ def ResNet_18(input_size, **kwargs):
181
+ """Constructs a ResNet-50 model."""
182
+ model = ResNet(input_size, Bottleneck, [2, 2, 2, 2], **kwargs)
183
+
184
+ return model
185
+
186
+
187
+ def ResNet_50(input_size, **kwargs):
188
+ """Constructs a ResNet-50 model."""
189
+ model = ResNet(input_size, Bottleneck, [3, 4, 6, 3], **kwargs)
190
+
191
+ return model
192
+
193
+
194
+ def ResNet_101(input_size, **kwargs):
195
+ """Constructs a ResNet-101 model."""
196
+ model = ResNet(input_size, Bottleneck, [3, 4, 23, 3], **kwargs)
197
+
198
+ return model
199
+
200
+
201
+ def ResNet_152(input_size, **kwargs):
202
+ """Constructs a ResNet-152 model."""
203
+ model = ResNet(input_size, Bottleneck, [3, 8, 36, 3], **kwargs)
204
+
205
+ return model
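A quick shape check for the 112x112 branch of the head above (the dummy batch is an illustrative assumption): conv1 and the max-pool halve the input twice (112 -> 56 -> 28) and layers 2-4 halve it three more times (28 -> 14 -> 7 -> 4), so the flattened feature entering the fully connected layer is 2048 * 4 * 4.

import torch
from backbone.model_resnet import ResNet_50

# Build the ResNet-50 backbone for 112x112 crops and confirm the embedding shape.
model = ResNet_50([112, 112])
model.eval()
with torch.no_grad():
    embeddings = model(torch.randn(2, 3, 112, 112))
print(embeddings.shape)  # torch.Size([2, 512])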
backbone/models2.py ADDED
@@ -0,0 +1,427 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.nn import (
2
+ Linear,
3
+ Conv2d,
4
+ BatchNorm1d,
5
+ BatchNorm2d,
6
+ PReLU,
7
+ ReLU,
8
+ Sigmoid,
9
+ Dropout,
10
+ MaxPool2d,
11
+ AdaptiveAvgPool2d,
12
+ Sequential,
13
+ Module,
14
+ Parameter,
15
+ )
16
+ import torch
17
+ from collections import namedtuple
18
+ import math
19
+
20
+ ################################## Original Arcface Model #############################################################
21
+
22
+
23
+ class Flatten(Module):
24
+ def forward(self, input):
25
+ return input.view(input.size(0), -1)
26
+
27
+
28
+ def l2_norm(input, axis=1):
29
+ norm = torch.norm(input, 2, axis, True)
30
+ output = torch.div(input, norm)
31
+ return output
32
+
33
+
34
+ class SEModule(Module):
35
+ def __init__(self, channels, reduction):
36
+ super(SEModule, self).__init__()
37
+ self.avg_pool = AdaptiveAvgPool2d(1)
38
+ self.fc1 = Conv2d(
39
+ channels, channels // reduction, kernel_size=1, padding=0, bias=False
40
+ )
41
+ self.relu = ReLU(inplace=True)
42
+ self.fc2 = Conv2d(
43
+ channels // reduction, channels, kernel_size=1, padding=0, bias=False
44
+ )
45
+ self.sigmoid = Sigmoid()
46
+
47
+ def forward(self, x):
48
+ module_input = x
49
+ x = self.avg_pool(x)
50
+ x = self.fc1(x)
51
+ x = self.relu(x)
52
+ x = self.fc2(x)
53
+ x = self.sigmoid(x)
54
+ return module_input * x
55
+
56
+
57
+ class bottleneck_IR(Module):
58
+ def __init__(self, in_channel, depth, stride):
59
+ super(bottleneck_IR, self).__init__()
60
+ if in_channel == depth:
61
+ self.shortcut_layer = MaxPool2d(1, stride)
62
+ else:
63
+ self.shortcut_layer = Sequential(
64
+ Conv2d(in_channel, depth, (1, 1), stride, bias=False),
65
+ BatchNorm2d(depth),
66
+ )
67
+ self.res_layer = Sequential(
68
+ BatchNorm2d(in_channel),
69
+ Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
70
+ PReLU(depth),
71
+ Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
72
+ BatchNorm2d(depth),
73
+ )
74
+
75
+ def forward(self, x):
76
+ shortcut = self.shortcut_layer(x)
77
+ res = self.res_layer(x)
78
+ return res + shortcut
79
+
80
+
81
+ class bottleneck_IR_SE(Module):
82
+ def __init__(self, in_channel, depth, stride):
83
+ super(bottleneck_IR_SE, self).__init__()
84
+ if in_channel == depth:
85
+ self.shortcut_layer = MaxPool2d(1, stride)
86
+ else:
87
+ self.shortcut_layer = Sequential(
88
+ Conv2d(in_channel, depth, (1, 1), stride, bias=False),
89
+ BatchNorm2d(depth),
90
+ )
91
+ self.res_layer = Sequential(
92
+ BatchNorm2d(in_channel),
93
+ Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
94
+ PReLU(depth),
95
+ Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
96
+ BatchNorm2d(depth),
97
+ SEModule(depth, 16),
98
+ )
99
+
100
+ def forward(self, x):
101
+ shortcut = self.shortcut_layer(x)
102
+ res = self.res_layer(x)
103
+ return res + shortcut
104
+
105
+
106
+ class Bottleneck(namedtuple("Block", ["in_channel", "depth", "stride"])):
107
+ """A named tuple describing a ResNet block."""
108
+
109
+
110
+ def get_block(in_channel, depth, num_units, stride=2):
111
+ return [Bottleneck(in_channel, depth, stride)] + [
112
+ Bottleneck(depth, depth, 1) for i in range(num_units - 1)
113
+ ]
114
+
115
+
116
+ def get_blocks(num_layers):
117
+ if num_layers == 50:
118
+ blocks = [
119
+ get_block(in_channel=64, depth=64, num_units=3),
120
+ get_block(in_channel=64, depth=128, num_units=4),
121
+ get_block(in_channel=128, depth=256, num_units=14),
122
+ get_block(in_channel=256, depth=512, num_units=3),
123
+ ]
124
+ elif num_layers == 100:
125
+ blocks = [
126
+ get_block(in_channel=64, depth=64, num_units=3),
127
+ get_block(in_channel=64, depth=128, num_units=13),
128
+ get_block(in_channel=128, depth=256, num_units=30),
129
+ get_block(in_channel=256, depth=512, num_units=3),
130
+ ]
131
+ elif num_layers == 152:
132
+ blocks = [
133
+ get_block(in_channel=64, depth=64, num_units=3),
134
+ get_block(in_channel=64, depth=128, num_units=8),
135
+ get_block(in_channel=128, depth=256, num_units=36),
136
+ get_block(in_channel=256, depth=512, num_units=3),
137
+ ]
138
+ return blocks
139
+
140
+
141
+ class Backbone(Module):
142
+ def __init__(self, num_layers, drop_ratio, mode="ir"):
143
+ super(Backbone, self).__init__()
144
+ assert num_layers in [50, 100, 152], "num_layers should be 50, 100, or 152"
145
+ assert mode in ["ir", "ir_se"], "mode should be ir or ir_se"
146
+ blocks = get_blocks(num_layers)
147
+ if mode == "ir":
148
+ unit_module = bottleneck_IR
149
+ elif mode == "ir_se":
150
+ unit_module = bottleneck_IR_SE
151
+ self.input_layer = Sequential(
152
+ Conv2d(3, 64, (3, 3), 1, 1, bias=False), BatchNorm2d(64), PReLU(64)
153
+ )
154
+ self.output_layer = Sequential(
155
+ BatchNorm2d(512),
156
+ Dropout(drop_ratio),
157
+ Flatten(),
158
+ Linear(512 * 7 * 7, 512),
159
+ BatchNorm1d(512),
160
+ )
161
+ modules = []
162
+ for block in blocks:
163
+ for bottleneck in block:
164
+ modules.append(
165
+ unit_module(
166
+ bottleneck.in_channel, bottleneck.depth, bottleneck.stride
167
+ )
168
+ )
169
+ self.body = Sequential(*modules)
170
+
171
+ def forward(self, x):
172
+ x = self.input_layer(x)
173
+ x = self.body(x)
174
+ x = self.output_layer(x)
175
+ return l2_norm(x)
176
+
177
+
178
+ ################################## MobileFaceNet #############################################################
179
+
180
+
181
+ class Conv_block(Module):
182
+ def __init__(
183
+ self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1
184
+ ):
185
+ super(Conv_block, self).__init__()
186
+ self.conv = Conv2d(
187
+ in_c,
188
+ out_channels=out_c,
189
+ kernel_size=kernel,
190
+ groups=groups,
191
+ stride=stride,
192
+ padding=padding,
193
+ bias=False,
194
+ )
195
+ self.bn = BatchNorm2d(out_c)
196
+ self.prelu = PReLU(out_c)
197
+
198
+ def forward(self, x):
199
+ x = self.conv(x)
200
+ x = self.bn(x)
201
+ x = self.prelu(x)
202
+ return x
203
+
204
+
205
+ class Linear_block(Module):
206
+ def __init__(
207
+ self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1
208
+ ):
209
+ super(Linear_block, self).__init__()
210
+ self.conv = Conv2d(
211
+ in_c,
212
+ out_channels=out_c,
213
+ kernel_size=kernel,
214
+ groups=groups,
215
+ stride=stride,
216
+ padding=padding,
217
+ bias=False,
218
+ )
219
+ self.bn = BatchNorm2d(out_c)
220
+
221
+ def forward(self, x):
222
+ x = self.conv(x)
223
+ x = self.bn(x)
224
+ return x
225
+
226
+
227
+ class Depth_Wise(Module):
228
+ def __init__(
229
+ self,
230
+ in_c,
231
+ out_c,
232
+ residual=False,
233
+ kernel=(3, 3),
234
+ stride=(2, 2),
235
+ padding=(1, 1),
236
+ groups=1,
237
+ ):
238
+ super(Depth_Wise, self).__init__()
239
+ self.conv = Conv_block(
240
+ in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1)
241
+ )
242
+ self.conv_dw = Conv_block(
243
+ groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride
244
+ )
245
+ self.project = Linear_block(
246
+ groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1)
247
+ )
248
+ self.residual = residual
249
+
250
+ def forward(self, x):
251
+ if self.residual:
252
+ short_cut = x
253
+ x = self.conv(x)
254
+ x = self.conv_dw(x)
255
+ x = self.project(x)
256
+ if self.residual:
257
+ output = short_cut + x
258
+ else:
259
+ output = x
260
+ return output
261
+
262
+
263
+ class Residual(Module):
264
+ def __init__(
265
+ self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)
266
+ ):
267
+ super(Residual, self).__init__()
268
+ modules = []
269
+ for _ in range(num_block):
270
+ modules.append(
271
+ Depth_Wise(
272
+ c,
273
+ c,
274
+ residual=True,
275
+ kernel=kernel,
276
+ padding=padding,
277
+ stride=stride,
278
+ groups=groups,
279
+ )
280
+ )
281
+ self.model = Sequential(*modules)
282
+
283
+ def forward(self, x):
284
+ return self.model(x)
285
+
286
+
287
+ class MobileFaceNet(Module):
288
+ def __init__(self, embedding_size):
289
+ super(MobileFaceNet, self).__init__()
290
+ self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
291
+ self.conv2_dw = Conv_block(
292
+ 64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64
293
+ )
294
+ self.conv_23 = Depth_Wise(
295
+ 64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128
296
+ )
297
+ self.conv_3 = Residual(
298
+ 64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1)
299
+ )
300
+ self.conv_34 = Depth_Wise(
301
+ 64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256
302
+ )
303
+ self.conv_4 = Residual(
304
+ 128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)
305
+ )
306
+ self.conv_45 = Depth_Wise(
307
+ 128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512
308
+ )
309
+ self.conv_5 = Residual(
310
+ 128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1)
311
+ )
312
+ self.conv_6_sep = Conv_block(
313
+ 128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0)
314
+ )
315
+ self.conv_6_dw = Linear_block(
316
+ 512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0)
317
+ )
318
+ self.conv_6_flatten = Flatten()
319
+ self.linear = Linear(512, embedding_size, bias=False)
320
+ self.bn = BatchNorm1d(embedding_size)
321
+
322
+ def forward(self, x):
323
+ out = self.conv1(x)
324
+
325
+ out = self.conv2_dw(out)
326
+
327
+ out = self.conv_23(out)
328
+
329
+ out = self.conv_3(out)
330
+
331
+ out = self.conv_34(out)
332
+
333
+ out = self.conv_4(out)
334
+
335
+ out = self.conv_45(out)
336
+
337
+ out = self.conv_5(out)
338
+
339
+ out = self.conv_6_sep(out)
340
+
341
+ out = self.conv_6_dw(out)
342
+
343
+ out = self.conv_6_flatten(out)
344
+
345
+ out = self.linear(out)
346
+
347
+ out = self.bn(out)
348
+ return l2_norm(out)
349
+
350
+
351
+ ################################## Arcface head #############################################################
352
+
353
+
354
+ class Arcface(Module):
355
+ # implementation of the additive angular margin (ArcFace) loss in https://arxiv.org/abs/1801.07698
356
+ def __init__(self, embedding_size=512, classnum=51332, s=64.0, m=0.5):
357
+ super(Arcface, self).__init__()
358
+ self.classnum = classnum
359
+ self.kernel = Parameter(torch.Tensor(embedding_size, classnum))
360
+ # initial kernel
361
+ self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
362
+ self.m = m # the margin value, default is 0.5
363
+ self.s = s # scalar value default is 64, see normface https://arxiv.org/abs/1704.06369
364
+ self.cos_m = math.cos(m)
365
+ self.sin_m = math.sin(m)
366
+ self.mm = self.sin_m * m # issue 1
367
+ self.threshold = math.cos(math.pi - m)
368
+
369
+ def forward(self, embbedings, label):
370
+ # weights norm
371
+ nB = len(embbedings)
372
+ kernel_norm = l2_norm(self.kernel, axis=0)
373
+ # cos(theta+m)
374
+ cos_theta = torch.mm(embbedings, kernel_norm)
375
+ # output = torch.mm(embbedings,kernel_norm)
376
+ cos_theta = cos_theta.clamp(-1, 1) # for numerical stability
377
+ cos_theta_2 = torch.pow(cos_theta, 2)
378
+ sin_theta_2 = 1 - cos_theta_2
379
+ sin_theta = torch.sqrt(sin_theta_2)
380
+ cos_theta_m = cos_theta * self.cos_m - sin_theta * self.sin_m
381
+ # this condition controls the theta+m should in range [0, pi]
382
+ # 0<=theta+m<=pi
383
+ # -m<=theta<=pi-m
384
+ cond_v = cos_theta - self.threshold
385
+ cond_mask = cond_v <= 0
386
+ keep_val = cos_theta - self.mm # when theta not in [0,pi], use cosface instead
387
+ cos_theta_m[cond_mask] = keep_val[cond_mask]
388
+ output = (
389
+ cos_theta * 1.0
390
+ ) # a little bit hacky way to prevent in_place operation on cos_theta
391
+ idx_ = torch.arange(0, nB, dtype=torch.long)
392
+ output[idx_, label] = cos_theta_m[idx_, label]
393
+ output *= (
394
+ self.s
395
+ ) # scale up in order to make softmax work, first introduced in normface
396
+ return output
397
+
398
+
399
+ ################################## Cosface head #############################################################
400
+
401
+
402
+ class Am_softmax(Module):
403
+ # implementation of additive margin softmax loss in https://arxiv.org/abs/1801.05599
404
+ def __init__(self, embedding_size=512, classnum=51332):
405
+ super(Am_softmax, self).__init__()
406
+ self.classnum = classnum
407
+ self.kernel = Parameter(torch.Tensor(embedding_size, classnum))
408
+ # initial kernel
409
+ self.kernel.data.uniform_(-1, 1).renorm_(2, 1, 1e-5).mul_(1e5)
410
+ self.m = 0.35 # additive margin recommended by the paper
411
+ self.s = 30.0 # see normface https://arxiv.org/abs/1704.06369
412
+
413
+ def forward(self, embbedings, label):
414
+ kernel_norm = l2_norm(self.kernel, axis=0)
415
+ cos_theta = torch.mm(embbedings, kernel_norm)
416
+ cos_theta = cos_theta.clamp(-1, 1) # for numerical stability
417
+ phi = cos_theta - self.m
418
+ label = label.view(-1, 1) # size=(B,1)
419
+ index = cos_theta.data * 0.0 # size=(B,Classnum)
420
+ index.scatter_(1, label.data.view(-1, 1), 1)
421
+ index = index.bool()
422
+ output = cos_theta * 1.0
423
+ output[index] = phi[index] # only change the correct predicted output
424
+ output *= (
425
+ self.s
426
+ ) # scale up in order to make softmax work, first introduced in normface
427
+ return output
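To illustrate how the heads above are meant to be combined with the Backbone in this file, a minimal sketch; the class count, batch, and labels are made up for the example:

import torch
import torch.nn.functional as F
from backbone.models2 import Backbone, Arcface

backbone = Backbone(num_layers=50, drop_ratio=0.4, mode="ir")
head = Arcface(embedding_size=512, classnum=10)
backbone.eval()  # this Backbone already returns L2-normalized embeddings

images = torch.randn(4, 3, 112, 112)   # stand-in for aligned 112x112 crops
labels = torch.randint(0, 10, (4,))
embeddings = backbone(images)          # (4, 512), unit-norm rows
logits = head(embeddings, labels)      # angular margin applied at the true class
loss = F.cross_entropy(logits, labels)
print(loss.item())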
requirements-dev.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ ruff
2
+ pre-commit
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=4.38.1
2
+ numpy>=2.0.0
3
+ Pillow>=10.4.0
4
+ torch>=2.3.1
5
+ torchvision>=0.18.1
6
+ tqdm>=4.66.4
7
+ lpips>=0.1.4
util/__init__.py ADDED
File without changes
util/attack_utils.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Helper function for extracting features from pre-trained models
2
+ import torch
3
+ import torch.nn as nn
4
+ import torchvision.transforms as transforms
5
+ from torch.autograd import Variable
6
+ from util.feature_extraction_utils import warp_image, normalize_batch
7
+ from util.prepare_utils import get_ensemble, extract_features
8
+ from lpips_pytorch import LPIPS
9
+
10
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
+ tensor_transform = transforms.ToTensor()
12
+ pil_transform = transforms.ToPILImage()
13
+
14
+
15
+ class Attack(nn.Module):
16
+ def __init__(
17
+ self,
18
+ models,
19
+ dim,
20
+ attack_type,
21
+ eps,
22
+ c_sim=0.5,
23
+ net_type="alex",
24
+ lr=0.05,
25
+ n_iters=100,
26
+ noise_size=0.001,
27
+ n_starts=10,
28
+ c_tv=None,
29
+ sigma_gf=None,
30
+ kernel_size_gf=None,
31
+ combination=False,
32
+ warp=False,
33
+ theta_warp=None,
34
+ V_reduction=None,
35
+ ):
36
+ super(Attack, self).__init__()
37
+ self.extractor_ens = get_ensemble(
38
+ models, sigma_gf, kernel_size_gf, combination, V_reduction, warp, theta_warp
39
+ )
40
+ # print("There are '{}'' models in the attack ensemble".format(len(self.extractor_ens)))
41
+ self.dim = dim
42
+ self.eps = eps
43
+ self.c_sim = c_sim
44
+ self.net_type = net_type
45
+ self.lr = lr
46
+ self.n_iters = n_iters
47
+ self.noise_size = noise_size
48
+ self.n_starts = n_starts
49
+ self.c_tv = c_tv
50
+ self.attack_type = attack_type
51
+ self.warp = warp
52
+ self.theta_warp = theta_warp
53
+ if self.attack_type == "lpips":
54
+ self.lpips_loss = LPIPS(self.net_type).to(device)
55
+
56
+ def execute(self, images, dir_vec, direction):
57
+ images = Variable(images).to(device)
58
+ dir_vec = dir_vec.to(device)
59
+ # take norm wrt dim
60
+ dir_vec_norm = dir_vec.norm(dim=2).unsqueeze(2).to(device)
61
+ dist = torch.zeros(images.shape[0]).to(device)
62
+ adv_images = images.detach().clone()
63
+
64
+ if self.warp:
65
+ self.face_img = warp_image(images, self.theta_warp)
66
+
67
+ for start in range(self.n_starts):
68
+ # save the previous adversarial images and distances before this restart
69
+ adv_images_old = adv_images.detach().clone()
70
+ dist_old = dist.clone()
71
+ # add noise to initialize ( - noise_size, noise_size)
72
+ noise_uniform = Variable(
73
+ 2 * self.noise_size * torch.rand(images.size()) - self.noise_size
74
+ ).to(device)
75
+ adv_images = Variable(
76
+ images.detach().clone() + noise_uniform, requires_grad=True
77
+ ).to(device)
78
+
79
+ for i in range(self.n_iters):
80
+ adv_features = extract_features(
81
+ adv_images, self.extractor_ens, self.dim
82
+ ).to(device)
83
+ # normalize feature vectors in ensembles
84
+ loss = direction * torch.mean(
85
+ (adv_features - dir_vec) ** 2 / dir_vec_norm
86
+ )
87
+
88
+ if self.c_tv is not None:
89
+ tv_out = self.total_var_reg(images, adv_images)
90
+ loss -= self.c_tv * tv_out
91
+
92
+ if self.attack_type == "lpips":
93
+ lpips_out = self.lpips_reg(images, adv_images)
94
+ loss -= self.c_sim * lpips_out
95
+
96
+ grad = torch.autograd.grad(loss, [adv_images])
97
+ adv_images = adv_images + self.lr * grad[0].sign()
98
+ perturbation = adv_images - images
99
+
100
+ if self.attack_type == "sgd":
101
+ perturbation = torch.clamp(
102
+ perturbation, min=-self.eps, max=self.eps
103
+ )
104
+ adv_images = images + perturbation
105
+
106
+ adv_images = torch.clamp(adv_images, min=0, max=1)
107
+ adv_features = extract_features(
108
+ adv_images, self.extractor_ens, self.dim
109
+ ).to(device)
110
+ dist = torch.mean((adv_features - dir_vec) ** 2 / dir_vec_norm, dim=[1, 2])
111
+
112
+ if direction == 1:
113
+ adv_images[dist < dist_old] = adv_images_old[dist < dist_old]
114
+ dist[dist < dist_old] = dist_old[dist < dist_old]
115
+ else:
116
+ adv_images[dist > dist_old] = adv_images_old[dist > dist_old]
117
+ dist[dist > dist_old] = dist_old[dist > dist_old]
118
+
119
+ return adv_images.detach().cpu()
120
+
121
+ def lpips_reg(self, images, adv_images):
122
+ if self.warp:
123
+ face_adv = warp_image(adv_images, self.theta_warp)
124
+ lpips_out = self.lpips_loss(
125
+ normalize_batch(self.face_img).to(device),
126
+ normalize_batch(face_adv).to(device),
127
+ )[0][0][0][0] / (2 * adv_images.shape[0])
128
+ lpips_out += self.lpips_loss(
129
+ normalize_batch(images).to(device),
130
+ normalize_batch(adv_images).to(device),
131
+ )[0][0][0][0] / (2 * adv_images.shape[0])
132
+
133
+ else:
134
+ lpips_out = (
135
+ self.lpips_loss(
136
+ normalize_batch(images).to(device),
137
+ normalize_batch(adv_images).to(device),
138
+ )[0][0][0][0]
139
+ / adv_images.shape[0]
140
+ )
141
+
142
+ return lpips_out
143
+
144
+ def total_var_reg(self, images, adv_images):
145
+ perturbation = adv_images - images
146
+ tv = torch.mean(
147
+ torch.abs(perturbation[:, :, :, :-1] - perturbation[:, :, :, 1:])
148
+ ) + torch.mean(
149
+ torch.abs(perturbation[:, :, :-1, :] - perturbation[:, :, 1:, :])
150
+ )
151
+
152
+ return tv
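A minimal sketch of how this Attack class appears to be driven; the backbone is left randomly initialized and the hyper-parameters are placeholders (a real run would load a trained checkpoint and choose eps, lr, and n_iters deliberately):

import torch
from backbone.model_irse import IR_50
from util.attack_utils import Attack
from util.prepare_utils import extract_features

model = IR_50([112, 112])  # placeholder: load a trained checkpoint here in practice

attack = Attack(
    models=[model], dim=512, attack_type="sgd", eps=0.05,
    lr=0.01, n_iters=10, n_starts=1,
)
images = torch.rand(1, 3, 112, 112)  # image to protect, pixel values in [0, 1]
# Direction vector: the clean image's own features; direction=1 pushes the
# adversarial features away from it.
dir_vec = extract_features(images, attack.extractor_ens, 512)
adv_images = attack.execute(images, dir_vec, direction=1)
print(adv_images.shape)  # torch.Size([1, 3, 112, 112])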
util/feature_extraction_utils.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Helper function for extracting features from pre-trained models
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import torchvision.transforms as transforms
5
+ import torch.nn as nn
6
+ import numpy as np
7
+
8
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
+
10
+
11
+ def warp_image(tensor_img, theta_warp, crop_size=112):
12
+ # applies affine transform theta to image and crops it
13
+
14
+ theta_warp = torch.Tensor(theta_warp).unsqueeze(0).to(device)
15
+ grid = F.affine_grid(theta_warp, tensor_img.size())
16
+ img_warped = F.grid_sample(tensor_img, grid)
17
+ img_cropped = img_warped[:, :, 0:crop_size, 0:crop_size]
18
+ return img_cropped
19
+
20
+
21
+ def normalize_transforms(tfm, W, H):
22
+ # normalizes affine transform from cv2 for pytorch
23
+ tfm_t = np.concatenate((tfm, np.array([[0, 0, 1]])), axis=0)
24
+ transforms = np.linalg.inv(tfm_t)[0:2, :]
25
+ transforms[0, 0] = transforms[0, 0]
26
+ transforms[0, 1] = transforms[0, 1] * H / W
27
+ transforms[0, 2] = (
28
+ transforms[0, 2] * 2 / W + transforms[0, 0] + transforms[0, 1] - 1
29
+ )
30
+
31
+ transforms[1, 0] = transforms[1, 0] * W / H
32
+ transforms[1, 1] = transforms[1, 1]
33
+ transforms[1, 2] = (
34
+ transforms[1, 2] * 2 / H + transforms[1, 0] + transforms[1, 1] - 1
35
+ )
36
+
37
+ return transforms
38
+
39
+
40
+ def l2_norm(input, axis=1):
41
+ # normalizes input with respect to its L2 norm
42
+ norm = torch.norm(input, 2, axis, True)
43
+ output = torch.div(input, norm)
44
+ return output
45
+
46
+
47
+ def de_preprocess(tensor):
48
+ # normalize images from [-1,1] to [0,1]
49
+ return tensor * 0.5 + 0.5
50
+
51
+
52
+ # normalize image to [-1,1]
53
+ normalize = transforms.Compose([transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
54
+
55
+
56
+ def normalize_batch(imgs_tensor):
57
+ normalized_imgs = torch.empty_like(imgs_tensor)
58
+ for i, img_ten in enumerate(imgs_tensor):
59
+ normalized_imgs[i] = normalize(img_ten)
60
+
61
+ return normalized_imgs
62
+
63
+
64
+ def resize2d(img, size):
65
+ # resizes image
66
+ return F.adaptive_avg_pool2d(img, size)
67
+
68
+
69
+ class face_extractor(nn.Module):
70
+ def __init__(self, crop_size=112, warp=False, theta_warp=None):
71
+ super(face_extractor, self).__init__()
72
+ self.crop_size = crop_size
73
+ self.warp = warp
74
+ self.theta_warp = theta_warp
75
+
76
+ def forward(self, input):
77
+ if self.warp:
78
+ assert input.shape[0] == 1
79
+ input = warp_image(input, self.theta_warp, self.crop_size)
80
+
81
+ return input
82
+
83
+
84
+ class feature_extractor(nn.Module):
85
+ def __init__(self, model, crop_size=112, tta=True, warp=False, theta_warp=None):
86
+ super(feature_extractor, self).__init__()
87
+ self.model = model
88
+ self.crop_size = crop_size
89
+ self.tta = tta
90
+ self.warp = warp
91
+ self.theta_warp = theta_warp
92
+
93
+ self.model = model
94
+
95
+ def forward(self, input):
96
+ if self.warp:
97
+ assert input.shape[0] == 1
98
+ input = warp_image(input, self.theta_warp, self.crop_size)
99
+
100
+ batch_normalized = normalize_batch(input)
101
+ batch_flipped = torch.flip(batch_normalized, [3])
102
+ # extract features
103
+ self.model.eval() # set to evaluation mode
104
+ if self.tta:
105
+ embed = self.model(batch_normalized) + self.model(batch_flipped)
106
+ features = l2_norm(embed)
107
+ else:
108
+ features = l2_norm(self.model(batch_normalized))
109
+ return features
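To show what normalize_transforms and warp_image above are for, a small sketch; the 2x3 matrix is a made-up pixel-space similarity transform of the kind cv2 alignment code produces:

import numpy as np
import torch
from util.feature_extraction_utils import normalize_transforms, warp_image

W = H = 112
tfm = np.array([[0.9, 0.05, 4.0],
                [-0.05, 0.9, 6.0]])      # pixel-space affine (e.g. from cv2)
theta = normalize_transforms(tfm, W, H)  # rescaled for F.affine_grid's [-1, 1] grid

device = "cuda" if torch.cuda.is_available() else "cpu"
img = torch.rand(1, 3, H, W).to(device)
aligned = warp_image(img, theta, crop_size=112)
print(aligned.shape)  # torch.Size([1, 3, 112, 112])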
util/prepare_utils.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Helper function for extracting features from pre-trained models
2
+ import math
3
+ import numbers
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ import torchvision.transforms as transforms
8
+ import numpy as np
9
+ import torchvision.datasets as datasets
10
+ from util.feature_extraction_utils import feature_extractor
11
+ from backbone.model_irse import IR_50, IR_152
12
+ from backbone.model_resnet import ResNet_50, ResNet_152
13
+
14
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
15
+ tensor_transform = transforms.ToTensor()
16
+ pil_transform = transforms.ToPILImage()
17
+
18
+
19
+ class ImageFolderWithPaths(datasets.ImageFolder):
20
+ """Custom dataset that includes image file paths. Extends
21
+ torchvision.datasets.ImageFolder
22
+ """
23
+
24
+ # override the __getitem__ method. this is the method that dataloader calls
25
+ def __getitem__(self, index):
26
+ # this is what ImageFolder normally returns
27
+ original_tuple = super(ImageFolderWithPaths, self).__getitem__(index)
28
+ # the image file path
29
+ path = self.imgs[index][0]
30
+ # make a new tuple that includes original and the path
31
+ tuple_with_path = original_tuple + (path,)
32
+ return tuple_with_path
33
+
34
+
35
+ class GaussianSmoothing(nn.Module):
36
+ """
37
+ Apply gaussian smoothing on a
38
+ 1d, 2d or 3d tensor. Filtering is performed separately for each channel
39
+ in the input using a depthwise convolution.
40
+ Arguments:
41
+ channels (int, sequence): Number of channels of the input tensors. Output will
42
+ have this number of channels as well.
43
+ kernel_size (int, sequence): Size of the gaussian kernel.
44
+ sigma (float, sequence): Standard deviation of the gaussian kernel.
45
+ dim (int, optional): The number of dimensions of the data.
46
+ Default value is 2 (spatial).
47
+ """
48
+
49
+ def __init__(self, channels, kernel_size, sigma, dim=2):
50
+ super(GaussianSmoothing, self).__init__()
51
+ if isinstance(kernel_size, numbers.Number):
52
+ kernel_size = [kernel_size] * dim
53
+ if isinstance(sigma, numbers.Number):
54
+ sigma = [sigma] * dim
55
+
56
+ # The gaussian kernel is the product of the
57
+ # gaussian function of each dimension.
58
+ kernel = 1
59
+ meshgrids = torch.meshgrid(
60
+ [torch.arange(size, dtype=torch.float32) for size in kernel_size]
61
+ )
62
+ for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
63
+ mean = (size - 1) / 2
64
+ kernel *= (
65
+ 1
66
+ / (std * math.sqrt(2 * math.pi))
67
+ * torch.exp(-(((mgrid - mean) / std) ** 2) / 2)
68
+ )
69
+
70
+ # Make sure sum of values in gaussian kernel equals 1.
71
+ kernel = kernel / torch.sum(kernel)
72
+
73
+ # Reshape to depthwise convolutional weight
74
+ kernel = kernel.view(1, 1, *kernel.size())
75
+ kernel = kernel.repeat(channels, *[1] * (kernel.dim() - 1))
76
+
77
+ self.register_buffer("weight", kernel)
78
+ self.groups = channels
79
+
80
+ if dim == 1:
81
+ self.conv = F.conv1d
82
+ elif dim == 2:
83
+ self.conv = F.conv2d
84
+ elif dim == 3:
85
+ self.conv = F.conv3d
86
+ else:
87
+ raise RuntimeError(
88
+ "Only 1, 2 and 3 dimensions are supported. Received {}.".format(dim)
89
+ )
90
+ self.pad_size = int(kernel_size[0] / 2)
91
+
92
+ def forward(self, input):
93
+ """
94
+ Apply gaussian filter to input.
95
+ Arguments:
96
+ input (torch.Tensor): Input to apply gaussian filter on.
97
+ Returns:
98
+ filtered (torch.Tensor): Filtered output.
99
+ """
100
+ input = F.pad(
101
+ input,
102
+ (self.pad_size, self.pad_size, self.pad_size, self.pad_size),
103
+ mode="reflect",
104
+ )
105
+ return self.conv(input, weight=self.weight, groups=self.groups)
106
+
107
+
108
+ class dim_reduction(nn.Module):
109
+ def __init__(self, V):
110
+ super(dim_reduction, self).__init__()
111
+ self.V = V
112
+
113
+ def forward(self, input):
114
+ return torch.matmul(input, self.V.to(input.device))
115
+
116
+
117
+ def get_ensemble(
118
+ models,
119
+ sigma_gf,
120
+ kernel_size_gf,
121
+ combination,
122
+ V_reduction,
123
+ warp=False,
124
+ theta_warp=None,
125
+ ):
126
+ # function prepares ensemble of feature extractors
127
+ # outputs list of pytorch nn models
128
+ feature_extractor_ensemble = []
129
+ if sigma_gf is not None:
130
+ # if applying gaussian filtering during the attack
131
+ gaussian_filtering = GaussianSmoothing(3, kernel_size_gf, sigma_gf)
132
+ if V_reduction is None:
133
+ for model in models:
134
+ feature_extractor_model = nn.DataParallel(
135
+ nn.Sequential(
136
+ gaussian_filtering,
137
+ feature_extractor(
138
+ model=model, warp=warp, theta_warp=theta_warp
139
+ ),
140
+ )
141
+ ).to(device)
142
+ feature_extractor_ensemble.append(feature_extractor_model)
143
+ if combination:
144
+ feature_extractor_model = nn.DataParallel(
145
+ feature_extractor(model=model, warp=warp, theta_warp=theta_warp)
146
+ ).to(device)
147
+ feature_extractor_ensemble.append(feature_extractor_model)
148
+
149
+ else:
150
+ for i, model in enumerate(models):
151
+ feature_extractor_model = nn.DataParallel(
152
+ nn.Sequential(
153
+ gaussian_filtering,
154
+ feature_extractor(
155
+ model=model, warp=warp, theta_warp=theta_warp
156
+ ),
157
+ dim_reduction(V_reduction[i]),
158
+ )
159
+ ).to(device)
160
+ feature_extractor_ensemble.append(feature_extractor_model)
161
+ if combination:
162
+ feature_extractor_model = nn.DataParallel(
163
+ nn.Sequential(
164
+ feature_extractor(
165
+ model=model, warp=warp, theta_warp=theta_warp
166
+ ),
167
+ dim_reduction(V_reduction[i]),
168
+ )
169
+ ).to(device)
170
+ feature_extractor_ensemble.append(feature_extractor_model)
171
+
172
+ else:
173
+ if V_reduction is None:
174
+ for model in models:
175
+ feature_extractor_model = nn.DataParallel(
176
+ feature_extractor(model=model, warp=warp, theta_warp=theta_warp)
177
+ ).to(device)
178
+ feature_extractor_ensemble.append(feature_extractor_model)
179
+ else:
180
+ for i, model in enumerate(models):
181
+ feature_extractor_model = nn.DataParallel(
182
+ nn.Sequential(
183
+ feature_extractor(
184
+ model=model, warp=warp, theta_warp=theta_warp
185
+ ),
186
+ dim_reduction(V_reduction[i]),
187
+ )
188
+ ).to(device)
189
+ feature_extractor_ensemble.append(feature_extractor_model)
190
+
191
+ return feature_extractor_ensemble
192
+
193
+
194
+ def extract_features(imgs, feature_extractor_ensemble, dim):
195
+ # function computes feature vectors of the images with each extractor in the ensemble
196
+
197
+ features = torch.zeros(imgs.shape[0], len(feature_extractor_ensemble), dim)
198
+ for i, feature_extractor_model in enumerate(feature_extractor_ensemble):
199
+ # batch size, model in ensemble, dim
200
+ features_model = feature_extractor_model(imgs)
201
+ features[:, i, :] = features_model
202
+
203
+ return features
204
+
205
+
206
+ def prepare_models(
207
+ model_backbones,
208
+ input_size,
209
+ model_roots,
210
+ kernel_size_attack,
211
+ sigma_attack,
212
+ combination,
213
+ using_subspace,
214
+ V_reduction_root,
215
+ ):
216
+ backbone_dict = {
217
+ "IR_50": IR_50(input_size),
218
+ "IR_152": IR_152(input_size),
219
+ "ResNet_50": ResNet_50(input_size),
220
+ "ResNet_152": ResNet_152(input_size),
221
+ }
222
+
223
+ print("Loading Attack Backbone Checkpoint '{}'".format(model_roots))
224
+ print("=" * 20)
225
+
226
+ models_attack = []
227
+ for i in range(len(model_backbones)):
228
+ model = backbone_dict[model_backbones[i]]
229
+ model.load_state_dict(torch.load(model_roots[i], map_location=device))
230
+ models_attack.append(model)
231
+
232
+ if using_subspace:
233
+ V_reduction = []
234
+ for i in range(len(model_backbones)):
235
+ V_reduction.append(torch.tensor(np.load(V_reduction_root[i])))
236
+
237
+ dim = V_reduction[0].shape[1]
238
+ else:
239
+ V_reduction = None
240
+ dim = 512
241
+
242
+ return models_attack, V_reduction, dim
243
+
244
+
245
+ def prepare_data(
246
+ query_data_root, target_data_root, freq, batch_size, warp=False, theta_warp=None
247
+ ):
248
+ data = datasets.ImageFolder(query_data_root, tensor_transform)
249
+
250
+ subset_query = list(range(0, len(data), freq))
251
+ subset_gallery = [x for x in list(range(0, len(data))) if x not in subset_query]
252
+ query_set = torch.utils.data.Subset(data, subset_query)
253
+ gallery_set = torch.utils.data.Subset(data, subset_gallery)
254
+
255
+ if target_data_root is not None:
256
+ target_data = datasets.ImageFolder(target_data_root, tensor_transform)
257
+ target_loader = torch.utils.data.DataLoader(target_data, batch_size=batch_size)
258
+ else:
259
+ target_loader = None
260
+
261
+ query_loader = torch.utils.data.DataLoader(query_set, batch_size=batch_size)
262
+ gallery_loader = torch.utils.data.DataLoader(gallery_set, batch_size=batch_size)
263
+
264
+ return query_loader, gallery_loader, target_loader
265
+
266
+
267
+ def prepare_dir_vec(dir_vec_extractor, imgs, dim, combination):
268
+ dir_vec = extract_features(imgs, dir_vec_extractor, dim).detach().cpu()
269
+ if combination:
270
+ dir_vec = torch.repeat_interleave(dir_vec, 2, 1)
271
+ return dir_vec
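Finally, a sketch of how these helpers chain together; a randomly initialized IR-50 stands in for a loaded checkpoint, so the shapes rather than the feature values are the point:

import torch
from backbone.model_irse import IR_50
from util.prepare_utils import get_ensemble, prepare_dir_vec

ensemble = get_ensemble(
    [IR_50([112, 112])],
    sigma_gf=None, kernel_size_gf=None,
    combination=False, V_reduction=None,
)
images = torch.rand(2, 3, 112, 112)
dir_vec = prepare_dir_vec(ensemble, images, dim=512, combination=False)
print(dir_vec.shape)  # torch.Size([2, 1, 512]): (batch, models in ensemble, feature dim)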