Sara Mandelli committed • Commit 6bd8735 • 1 Parent(s): f6b58ff

Update detector

Changed files:
- gan_vs_real_detector.py +38 -60
- utils/architectures.py +422 -0
- utils/python_patch_extractor/PatchExtractor.py +306 -0
- utils/python_patch_extractor/__init__.py +0 -0
gan_vs_real_detector.py
CHANGED
@@ -11,39 +11,14 @@ torch.multiprocessing.set_sharing_strategy('file_system')
 import albumentations as A
 import albumentations.pytorch as Ap
 from utils import architectures
+from utils.python_patch_extractor.PatchExtractor import PatchExtractor
 from PIL import Image


 class Detector:
     def __init__(self):

-
-        # model_A_dir = 'weights/method_A/net-EfficientNetB4_lr-0.001_img_aug-[\'flip\', \'rotate\', \'clahe\', \'blur\', ' \
-        #               '\'brightness&contrast\', \'jitter\', \'downscale\', \'hsv\', \'resize\', \'jpeg\']' \
-        #               '_img_aug_p-0.5_patch_size-128_patch_number-1_batch_size-250_num_classes-2'
-        #
-        # # model directory and path for detector B
-        # model_B_dir = 'weights/method_B/net-EfficientNetB4_lr-0.001_aug-[\'flip\', \'rotate\', \'clahe\', \'blur\', ' \
-        #               '\'crop&resize\', \'brightness&contrast\', \'jitter\', \'downscale\', \'hsv\']' \
-        #               '_aug_p-0.5_jpeg_aug_p-0.7_patch_size-128_patch_number-1_batch_size-250_num_classes-2'
-        #
-        # # model directory and path for detector C
-        # model_C_dir = 'weights/method_C/net-EfficientNetB4_lr-0.001_aug-[\'flip\', \'rotate\', \'clahe\', \'blur\',' \
-        #               ' \'crop&resize\', \'brightness&contrast\', \'jitter\', \'downscale\', \'hsv\']' \
-        #               '_aug_p-0.5_jpeg_aug_p-0_patch_size-128_patch_number-5_batch_size-50_num_classes-2'
-        #
-        # # model directory and path for detector D
-        # model_D_dir = 'weights/method_D/net-EfficientNetB4_lr-0.001_aug-[\'flip\', \'rotate\', \'clahe\', \'blur\',' \
-        #               '\'crop&resize\', \'brightness&contrast\', \'jitter\', \'downscale\', \'hsv\']' \
-        #               '_aug_p-0.5_jpeg_aug_p-0_patch_size-128_patch_number-10_batch_size-25_num_classes-2'
-        #
-        # # model directory for detector E
-        # model_E_dir = 'weights/method_E/net-EfficientNetB4_lr-0.001_aug-[\'flip\', \'rotate\', \'clahe\', \'blur\',' \
-        #               ' \'crop&resize\', \'brightness&contrast\', \'jitter\', \'downscale\', \'hsv\']' \
-        #               '_aug_p-0.5_jpeg_aug_p-0.7_patch_size-128_patch_number-1_batch_size-250_num_classes-2'
-
-        self.weights_path_list = [os.path.join('weights', f'method_{x}.pth') for x in 'ABCDE']
-        # self.model_path = os.path.join(model_dir, 'bestval.pth')
+        self.weights_path_list = [os.path.join('/nas/home/nbonettini/projects/StyleGAN3-detection/weights', f'method_{x}.pth') for x in 'ABCDE']

         # GPU configuration if available
         self.device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

@@ -72,17 +47,15 @@ class Detector:
             Ap.transforms.ToTensorV2()
         ]
         self.trans = A.Compose(transform)
-
         self.cropper = A.RandomCrop(width=128, height=128, always_apply=True, p=1.)
-
         self.criterion = torch.nn.CrossEntropyLoss(reduction='none')

-    def synth_real_detector(self, img_path: str, n_patch: int =
+    def synth_real_detector(self, img_path: str, n_patch: int = 200):

         # Load image:
         img = np.asarray(Image.open(img_path))

-        #
+        # Opt-out if image is non conforming
         if img.shape == ():
             print('{} None dimension'.format(img_path))
             return None

@@ -96,47 +69,52 @@
             print('Omitting alpha channel')
             img = img[:, :, :3]

-
-
+        img_net_scores = []
+        for net_idx, net in enumerate(self.nets):

-
-        transf_patch_list = [self.trans(image=patch)['image'] for patch in patch_list]
+            if net_idx == 0:

-
-
-
-
-        softmax_scores = torch.softmax(patch_scores, dim=1)
-        predictions = torch.argmax(softmax_scores, dim=1)
+                # only for detector A, extract N = 200 random patches per image
+                patch_list = [self.cropper(image=img)['image'] for _ in range(n_patch)]
+
+            else:

-
-
-
-
-
+                # for detectors B, C, D, E, extract patches aligned with the 8 x 8 pixel grid:
+                # we want more or less 200 patches per img
+                stride_0 = ((((img.shape[0] - 128) // 20) + 7) // 8) * 8
+                stride_1 = (((img.shape[1] - 128) // 10 + 7) // 8) * 8
+                pe = PatchExtractor(dim=(128, 128, 3), stride=(stride_0, stride_1, 3))
+                patches = pe.extract(img)
+                patch_list = list(patches.reshape((patches.shape[0]*patches.shape[1], 128, 128, 3)))

-
-
-        # LLR > 0: synthetic
+            # Normalization
+            transf_patch_list = [self.trans(image=patch)['image'] for patch in patch_list]

-
-
-
-
+            # Compute scores
+            transf_patch_tensor = torch.stack(transf_patch_list, dim=0).to(self.device)
+            with torch.no_grad():
+                patch_scores = net(transf_patch_tensor).cpu().numpy()
+            patch_predictions = np.argmax(patch_scores, axis=1)

-
+            maj_voting = np.any(patch_predictions).astype(int)
+            scores_maj_voting = patch_scores[:, maj_voting]
+            img_net_scores.append(np.nanmax(scores_maj_voting) if maj_voting == 1 else -np.nanmax(scores_maj_voting))
+
+        # final score is the average among the 5 scores returned by the detectors
+        img_score = np.mean(img_net_scores)
+
+        return img_score


 def main():
-    # img_path
-    img_path = "/nas/public/exchange/semafor/eval1/stylegan2/100k-generated-images/car-512x384_cropped/stylegan2-" \
-               "config-f-psi-0.5/097000/097001.png"

-    #
-
+    # img_path on fermi:
+    img_path = '/home/nbonettini/nvidia_temp/nvidia-alias-free-gan/faces/alias-free-r-afhqv2-512x512/seed40000.png'

     detector = Detector()
-    detector.synth_real_detector(img_path
+    score = detector.synth_real_detector(img_path)
+
+    print('Image Score: {}'.format(score))

     return 0

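The new scoring loop iterates over self.nets, which is presumably populated elsewhere in __init__ (outside the hunks shown) from weights_path_list. To make the stride arithmetic above concrete, here is a minimal standalone sketch, assuming a hypothetical 512 x 512 x 3 input (the size is only for illustration), that reproduces the formulas and the resulting patch count of roughly 200:

    # Hypothetical 512 x 512 input, only to illustrate the stride arithmetic above.
    h, w, patch = 512, 512, 128

    # Same formulas as in synth_real_detector: each stride is rounded up to a multiple of 8.
    stride_0 = ((((h - patch) // 20) + 7) // 8) * 8   # -> 24
    stride_1 = ((((w - patch) // 10) + 7) // 8) * 8   # -> 40

    n_rows = (h - patch) // stride_0 + 1              # -> 17
    n_cols = (w - patch) // stride_1 + 1              # -> 10
    print(n_rows * n_cols)                            # -> 170, i.e. roughly 200 patches

Rounding each stride up to a multiple of 8 keeps every extracted patch aligned with the 8 x 8 pixel grid, as the in-code comment notes.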
utils/architectures.py
ADDED
@@ -0,0 +1,422 @@
import torch
import torch.nn as nn
import numpy as np
from torchvision import transforms
import torch.nn.functional as F
from efficientnet_pytorch import EfficientNet
from efficientnet_pytorch.utils import (
    round_filters,
    round_repeats,
    drop_connect,
    get_same_padding_conv2d,
    get_model_params,
    efficientnet_params,
    load_pretrained_weights,
    Swish,
    MemoryEfficientSwish,
)
from efficientnet_pytorch.model import MBConvBlock
from torchvision.models import resnet
from pytorchcv.model_provider import get_model


class Head(nn.Module):
    def __init__(self, in_f, out_f):
        super(Head, self).__init__()

        self.f = nn.Flatten()
        self.l = nn.Linear(in_f, 512)
        self.d = nn.Dropout(0.5)
        self.o = nn.Linear(512, out_f)
        self.b1 = nn.BatchNorm1d(in_f)
        self.b2 = nn.BatchNorm1d(512)
        self.r = nn.ReLU()

    def forward(self, x):
        x = self.f(x)
        x = self.b1(x)
        x = self.d(x)

        x = self.l(x)
        x = self.r(x)
        x = self.b2(x)
        x = self.d(x)

        out = self.o(x)
        return out


class FCN(nn.Module):
    def __init__(self, base, in_f, out_f):
        super(FCN, self).__init__()
        self.base = base
        self.h1 = Head(in_f, out_f)

    def forward(self, x):
        x = self.base(x)
        return self.h1(x)


class BaseFCN(nn.Module):
    def __init__(self, n_classes: int):
        super(BaseFCN, self).__init__()

        self.f = nn.Flatten()
        self.l = nn.Linear(625, 256)
        self.d = nn.Dropout(0.5)
        self.o = nn.Linear(256, n_classes)

    def forward(self, x):
        x = self.f(x)
        x = self.l(x)
        x = self.d(x)
        out = self.o(x)
        return out

    def get_trainable_parameters_cooccur(self):
        return self.parameters()


class BaseFCNHigh(nn.Module):
    def __init__(self, n_classes: int):
        super(BaseFCNHigh, self).__init__()

        self.f = nn.Flatten()
        self.l = nn.Linear(625, 512)
        self.d = nn.Dropout(0.5)
        self.o = nn.Linear(512, n_classes)

    def forward(self, x):
        x = self.f(x)
        x = self.l(x)
        x = self.d(x)
        out = self.o(x)
        return out

    def get_trainable_parameters_cooccur(self):
        return self.parameters()


class BaseFCN4(nn.Module):
    def __init__(self, n_classes: int):
        super(BaseFCN4, self).__init__()

        self.f = nn.Flatten()
        self.l1 = nn.Linear(625, 512)
        self.l2 = nn.Linear(512, 384)
        self.l3 = nn.Linear(384, 256)
        self.d = nn.Dropout(0.5)
        self.o = nn.Linear(256, n_classes)

    def forward(self, x):
        x = self.f(x)
        x = self.l1(x)
        x = self.d(x)
        x = self.l2(x)
        x = self.d(x)
        x = self.l3(x)
        x = self.d(x)
        out = self.o(x)
        return out

    def get_trainable_parameters_cooccur(self):
        return self.parameters()


class BaseFCNBnR(nn.Module):
    def __init__(self, n_classes: int):
        super(BaseFCNBnR, self).__init__()

        self.f = nn.Flatten()
        self.b1 = nn.BatchNorm1d(625)
        self.b2 = nn.BatchNorm1d(256)
        self.l = nn.Linear(625, 256)
        self.d = nn.Dropout(0.5)
        self.o = nn.Linear(256, n_classes)
        self.r = nn.ReLU()

    def forward(self, x):
        x = self.f(x)
        x = self.b1(x)
        x = self.d(x)
        x = self.l(x)
        x = self.r(x)
        x = self.b2(x)
        x = self.d(x)
        out = self.o(x)
        return out

    def get_trainable_parameters_cooccur(self):
        return self.parameters()


def forward_resnet_conv(net, x, upto: int = 4):
    """
    Forward ResNet only in its convolutional part
    :param net:
    :param x:
    :param upto:
    :return:
    """
    x = net.conv1(x)  # N / 2
    x = net.bn1(x)
    x = net.relu(x)
    x = net.maxpool(x)  # N / 4

    if upto >= 1:
        x = net.layer1(x)  # N / 4
    if upto >= 2:
        x = net.layer2(x)  # N / 8
    if upto >= 3:
        x = net.layer3(x)  # N / 16
    if upto >= 4:
        x = net.layer4(x)  # N / 32
    return x


class FeatureExtractor(nn.Module):
    """
    Abstract class to be extended when supporting features extraction.
    It also provides standard normalized and parameters
    """

    def features(self, x: torch.Tensor) -> torch.Tensor:
        raise NotImplementedError

    def get_trainable_parameters(self):
        return self.parameters()

    @staticmethod
    def get_normalizer():
        return transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])


class FeatureExtractorGray(nn.Module):
    """
    Abstract class to be extended when supporting features extraction.
    It also provides standard normalized and parameters
    """

    def features(self, x: torch.Tensor) -> torch.Tensor:
        raise NotImplementedError

    def get_trainable_parameters(self):
        return self.parameters()

    @staticmethod
    def get_normalizer():
        return transforms.Normalize(mean=[0.479], std=[0.226])


class EfficientNetGen(FeatureExtractor):
    def __init__(self, model: str, n_classes: int, pretrained: bool):
        super(EfficientNetGen, self).__init__()

        if pretrained:
            self.efficientnet = EfficientNet.from_pretrained(model)
        else:
            self.efficientnet = EfficientNet.from_name(model)

        self.classifier = nn.Linear(self.efficientnet._conv_head.out_channels, n_classes)
        del self.efficientnet._fc

    def features(self, x: torch.Tensor) -> torch.Tensor:
        x = self.efficientnet.extract_features(x)
        x = self.efficientnet._avg_pooling(x)
        x = x.flatten(start_dim=1)
        return x

    def forward(self, x):
        x = self.features(x)
        x = self.efficientnet._dropout(x)
        x = self.classifier(x)
        # x = F.softmax(x, dim=-1)
        return x


class EfficientNetB0(EfficientNetGen):
    def __init__(self, n_classes: int, pretrained: bool):
        super(EfficientNetB0, self).__init__(model='efficientnet-b0', n_classes=n_classes, pretrained=pretrained)


class EfficientNetB4(EfficientNetGen):
    def __init__(self, n_classes: int, pretrained: bool):
        super(EfficientNetB4, self).__init__(model='efficientnet-b4', n_classes=n_classes, pretrained=pretrained)


class EfficientNetGenPostStem(FeatureExtractor):
    def __init__(self, model: str, n_classes: int, pretrained: bool, n_ir_blocks: int):
        super(EfficientNetGenPostStem, self).__init__()

        if pretrained:
            self.efficientnet = EfficientNet.from_pretrained(model)
        else:
            self.efficientnet = EfficientNet.from_name(model)

        self.n_ir_blocks = n_ir_blocks
        self.classifier = nn.Linear(self.efficientnet._conv_head.out_channels, n_classes)

        # modify STEM
        in_channels = 3  # rgb
        out_channels = round_filters(32, self.efficientnet._global_params)
        Conv2d = get_same_padding_conv2d(image_size=self.efficientnet._global_params.image_size)
        self.efficientnet._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=1, bias=False)

        self.init_blocks_args = self.efficientnet._blocks_args[0]
        self.init_blocks_args = self.init_blocks_args._replace(output_filters=32)
        self.init_block = MBConvBlock(self.init_blocks_args, self.efficientnet._global_params)

        self.last_block_args = self.efficientnet._blocks_args[0]
        self.last_block_args = self.last_block_args._replace(output_filters=32, stride=2)
        self.last_block = MBConvBlock(self.last_block_args, self.efficientnet._global_params)

        del self.efficientnet._fc

    def features(self, x: torch.Tensor) -> torch.Tensor:

        x = self.efficientnet._swish(self.efficientnet._bn0(self.efficientnet._conv_stem(x)))

        # init blocks
        for b in range(self.n_ir_blocks - 1):
            x = self.init_block(x, drop_connect_rate=0)

        # last block
        x = self.last_block(x, drop_connect_rate=0)

        # standard blocks efficientNet:
        for idx, block in enumerate(self.efficientnet._blocks):
            drop_connect_rate = self.efficientnet._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self.efficientnet._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)

        x = self.efficientnet._swish(self.efficientnet._bn1(self.efficientnet._conv_head(x)))

        x = self.efficientnet._avg_pooling(x)
        x = x.flatten(start_dim=1)
        return x

    def forward(self, x):
        x = self.features(x)
        x = self.efficientnet._dropout(x)
        x = self.classifier(x)
        # x = F.softmax(x, dim=-1)
        return x


class EfficientNetB0PostStemIR(EfficientNetGenPostStem):
    def __init__(self, n_classes: int, pretrained: bool, n_ir_blocks: int):
        super(EfficientNetB0PostStemIR, self).__init__(model='efficientnet-b0', n_classes=n_classes,
                                                       pretrained=pretrained, n_ir_blocks=n_ir_blocks)


class EfficientNetGenPreStem(FeatureExtractor):
    def __init__(self, model: str, n_classes: int, pretrained: bool, n_ir_blocks: int):
        super(EfficientNetGenPreStem, self).__init__()

        if pretrained:
            self.efficientnet = EfficientNet.from_pretrained(model)
        else:
            self.efficientnet = EfficientNet.from_name(model)

        self.n_ir_blocks = n_ir_blocks
        self.classifier = nn.Linear(self.efficientnet._conv_head.out_channels, n_classes)

        self.init_block_args = self.efficientnet._blocks_args[0]
        self.init_block_args = self.init_block_args._replace(input_filters=3, output_filters=32)
        self.init_block = MBConvBlock(self.init_block_args, self.efficientnet._global_params)

        self.last_blocks_args = self.efficientnet._blocks_args[0]
        self.last_blocks_args = self.last_blocks_args._replace(output_filters=32)
        self.last_block = MBConvBlock(self.last_blocks_args, self.efficientnet._global_params)

        # modify STEM
        in_channels = 32
        out_channels = round_filters(32, self.efficientnet._global_params)
        Conv2d = get_same_padding_conv2d(image_size=self.efficientnet._global_params.image_size)
        self.efficientnet._conv_stem = Conv2d(in_channels, out_channels, kernel_size=3, stride=2, bias=False)

        del self.efficientnet._fc

    def features(self, x: torch.Tensor) -> torch.Tensor:

        # init block
        x = self.init_block(x, drop_connect_rate=0)

        # other blocks
        for b in range(self.n_ir_blocks - 1):
            x = self.last_block(x, drop_connect_rate=0)

        # standard stem efficientNet:
        x = self.efficientnet._swish(self.efficientnet._bn0(self.efficientnet._conv_stem(x)))

        # standard blocks efficientNet:
        for idx, block in enumerate(self.efficientnet._blocks):
            drop_connect_rate = self.efficientnet._global_params.drop_connect_rate
            if drop_connect_rate:
                drop_connect_rate *= float(idx) / len(self.efficientnet._blocks)
            x = block(x, drop_connect_rate=drop_connect_rate)

        x = self.efficientnet._swish(self.efficientnet._bn1(self.efficientnet._conv_head(x)))

        x = self.efficientnet._avg_pooling(x)
        x = x.flatten(start_dim=1)
        return x

    def forward(self, x):
        x = self.features(x)
        x = self.efficientnet._dropout(x)
        x = self.classifier(x)
        # x = F.softmax(x, dim=-1)
        return x


class EfficientNetB0PreStemIR(EfficientNetGenPreStem):
    def __init__(self, n_classes: int, pretrained: bool, n_ir_blocks: int):
        super(EfficientNetB0PreStemIR, self).__init__(model='efficientnet-b0', n_classes=n_classes,
                                                      pretrained=pretrained, n_ir_blocks=n_ir_blocks)


class ResNet50(FeatureExtractor):
    def __init__(self, n_classes: int, pretrained: bool):
        super(ResNet50, self).__init__()
        self.resnet = resnet.resnet50(pretrained=pretrained)
        self.fc = nn.Linear(in_features=self.resnet.fc.in_features, out_features=n_classes)
        del self.resnet.fc

    def features(self, x):
        x = forward_resnet_conv(self.resnet, x)
        x = self.resnet.avgpool(x).flatten(start_dim=1)
        return x

    def forward(self, x):
        x = self.features(x)
        x = self.fc(x)
        return x


"""
Xception from Kaggle
"""


class XceptionWeiHao(FeatureExtractor):

    def __init__(self, n_classes: int, pretrained: bool):
        super(XceptionWeiHao, self).__init__()

        self.model = get_model("xception", pretrained=pretrained)
        self.model = nn.Sequential(*list(self.model.children())[:-1])  # Remove original output layer
        self.model[0].final_block.pool = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)))
        self.model = FCN(self.model, 2048, n_classes)

    def features(self, x: torch.Tensor) -> torch.Tensor:
        return self.model.base(x)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.features(x)
        return self.model.h1(x)
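As a quick smoke test of the architectures module, the following minimal sketch (batch size, patch size and class count are assumptions, chosen to match the detector's 128 x 128 patches and two classes) instantiates EfficientNetB4 and runs a dummy batch:

    import torch
    from utils import architectures

    # Assumed settings: 2 classes (real vs GAN) and 128 x 128 RGB patches, as in the detector.
    net = architectures.EfficientNetB4(n_classes=2, pretrained=False)
    net.eval()

    x = torch.randn(4, 3, 128, 128)   # dummy batch of 4 patches
    with torch.no_grad():
        logits = net(x)               # raw logits; softmax is commented out in forward()
    print(logits.shape)               # torch.Size([4, 2])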
utils/python_patch_extractor/PatchExtractor.py
ADDED
@@ -0,0 +1,306 @@
"""
@Author: Nicolo' Bonettini
@Author: Luca Bondi
@Author: Francesco Picetti
"""
import random
import numpy as np
from skimage.util import view_as_windows, view_as_blocks


# Score functions ---

def mid_intensity_high_texture(in_content):
    """
    Quality function that returns higher scores for mid intensity patches with high texture levels. Empirical.
    :type in_content: ndarray
    :param in_content : 2D or 3D ndarray. Values are expected in [0,1] if in_content is float, in [0,255] if in_content is uint8
    :return score: float
        score in [0,1].
    """

    if in_content.dtype == np.uint8:
        in_content = in_content / 255.

    mean_std_weight = .7

    in_content = in_content.flatten()

    mean_val = in_content.mean()
    std_val = in_content.std()

    ch_mean_score = -4 * mean_val ** 2 + 4 * mean_val
    ch_std_score = 1 - np.exp(-2 * np.log(10) * std_val)

    score = mean_std_weight * ch_mean_score + (1 - mean_std_weight) * ch_std_score
    return score


def count_patches(in_size, patch_size, patch_stride):
    """
    Compute the number of patches
    :param in_size:
    :param patch_size:
    :param patch_stride:
    :return:
    """
    win_indices_shape = (((np.array(in_size) - np.array(patch_size))
                          // np.array(patch_stride)) + 1)
    return int(np.prod(win_indices_shape))


class PatchExtractor:

    def __init__(self, dim, offset=None, stride=None, rand=None, function=None, threshold=None,
                 num=None, indexes=None):

        """
        N-dimensional patch extractor
        Args:
        :param in_content : ndarray
            the content to process as a numpy array of ndim dimensions

        :param dim : tuple
            patch_array dimensions as a tuple of ndim elements

        Named args:
        :param offset : tuple
            the offsets along each axis as a tuple of ndim elements

        :param stride : tuple
            the stride of each axis as a tuple of ndim elements

        :param rand : bool
            randomize patch_array order. Mutually exclusive with function_handler

        :param function : function
            patch quality function handler. Mutually exclusive with rand

        :param threshold: float
            minimum quality threshold

        :param num : int
            maximum number of returned patch_array. Mutually exclusive with indexes

        :param indexes : list|ndarray
            explicitly return corresponding patch indexes (function_handler or C order used to index patch_array).
            Mutually exclusive with num

        :return ndarray: patch_array
            array of patch_array
            if rand==False and function_handler==None and num==None and indexes==None:
                patch_array.ndim = 2 * in_content.ndim
            else:
                patch_array.ndim = 1 + in_content.ndim
        """

        # Arguments parser ---
        if not isinstance(dim, tuple):
            raise ValueError('dim must be a tuple')
        self.dim = dim

        ndim = len(dim)
        self.ndim = ndim

        if offset is None:
            offset = tuple([0] * ndim)
        if not isinstance(offset, tuple):
            raise ValueError('offset must be a tuple')
        if len(offset) != ndim:
            raise ValueError('offset must a tuple of length {:d}'.format(ndim))
        self.offset = offset

        if stride is None:
            stride = dim
        if not isinstance(stride, tuple):
            raise ValueError('stride must be a tuple')
        if len(stride) != ndim:
            raise ValueError('stride must a tuple of length {:d}'.format(ndim))
        self.stride = stride

        if rand is not None and function is not None:
            raise ValueError('rand and function cannot be set at the same time')

        if rand is None:
            rand = False
        if not isinstance(rand, bool):
            raise ValueError('rand must be a boolean')
        self.rand = rand

        if function is not None and not callable(function):
            raise ValueError('function must be a function handler')
        self.function_handler = function

        if threshold is None:
            threshold = 0.0
        if not isinstance(threshold, float):
            raise ValueError('threshold must be a float')
        self.threshold = threshold

        if num is not None and indexes is not None:
            raise ValueError('num and indexes cannot be set at the same time')

        if num is not None and not isinstance(num, int):
            raise ValueError('num must be an int')
        self.num = num

        if indexes is not None and not isinstance(indexes, list) and not isinstance(indexes, np.ndarray):
            raise ValueError('indexes must be an list or a 1d ndarray')
        if indexes is not None:
            indexes = np.array(indexes).flatten()
        self.indexes = indexes

        self.in_content_original_shape = None
        self.in_content_cropped_shape = None

    def extract(self, in_content):

        if not isinstance(in_content, np.ndarray):
            raise ValueError('in_content must be of type: ' + str(np.ndarray))

        if in_content.ndim != self.ndim:
            raise ValueError('in_content shape must a tuple of length {:d}'.format(self.ndim))

        self.in_content_original_shape = in_content.shape

        # Offset ---
        for dim_idx, dim_offset in enumerate(self.offset):
            dim_max = in_content.shape[dim_idx]
            in_content = in_content.take(range(dim_offset, dim_max), axis=dim_idx)

        # Patch list ---
        if self.dim == self.stride:
            in_content_crop = in_content
            for dim_idx in range(self.ndim):
                dim_max = (in_content.shape[dim_idx] // self.dim[dim_idx]) * self.dim[dim_idx]
                in_content_crop = in_content_crop.take(range(0, dim_max), axis=dim_idx)
            patch_array = view_as_blocks(in_content_crop, self.dim)
        else:
            patch_array = view_as_windows(in_content, self.dim, self.stride)

        patch_array = np.ascontiguousarray(patch_array)

        patch_idx = patch_array.shape[:self.ndim]
        self.in_content_cropped_shape = tuple((np.asarray(patch_idx) - 1) * np.asarray(self.stride) + np.asarray(self.dim))

        # Evaluate patch_array or rand sort ---
        if self.rand:
            patch_array.shape = (-1,) + self.dim
            random.shuffle(patch_array)
        else:
            if self.function_handler is not None:
                patch_array.shape = (-1,) + self.dim
                patch_scores = np.asarray(list(map(self.function_handler, patch_array)))
                sort_idxs = np.argsort(patch_scores)[::-1]
                patch_scores = patch_scores[sort_idxs]
                patch_array = patch_array[sort_idxs]
                patch_array = patch_array[patch_scores >= self.threshold]

        if self.num is not None:
            patch_array.shape = (-1,) + self.dim
            patch_array = patch_array[:self.num]

        if self.indexes is not None:
            patch_array.shape = (-1,) + self.dim
            patch_array = patch_array[self.indexes]

        return patch_array

    def extract_call(self, args):  # TODO: verify
        in_content = args.pop('in_content')
        dim = args.pop('dim')

        return self.extract(in_content)

    def reconstruct(self, patch_array):
        """
        Reconstruct the N-dim image from the patch_array that has been extracted previously
        :param patch_array: array of patches as output of patch_extractor
        :return:
        """
        # Arguments parser ---
        if not isinstance(patch_array, np.ndarray):
            raise ValueError('patch_array must be of type: ' + str(np.ndarray))

        ndim = patch_array.ndim // 2

        # if not isinstance(patch_stride, tuple):
        #     raise ValueError('patch_stride must be a tuple')
        # if len(patch_stride) != ndim:
        #     raise ValueError('patch_stride must be a tuple of length {:d}'.format(ndim))
        #
        # if not isinstance(image_shape, tuple):
        #     raise ValueError('patch_idx must be a tuple')
        # if len(image_shape) != ndim:
        #     raise ValueError('patch_idx must be a tuple of length {:d}'.format(ndim))

        patch_stride = self.stride
        image_shape = self.in_content_cropped_shape

        patch_shape = patch_array.shape[-ndim:]
        patch_idx = patch_array.shape[:ndim]
        image_shape_computed = tuple((np.array(patch_idx) - 1) * np.array(patch_stride) + np.array(patch_shape))
        if not image_shape == image_shape_computed:
            raise ValueError('There is something wrong with the dimensions!')

        if ndim > 4:
            raise ValueError('For now, it works only in 4D, sorry!')
        numpatches = count_patches(image_shape, patch_shape, patch_stride)
        patch_array_unwrapped = patch_array.reshape(numpatches, *patch_shape)
        image_recon = np.zeros(image_shape)
        norm_mask = np.zeros(image_shape)
        counter = 0

        for h in np.arange(0, image_shape[0] - patch_shape[0] + 1, patch_stride[0]):
            if ndim > 1:
                for i in np.arange(0, image_shape[1] - patch_shape[1] + 1, patch_stride[1]):
                    if ndim > 2:
                        for j in np.arange(0, image_shape[2] - patch_shape[2] + 1, patch_stride[2]):
                            if ndim > 3:
                                for k in np.arange(0, image_shape[3] - patch_shape[3] + 1, patch_stride[3]):
                                    image_recon[h:h + patch_shape[0], i:i + patch_shape[1], j:j + patch_shape[2],
                                                k:k + patch_shape[3]] += patch_array_unwrapped[counter, :, :, :, :]
                                    norm_mask[h:h + patch_shape[0], i:i + patch_shape[1], j:j + patch_shape[2],
                                              k:k + patch_shape[3]] += 1
                                    counter += 1
                            else:
                                image_recon[h:h + patch_shape[0], i:i + patch_shape[1],
                                            j:j + patch_shape[2]] += patch_array_unwrapped[counter, :, :, :]
                                norm_mask[h:h + patch_shape[0], i:i + patch_shape[1], j:j + patch_shape[2]] += 1
                                counter += 1
                    else:
                        image_recon[h:h + patch_shape[0], i:i + patch_shape[1]] += patch_array_unwrapped[counter, :, :]
                        norm_mask[h:h + patch_shape[0], i:i + patch_shape[1]] += 1
                        counter += 1
            else:
                image_recon[h:h + patch_shape[0]] += patch_array_unwrapped[counter, :]
                norm_mask[h:h + patch_shape[0]] += 1
                counter += 1

        image_recon /= norm_mask

        return image_recon


def main():
    in_shape = (644, 481, 3)
    dim = (120, 120, 3)
    stride = (7, 90, 90, 3)
    offset = (1, 0, 0, 0)
    in_content = np.random.randint(256, size=in_shape).astype(np.uint8)
    # args = {'in_content': in_content,
    #         'dim': dim,
    #         'offset': offset,
    #         'stride': stride,
    #         }

    # patch_array = patch_extractor_call(args)
    pe = PatchExtractor(dim)
    patch_array = pe.extract(in_content)
    print('patch_array.shape = ' + str(patch_array.shape))
    img_recon = pe.reconstruct(patch_array)
    print('img_recon.shape = ' + str(img_recon.shape))


if __name__ == "__main__":
    main()
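Besides the plain block extraction exercised by main() above, the extractor also supports quality-driven selection. A small sketch, assuming random dummy data and an arbitrary 0.5 threshold, keeps only the best-scoring 128 x 128 patches according to mid_intensity_high_texture:

    import numpy as np
    from utils.python_patch_extractor.PatchExtractor import PatchExtractor, mid_intensity_high_texture

    img = np.random.randint(256, size=(512, 512, 3)).astype(np.uint8)   # dummy image

    # Keep at most the 10 best 128 x 128 patches according to the empirical quality score,
    # discarding anything that scores below the 0.5 threshold.
    pe = PatchExtractor(dim=(128, 128, 3), stride=(64, 64, 3),
                        function=mid_intensity_high_texture, threshold=0.5, num=10)
    patches = pe.extract(img)
    print(patches.shape)   # (n, 128, 128, 3) with n <= 10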
utils/python_patch_extractor/__init__.py
ADDED
File without changes