File size: 4,071 Bytes
49dd0e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import cv2
import numpy as np

from .model import BiSeNet

# Maps each region name to its integer class index. The names are listed in
# index order, so a name's position in the tuple is the value it maps to
# (19 names, indices 0-18).
mask_regions = {
    region: index
    for index, region in enumerate((
        "Background",
        "Skin",
        "L-Eyebrow",
        "R-Eyebrow",
        "L-Eye",
        "R-Eye",
        "Eye-G",
        "L-Ear",
        "R-Ear",
        "Ear-R",
        "Nose",
        "Mouth",
        "U-Lip",
        "L-Lip",
        "Neck",
        "Neck-L",
        "Cloth",
        "Hair",
        "Hat",
    ))
}

# Borrowed from simswap
# https://github.com/neuralchen/SimSwap/blob/26c84d2901bd56eda4d5e3c5ca6da16e65dc82a6/util/reverse2original.py#L30
class SoftErosion(nn.Module):
    """Soften a mask by convolving it with a cone-shaped, normalised kernel.

    The kernel weight decreases linearly with the distance from the kernel
    centre and the whole kernel sums to 1. After filtering, values that reach
    ``threshold`` are set to 1.0 and the remaining values are rescaled so that
    their maximum becomes 1.0.

    Args:
        kernel_size: Side length of the square kernel.
        threshold: Filtered values ``>= threshold`` are treated as fully inside.
        iterations: Number of filtering passes. Every pass but the last keeps
            the element-wise minimum of the input and its filtered version.

    Note:
        The stored weight has shape ``(1, 1, k, k)`` and ``forward`` passes
        ``groups=x.shape[1]``, so the input is expected to be a single-channel
        ``(N, 1, H, W)`` tensor.
    """

    def __init__(self, kernel_size=15, threshold=0.6, iterations=1):
        super().__init__()
        r = kernel_size // 2
        self.padding = r
        self.iterations = iterations
        self.threshold = threshold

        # Distance of every kernel cell from the centre, built by broadcasting
        # a row of offsets against a column of offsets (same values as a
        # meshgrid, without the `indexing` deprecation warning).
        offsets = torch.arange(0., kernel_size) - r
        dist = torch.sqrt(offsets[None, :] ** 2 + offsets[:, None] ** 2)
        kernel = dist.max() - dist
        kernel /= kernel.sum()
        kernel = kernel.view(1, 1, *kernel.shape)
        # Buffer (not parameter): follows .to()/.cuda() but is never trained.
        self.register_buffer('weight', kernel)

    def forward(self, x):
        """Return ``(soft_mask, hard_mask)`` for the input mask ``x``.

        ``soft_mask`` is a float tensor; ``hard_mask`` is the boolean tensor of
        positions whose filtered value reached ``self.threshold``.
        """
        x = x.float()
        for _ in range(self.iterations - 1):
            x = torch.min(x, F.conv2d(x, weight=self.weight, groups=x.shape[1], padding=self.padding))
        x = F.conv2d(x, weight=self.weight, groups=x.shape[1], padding=self.padding)

        mask = x >= self.threshold
        x[mask] = 1.0

        # Rescale the sub-threshold values so they peak at 1.0. Skip the step
        # when there are no such values (max() of an empty tensor raises) or
        # when they are all zero (0 / 0 would fill the mask with NaN).
        outside = ~mask
        if outside.any():
            peak = x[outside].max()
            if peak != 0:
                x[outside] /= peak

        return x, mask

# Device that image_to_parsing and swap_regions move their tensors to.
# Starts as CPU and is overwritten by init_parser(mode=...).
device = "cpu"

def init_parser(pth_path, mode="cpu"):
    """Build the BiSeNet parser, load its weights and put it in eval mode.

    Also stores ``mode`` in the module-level ``device`` so that the other
    functions in this module move their tensors to the same place.

    Args:
        pth_path: Path of the checkpoint holding the network's state dict.
        mode: Target device, e.g. ``"cpu"`` or ``"cuda"``. Any value accepted
            by ``torch.device`` works (``"cuda:0"``, a ``torch.device`` ...).

    Returns:
        The ``BiSeNet`` instance (19 classes), on the requested device and in
        evaluation mode.
    """
    global device
    device = mode
    n_classes = 19
    net = BiSeNet(n_classes=n_classes)

    # Map the stored tensors straight onto the requested device so a
    # checkpoint saved on one device can be loaded on another.
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    state_dict = torch.load(pth_path, map_location=torch.device(device))
    net.load_state_dict(state_dict)

    # Move the model for every device, not only for the literal "cuda",
    # so the model always lives where image_to_parsing sends its input.
    net.to(device)
    net.eval()
    return net


def image_to_parsing(img, net):
    """Run the parsing network on ``img`` and return per-pixel class indices.

    The image is resized to 512x512, its channel order is reversed
    (presumably BGR -> RGB for OpenCV images; confirm against callers),
    converted to a normalised tensor and fed to ``net`` on the module-level
    ``device``.

    Args:
        img: H x W x C image array.
        net: Parsing network; the first element of its output is used.

    Returns:
        NumPy array holding, for each pixel, the index of the highest-scoring
        class (argmax over the first axis of the un-batched output).
    """
    resized = cv2.resize(img, (512, 512))
    # The reversed view has negative strides; copy it so ToTensor accepts it.
    flipped = resized[:, :, ::-1].copy()

    to_normalized_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])
    batch = to_normalized_tensor(flipped).unsqueeze(0)

    with torch.no_grad():
        scores = net(batch.to(device))[0]
        class_map = scores.squeeze(0).cpu().numpy().argmax(0)
    return class_map


def get_mask(parsing, classes):
    """Return a boolean mask of the pixels whose label is in ``classes``.

    Args:
        parsing: NumPy array of integer class labels.
        classes: Sequence of class indices to select. May be empty, in which
            case the mask is all ``False``.

    Returns:
        Boolean array with the same shape as ``parsing``.
    """
    return np.isin(parsing, classes)

def swap_regions(source, target, net, smooth_mask, includes=(1, 2, 3, 4, 5, 10, 11, 12, 13), blur=10):
    """Blend the selected parsed regions of ``source`` over ``target``.

    ``source`` is parsed with ``net``; pixels whose class is in ``includes``
    are taken from ``source`` and all other pixels from ``target``. Blending
    is done at 512x512 and the result is resized back to the size of
    ``source``.

    Args:
        source: H x W x 3 image with values in 0-255; supplies the selected
            regions.
        target: Image with values in 0-255; supplies everything else.
        net: Parsing network, as returned by ``init_parser``.
        smooth_mask: Optional callable returning ``(soft_mask, hard_mask)``
            for a ``(1, 1, H, W)`` tensor (e.g. ``SoftErosion``), or ``None``
            to skip softening.
        includes: Class indices (values of ``mask_regions``) to take from
            ``source``.
        blur: Gaussian sigma applied to the mask; ``<= 0`` disables blurring.

    Returns:
        A single ``uint8`` image with the height and width of ``source``. When
        ``includes`` is empty, ``source`` itself is returned unchanged.
    """
    # Nothing selected: return one image, like the main path does, and skip
    # the network call altogether.
    if len(includes) == 0:
        return source

    parsing = image_to_parsing(source, net)
    include_mask = get_mask(parsing, includes)
    mask = np.repeat(include_mask[:, :, np.newaxis], 3, axis=2).astype("float32")

    if smooth_mask is not None:
        mask_tensor = torch.from_numpy(mask.copy().transpose((2, 0, 1))).float().to(device)
        # The three channels are identical copies, so this sum is 2x the
        # binary mask. Kept as-is because it determines how the values
        # compare with smooth_mask's threshold.
        face_mask_tensor = mask_tensor[0] + mask_tensor[1]
        soft_face_mask_tensor, _ = smooth_mask(face_mask_tensor.unsqueeze_(0).unsqueeze_(0))
        soft_face_mask_tensor.squeeze_()
        mask = np.repeat(soft_face_mask_tensor.cpu().numpy()[:, :, np.newaxis], 3, axis=2)

    if blur > 0:
        # Kernel size (0, 0) lets OpenCV derive it from the sigma.
        mask = cv2.GaussianBlur(mask, (0, 0), blur)

    resized_source = cv2.resize((source / 255).astype("float32"), (512, 512))
    resized_target = cv2.resize((target / 255).astype("float32"), (512, 512))

    result = mask * resized_source + (1 - mask) * resized_target
    # Clip before the cast so float round-off cannot wrap around in uint8.
    result = np.clip(result * 255, 0, 255).astype("uint8")
    result = cv2.resize(result, (source.shape[1], source.shape[0]))

    return result

def mask_regions_to_list(values):
    """Translate region names into their class indices.

    Names that are not keys of ``mask_regions`` are skipped; the order of the
    remaining entries follows ``values``.
    """
    return [mask_regions[name] for name in values if name in mask_regions]