hylee committed
Commit eb7d2bb • 1 Parent(s): 545c8cd
README.md CHANGED
@@ -1,4 +1,5 @@
  ---
+ python_version: 3.7
  title: Photo2cartoon
  emoji: πŸ‘
  colorFrom: gray
app.py ADDED
@@ -0,0 +1,100 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+ import argparse
+ import functools
+ import os
+ import pathlib
+ import sys
+ from typing import Callable
+
+
+ import gradio as gr
+ import huggingface_hub
+ import numpy as np
+ import PIL.Image
+
+ import cv2
+
+ from io import BytesIO
+ sys.path.insert(0, 'p2c')
+
+ from test_onnx import Photo2Cartoon
+
+
+ ORIGINAL_REPO_URL = 'https://github.com/minivision-ai/photo2cartoon'
+ TITLE = 'minivision-ai/photo2cartoon'
+ DESCRIPTION = f"""This is a demo for {ORIGINAL_REPO_URL}.
+
+ """
+ ARTICLE = """
+
+ """
+
+
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--device', type=str, default='cpu')
+     parser.add_argument('--theme', type=str)
+     parser.add_argument('--live', action='store_true')
+     parser.add_argument('--share', action='store_true')
+     parser.add_argument('--port', type=int)
+     parser.add_argument('--disable-queue',
+                         dest='enable_queue',
+                         action='store_false')
+     parser.add_argument('--allow-flagging', type=str, default='never')
+     parser.add_argument('--allow-screenshot', action='store_true')
+     return parser.parse_args()
+
+
+
+
+ def run(
+     image,
+     p2c: Photo2Cartoon,
+ ) -> PIL.Image.Image:
+
+     cartoon = p2c.inference(image.name)
+
+     return PIL.Image.fromarray(cartoon)
+
+
+ def main():
+     gr.close_all()
+
+     args = parse_args()
+
+     p2c = Photo2Cartoon()
+
+     func = functools.partial(run, p2c=p2c)
+     func = functools.update_wrapper(func, run)
+
+
+     gr.Interface(
+         func,
+         [
+             gr.inputs.Image(type='file', label='Input Image'),
+         ],
+         [
+             gr.outputs.Image(
+                 type='pil',
+                 label='Result'),
+         ],
+         #examples=examples,
+         theme=args.theme,
+         title=TITLE,
+         description=DESCRIPTION,
+         article=ARTICLE,
+         allow_screenshot=args.allow_screenshot,
+         allow_flagging=args.allow_flagging,
+         live=args.live,
+     ).launch(
+         enable_queue=args.enable_queue,
+         server_port=args.port,
+         share=args.share,
+     )
+
+
+ if __name__ == '__main__':
+     main()
p2c/cog.yaml ADDED
@@ -0,0 +1,27 @@
+ predict: "predict.py:Predictor"
+ build:
+   python_version: "3.8"
+   system_packages:
+     - "libgl1-mesa-glx"
+     - "libglib2.0-0"
+   python_packages:
+     - "cmake==3.21.1"
+     - "torch==1.8.0"
+     - "torchvision==0.9.0"
+     - "numpy==1.19.2"
+     - "ipython==7.21.0"
+     - "opencv-python==4.3.0.38"
+     - "face-alignment==1.3.4"
+     - "tensorflow-gpu==2.5.0"
+   pre_install:
+     - pip install dlib
+
+
+
+
+
+
+
+
+
+
p2c/data_process.py ADDED
@@ -0,0 +1,30 @@
+ import os
+ import cv2
+ import numpy as np
+ from tqdm import tqdm
+ import argparse
+
+ from utils import Preprocess
+
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--data_path', type=str, help='photo folder path')
+ parser.add_argument('--save_path', type=str, help='save folder path')
+
+ args = parser.parse_args()
+ os.makedirs(args.save_path, exist_ok=True)
+
+ pre = Preprocess()
+
+ for idx, img_name in enumerate(tqdm(os.listdir(args.data_path))):
+     img = cv2.cvtColor(cv2.imread(os.path.join(args.data_path, img_name)), cv2.COLOR_BGR2RGB)
+
+     # face alignment and segmentation
+     face_rgba = pre.process(img)
+     if face_rgba is not None:
+         # change background to white
+         face = face_rgba[:,:,:3].copy()
+         mask = face_rgba[:,:,3].copy()[:,:,np.newaxis]/255.
+         face_white_bg = (face*mask + (1-mask)*255).astype(np.uint8)
+
+         cv2.imwrite(os.path.join(args.save_path, str(idx).zfill(4)+'.png'), cv2.cvtColor(face_white_bg, cv2.COLOR_RGB2BGR))
p2c/dataset.py ADDED
@@ -0,0 +1,108 @@
+ import torch.utils.data as data
+
+ from PIL import Image
+
+ import os
+ import os.path
+
+
+ def has_file_allowed_extension(filename, extensions):
+     """Checks if a file is an allowed extension.
+
+     Args:
+         filename (string): path to a file
+
+     Returns:
+         bool: True if the filename ends with a known image extension
+     """
+     filename_lower = filename.lower()
+     return any(filename_lower.endswith(ext) for ext in extensions)
+
+
+ def find_classes(dir):
+     classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
+     classes.sort()
+     class_to_idx = {classes[i]: i for i in range(len(classes))}
+     return classes, class_to_idx
+
+
+ def make_dataset(dir, extensions):
+     images = []
+     for root, _, fnames in sorted(os.walk(dir)):
+         for fname in sorted(fnames):
+             if has_file_allowed_extension(fname, extensions):
+                 path = os.path.join(root, fname)
+                 item = (path, 0)
+                 images.append(item)
+
+     return images
+
+
+ class DatasetFolder(data.Dataset):
+     def __init__(self, root, loader, extensions, transform=None, target_transform=None):
+         # classes, class_to_idx = find_classes(root)
+         samples = make_dataset(root, extensions)
+         if len(samples) == 0:
+             raise(RuntimeError("Found 0 files in subfolders of: " + root + "\n"
+                                "Supported extensions are: " + ",".join(extensions)))
+
+         self.root = root
+         self.loader = loader
+         self.extensions = extensions
+         self.samples = samples
+
+         self.transform = transform
+         self.target_transform = target_transform
+
+     def __getitem__(self, index):
+         """
+         Args:
+             index (int): Index
+
+         Returns:
+             tuple: (sample, target) where target is class_index of the target class.
+         """
+         path, target = self.samples[index]
+         sample = self.loader(path)
+         if self.transform is not None:
+             sample = self.transform(sample)
+         if self.target_transform is not None:
+             target = self.target_transform(target)
+
+         return sample, target
+
+     def __len__(self):
+         return len(self.samples)
+
+     def __repr__(self):
+         fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
+         fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
+         fmt_str += '    Root Location: {}\n'.format(self.root)
+         tmp = '    Transforms (if any): '
+         fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
+         tmp = '    Target Transforms (if any): '
+         fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
+         return fmt_str
+
+
+ IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif']
+
+
+ def pil_loader(path):
+     # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
+     with open(path, 'rb') as f:
+         img = Image.open(f)
+         return img.convert('RGB')
+
+
+ def default_loader(path):
+     return pil_loader(path)
+
+
+ class ImageFolder(DatasetFolder):
+     def __init__(self, root, transform=None, target_transform=None,
+                  loader=default_loader):
+         super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS,
+                                           transform=transform,
+                                           target_transform=target_transform)
+         self.imgs = self.samples
p2c/dataset/README.md ADDED
@@ -0,0 +1,20 @@
+ ```
+ ├── dataset
+     └── photo2cartoon
+         ├── trainA
+             ├── xxx.jpg
+             ├── yyy.png
+             └── ...
+         ├── trainB
+             ├── zzz.jpg
+             ├── www.png
+             └── ...
+         ├── testA
+             ├── aaa.jpg
+             ├── bbb.png
+             └── ...
+         └── testB
+             ├── ccc.jpg
+             ├── ddd.png
+             └── ...
+ ```
p2c/images/QRcode.jpg ADDED
p2c/images/data_process.jpg ADDED
p2c/images/photo_test.jpg ADDED
p2c/images/results.png ADDED
p2c/images/title.png ADDED
p2c/models/UGATIT_sadalin_hourglass.py ADDED
@@ -0,0 +1,489 @@
1
+ import time
2
+ import itertools
3
+ from dataset import ImageFolder
4
+ from torchvision import transforms
5
+ from torch.utils.data import DataLoader
6
+ from .networks import *
7
+ from utils import *
8
+ from glob import glob
9
+ from .face_features import FaceFeatures
10
+
11
+
12
+ class UgatitSadalinHourglass(object):
13
+ def __init__(self, args):
14
+ self.light = args.light
15
+
16
+ if self.light:
17
+ self.model_name = 'UGATIT_light'
18
+ else:
19
+ self.model_name = 'UGATIT'
20
+
21
+ self.result_dir = args.result_dir
22
+ self.dataset = args.dataset
23
+
24
+ self.iteration = args.iteration
25
+ self.decay_flag = args.decay_flag
26
+
27
+ self.batch_size = args.batch_size
28
+ self.print_freq = args.print_freq
29
+ self.save_freq = args.save_freq
30
+
31
+ self.lr = args.lr
32
+ self.ch = args.ch
33
+
34
+ """ Weight """
35
+ self.adv_weight = args.adv_weight
36
+ self.cycle_weight = args.cycle_weight
37
+ self.identity_weight = args.identity_weight
38
+ self.cam_weight = args.cam_weight
39
+ self.faceid_weight = args.faceid_weight
40
+
41
+ """ Discriminator """
42
+ self.n_dis = args.n_dis
43
+
44
+ self.img_size = args.img_size
45
+ self.img_ch = args.img_ch
46
+
47
+ self.device = f'cuda:{args.gpu_ids[0]}'
48
+ self.gpu_ids = args.gpu_ids
49
+ self.benchmark_flag = args.benchmark_flag
50
+ self.resume = args.resume
51
+ self.rho_clipper = args.rho_clipper
52
+ self.w_clipper = args.w_clipper
53
+ self.pretrained_weights = args.pretrained_weights
54
+
55
+ if torch.backends.cudnn.enabled and self.benchmark_flag:
56
+ print('set benchmark !')
57
+ torch.backends.cudnn.benchmark = True
58
+
59
+ print("##### Information #####")
60
+ print("# light : ", self.light)
61
+ print("# dataset : ", self.dataset)
62
+ print("# batch_size : ", self.batch_size)
63
+ print("# iteration per epoch : ", self.iteration)
64
+
65
+ print("##### Discriminator #####")
66
+ print("# discriminator layer : ", self.n_dis)
67
+
68
+ print()
69
+
70
+ print("##### Weight #####")
71
+ print("# adv_weight : ", self.adv_weight)
72
+ print("# cycle_weight : ", self.cycle_weight)
73
+ print("# faceid_weight : ", self.faceid_weight)
74
+ print("# identity_weight : ", self.identity_weight)
75
+ print("# cam_weight : ", self.cam_weight)
76
+ print("# rho_clipper: ", self.rho_clipper)
77
+ print("# w_clipper: ", self.w_clipper)
78
+
79
+ ##################################################################################
80
+ # Model
81
+ ##################################################################################
82
+
83
+ def build_model(self):
84
+ """ DataLoader """
85
+ train_transform = transforms.Compose([
86
+ transforms.RandomHorizontalFlip(),
87
+ transforms.Resize((self.img_size + 30, self.img_size+30)),
88
+ transforms.RandomCrop(self.img_size),
89
+ transforms.ToTensor(),
90
+ transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
91
+ ])
92
+ test_transform = transforms.Compose([
93
+ transforms.Resize((self.img_size, self.img_size)),
94
+ transforms.ToTensor(),
95
+ transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
96
+ ])
97
+ self.trainA = ImageFolder(os.path.join('dataset', self.dataset, 'trainA'), train_transform)
98
+ self.trainB = ImageFolder(os.path.join('dataset', self.dataset, 'trainB'), train_transform)
99
+ self.testA = ImageFolder(os.path.join('dataset', self.dataset, 'testA'), test_transform)
100
+ self.testB = ImageFolder(os.path.join('dataset', self.dataset, 'testB'), test_transform)
101
+
102
+ self.trainA_loader = DataLoader(self.trainA, batch_size=self.batch_size, shuffle=True)
103
+ self.trainB_loader = DataLoader(self.trainB, batch_size=self.batch_size, shuffle=True)
104
+ self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False)
105
+ self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False)
106
+
107
+ """ Define Generator, Discriminator """
108
+ self.genA2B = ResnetGenerator(ngf=self.ch, img_size=self.img_size, light=self.light).to(self.device)
109
+ self.genB2A = ResnetGenerator(ngf=self.ch, img_size=self.img_size, light=self.light).to(self.device)
110
+ self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7).to(self.device)
111
+ self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7).to(self.device)
112
+ self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5).to(self.device)
113
+ self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5).to(self.device)
114
+
115
+ self.facenet = FaceFeatures('models/model_mobilefacenet.pth', self.device)
116
+
117
+ """ Define Loss """
118
+ self.L1_loss = nn.L1Loss().to(self.device)
119
+ self.MSE_loss = nn.MSELoss().to(self.device)
120
+ self.BCE_loss = nn.BCEWithLogitsLoss().to(self.device)
121
+
122
+ """ Trainer """
123
+ self.G_optim = torch.optim.Adam(itertools.chain(self.genA2B.parameters(), self.genB2A.parameters()), lr=self.lr, betas=(0.5, 0.999), weight_decay=0.0001)
124
+ self.D_optim = torch.optim.Adam(
125
+ itertools.chain(self.disGA.parameters(), self.disGB.parameters(), self.disLA.parameters(), self.disLB.parameters()),
126
+ lr=self.lr, betas=(0.5, 0.999), weight_decay=0.0001
127
+ )
128
+
129
+ """ Define Rho clipper to constraint the value of rho in AdaLIN and LIN"""
130
+ self.Rho_clipper = RhoClipper(0, self.rho_clipper)
131
+ self.W_Clipper = WClipper(0, self.w_clipper)
132
+
133
+ def train(self):
134
+ self.genA2B.train(), self.genB2A.train(), self.disGA.train(), self.disGB.train(), self.disLA.train(), self.disLB.train()
135
+
136
+ start_iter = 1
137
+ if self.resume:
138
+ model_list = glob(os.path.join(self.result_dir, self.dataset, 'model', '*.pt'))
139
+ if not len(model_list) == 0:
140
+ model_list.sort()
141
+ start_iter = int(model_list[-1].split('_')[-1].split('.')[0])
142
+ self.load(os.path.join(self.result_dir, self.dataset, 'model'), start_iter)
143
+ print(" [*] Load SUCCESS")
144
+ if self.decay_flag and start_iter > (self.iteration // 2):
145
+ self.G_optim.param_groups[0]['lr'] -= (self.lr / (self.iteration // 2)) * (start_iter - self.iteration // 2)
146
+ self.D_optim.param_groups[0]['lr'] -= (self.lr / (self.iteration // 2)) * (start_iter - self.iteration // 2)
147
+
148
+ if self.pretrained_weights:
149
+ params = torch.load(self.pretrained_weights, map_location=self.device)
150
+ self.genA2B.load_state_dict(params['genA2B'])
151
+ self.genB2A.load_state_dict(params['genB2A'])
152
+ self.disGA.load_state_dict(params['disGA'])
153
+ self.disGB.load_state_dict(params['disGB'])
154
+ self.disLA.load_state_dict(params['disLA'])
155
+ self.disLB.load_state_dict(params['disLB'])
156
+ print(" [*] Load {} Success".format(self.pretrained_weights))
157
+
158
+ if len(self.gpu_ids) > 1:
159
+ self.genA2B = nn.DataParallel(self.genA2B, device_ids=self.gpu_ids)
160
+ self.genB2A = nn.DataParallel(self.genB2A, device_ids=self.gpu_ids)
161
+ self.disGA = nn.DataParallel(self.disGA, device_ids=self.gpu_ids)
162
+ self.disGB = nn.DataParallel(self.disGB, device_ids=self.gpu_ids)
163
+ self.disLA = nn.DataParallel(self.disLA, device_ids=self.gpu_ids)
164
+ self.disLB = nn.DataParallel(self.disLB, device_ids=self.gpu_ids)
165
+
166
+ # training loop
167
+ print('training start !')
168
+ start_time = time.time()
169
+ for step in range(start_iter, self.iteration + 1):
170
+ if self.decay_flag and step > (self.iteration // 2):
171
+ self.G_optim.param_groups[0]['lr'] -= (self.lr / (self.iteration // 2))
172
+ self.D_optim.param_groups[0]['lr'] -= (self.lr / (self.iteration // 2))
173
+
174
+ try:
175
+ real_A, _ = trainA_iter.next()
176
+ except:
177
+ trainA_iter = iter(self.trainA_loader)
178
+ real_A, _ = trainA_iter.next()
179
+
180
+ try:
181
+ real_B, _ = trainB_iter.next()
182
+ except:
183
+ trainB_iter = iter(self.trainB_loader)
184
+ real_B, _ = trainB_iter.next()
185
+
186
+ real_A, real_B = real_A.to(self.device), real_B.to(self.device)
187
+
188
+ # Update D
189
+ self.D_optim.zero_grad()
190
+
191
+ fake_A2B, _, _ = self.genA2B(real_A)
192
+ fake_B2A, _, _ = self.genB2A(real_B)
193
+
194
+ real_GA_logit, real_GA_cam_logit, _ = self.disGA(real_A)
195
+ real_LA_logit, real_LA_cam_logit, _ = self.disLA(real_A)
196
+ real_GB_logit, real_GB_cam_logit, _ = self.disGB(real_B)
197
+ real_LB_logit, real_LB_cam_logit, _ = self.disLB(real_B)
198
+
199
+ fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
200
+ fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
201
+ fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
202
+ fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)
203
+
204
+ D_ad_loss_GA = self.MSE_loss(real_GA_logit, torch.ones_like(real_GA_logit).to(self.device)) + \
205
+ self.MSE_loss(fake_GA_logit, torch.zeros_like(fake_GA_logit).to(self.device))
206
+
207
+ D_ad_cam_loss_GA = self.MSE_loss(real_GA_cam_logit, torch.ones_like(real_GA_cam_logit).to(self.device)) + \
208
+ self.MSE_loss(fake_GA_cam_logit, torch.zeros_like(fake_GA_cam_logit).to(self.device))
209
+
210
+ D_ad_loss_LA = self.MSE_loss(real_LA_logit, torch.ones_like(real_LA_logit).to(self.device)) + \
211
+ self.MSE_loss(fake_LA_logit, torch.zeros_like(fake_LA_logit).to(self.device))
212
+
213
+ D_ad_cam_loss_LA = self.MSE_loss(real_LA_cam_logit, torch.ones_like(real_LA_cam_logit).to(self.device)) +\
214
+ self.MSE_loss(fake_LA_cam_logit, torch.zeros_like(fake_LA_cam_logit).to(self.device))
215
+
216
+ D_ad_loss_GB = self.MSE_loss(real_GB_logit, torch.ones_like(real_GB_logit).to(self.device)) + \
217
+ self.MSE_loss(fake_GB_logit, torch.zeros_like(fake_GB_logit).to(self.device))
218
+
219
+ D_ad_cam_loss_GB = self.MSE_loss(real_GB_cam_logit, torch.ones_like(real_GB_cam_logit).to(self.device)) + \
220
+ self.MSE_loss(fake_GB_cam_logit, torch.zeros_like(fake_GB_cam_logit).to(self.device))
221
+
222
+ D_ad_loss_LB = self.MSE_loss(real_LB_logit, torch.ones_like(real_LB_logit).to(self.device)) + \
223
+ self.MSE_loss(fake_LB_logit, torch.zeros_like(fake_LB_logit).to(self.device))
224
+
225
+ D_ad_cam_loss_LB = self.MSE_loss(real_LB_cam_logit, torch.ones_like(real_LB_cam_logit).to(self.device)) +\
226
+ self.MSE_loss(fake_LB_cam_logit, torch.zeros_like(fake_LB_cam_logit).to(self.device))
227
+
228
+ D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA + D_ad_loss_LA + D_ad_cam_loss_LA)
229
+ D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB + D_ad_loss_LB + D_ad_cam_loss_LB)
230
+
231
+ Discriminator_loss = D_loss_A + D_loss_B
232
+ Discriminator_loss.backward()
233
+ self.D_optim.step()
234
+
235
+ # Update G
236
+ self.G_optim.zero_grad()
237
+
238
+ fake_A2B, fake_A2B_cam_logit, _ = self.genA2B(real_A)
239
+ fake_B2A, fake_B2A_cam_logit, _ = self.genB2A(real_B)
240
+
241
+ fake_A2B2A, _, _ = self.genB2A(fake_A2B)
242
+ fake_B2A2B, _, _ = self.genA2B(fake_B2A)
243
+
244
+ fake_A2A, fake_A2A_cam_logit, _ = self.genB2A(real_A)
245
+ fake_B2B, fake_B2B_cam_logit, _ = self.genA2B(real_B)
246
+
247
+ fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
248
+ fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
249
+ fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
250
+ fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)
251
+
252
+ G_ad_loss_GA = self.MSE_loss(fake_GA_logit, torch.ones_like(fake_GA_logit).to(self.device))
253
+ G_ad_cam_loss_GA = self.MSE_loss(fake_GA_cam_logit, torch.ones_like(fake_GA_cam_logit).to(self.device))
254
+ G_ad_loss_LA = self.MSE_loss(fake_LA_logit, torch.ones_like(fake_LA_logit).to(self.device))
255
+ G_ad_cam_loss_LA = self.MSE_loss(fake_LA_cam_logit, torch.ones_like(fake_LA_cam_logit).to(self.device))
256
+ G_ad_loss_GB = self.MSE_loss(fake_GB_logit, torch.ones_like(fake_GB_logit).to(self.device))
257
+ G_ad_cam_loss_GB = self.MSE_loss(fake_GB_cam_logit, torch.ones_like(fake_GB_cam_logit).to(self.device))
258
+ G_ad_loss_LB = self.MSE_loss(fake_LB_logit, torch.ones_like(fake_LB_logit).to(self.device))
259
+ G_ad_cam_loss_LB = self.MSE_loss(fake_LB_cam_logit, torch.ones_like(fake_LB_cam_logit).to(self.device))
260
+
261
+ G_recon_loss_A = self.L1_loss(fake_A2B2A, real_A)
262
+ G_recon_loss_B = self.L1_loss(fake_B2A2B, real_B)
263
+
264
+ G_identity_loss_A = self.L1_loss(fake_A2A, real_A)
265
+ G_identity_loss_B = self.L1_loss(fake_B2B, real_B)
266
+
267
+ G_id_loss_A = self.facenet.cosine_distance(real_A, fake_A2B)
268
+ G_id_loss_B = self.facenet.cosine_distance(real_B, fake_B2A)
269
+ if len(self.gpu_ids) > 1:
270
+ G_id_loss_A = torch.mean(G_id_loss_A)
271
+ G_id_loss_B = torch.mean(G_id_loss_B)
272
+
273
+ G_cam_loss_A = self.BCE_loss(fake_B2A_cam_logit, torch.ones_like(fake_B2A_cam_logit).to(self.device)) + \
274
+ self.BCE_loss(fake_A2A_cam_logit, torch.zeros_like(fake_A2A_cam_logit).to(self.device))
275
+ G_cam_loss_B = self.BCE_loss(fake_A2B_cam_logit, torch.ones_like(fake_A2B_cam_logit).to(self.device)) + \
276
+ self.BCE_loss(fake_B2B_cam_logit, torch.zeros_like(fake_B2B_cam_logit).to(self.device))
277
+
278
+ G_loss_A = self.adv_weight * (G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA + G_ad_cam_loss_LA) + \
279
+ self.cycle_weight * G_recon_loss_A + self.identity_weight * G_identity_loss_A + \
280
+ self.cam_weight * G_cam_loss_A + self.faceid_weight * G_id_loss_A
281
+ G_loss_B = self.adv_weight * (G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB + G_ad_cam_loss_LB) + \
282
+ self.cycle_weight * G_recon_loss_B + self.identity_weight * G_identity_loss_B + \
283
+ self.cam_weight * G_cam_loss_B + self.faceid_weight * G_id_loss_B
284
+
285
+ Generator_loss = G_loss_A + G_loss_B
286
+ Generator_loss.backward()
287
+ self.G_optim.step()
288
+
289
+ # clip parameter of Soft-AdaLIN and LIN, applied after optimizer step
290
+ self.genA2B.apply(self.Rho_clipper)
291
+ self.genB2A.apply(self.Rho_clipper)
292
+
293
+ self.genA2B.apply(self.W_Clipper)
294
+ self.genB2A.apply(self.W_Clipper)
295
+
296
+ if step % 10 == 0:
297
+ print("[%5d/%5d] time: %4.4f d_loss: %.8f, g_loss: %.8f" % (step, self.iteration, time.time() - start_time, Discriminator_loss, Generator_loss))
298
+ if step % self.print_freq == 0:
299
+ train_sample_num = 5
300
+ test_sample_num = 5
301
+ A2B = np.zeros((self.img_size * 7, 0, 3))
302
+ B2A = np.zeros((self.img_size * 7, 0, 3))
303
+
304
+ self.genA2B.eval(), self.genB2A.eval(), self.disGA.eval(), self.disGB.eval(), self.disLA.eval(), self.disLB.eval()
305
+ with torch.no_grad():
306
+ for _ in range(train_sample_num):
307
+ try:
308
+ real_A, _ = trainA_iter.next()
309
+ except:
310
+ trainA_iter = iter(self.trainA_loader)
311
+ real_A, _ = trainA_iter.next()
312
+
313
+ try:
314
+ real_B, _ = trainB_iter.next()
315
+ except:
316
+ trainB_iter = iter(self.trainB_loader)
317
+ real_B, _ = trainB_iter.next()
318
+ real_A, real_B = real_A.to(self.device), real_B.to(self.device)
319
+
320
+ fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
321
+ fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)
322
+
323
+ fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B)
324
+ fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A)
325
+
326
+ fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
327
+ fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)
328
+
329
+ A2B = np.concatenate((A2B, np.concatenate((RGB2BGR(tensor2numpy(denorm(real_A[0]))),
330
+ cam(tensor2numpy(fake_A2A_heatmap[0]), self.img_size),
331
+ RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))),
332
+ cam(tensor2numpy(fake_A2B_heatmap[0]), self.img_size),
333
+ RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))),
334
+ cam(tensor2numpy(fake_A2B2A_heatmap[0]), self.img_size),
335
+ RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))), 0)), 1)
336
+
337
+ B2A = np.concatenate((B2A, np.concatenate((RGB2BGR(tensor2numpy(denorm(real_B[0]))),
338
+ cam(tensor2numpy(fake_B2B_heatmap[0]), self.img_size),
339
+ RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))),
340
+ cam(tensor2numpy(fake_B2A_heatmap[0]), self.img_size),
341
+ RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))),
342
+ cam(tensor2numpy(fake_B2A2B_heatmap[0]), self.img_size),
343
+ RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))), 0)), 1)
344
+
345
+ for _ in range(test_sample_num):
346
+ try:
347
+ real_A, _ = testA_iter.next()
348
+ except:
349
+ testA_iter = iter(self.testA_loader)
350
+ real_A, _ = testA_iter.next()
351
+
352
+ try:
353
+ real_B, _ = testB_iter.next()
354
+ except:
355
+ testB_iter = iter(self.testB_loader)
356
+ real_B, _ = testB_iter.next()
357
+ real_A, real_B = real_A.to(self.device), real_B.to(self.device)
358
+
359
+ fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
360
+ fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)
361
+
362
+ fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B)
363
+ fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A)
364
+
365
+ fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
366
+ fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)
367
+
368
+ A2B = np.concatenate((A2B, np.concatenate((RGB2BGR(tensor2numpy(denorm(real_A[0]))),
369
+ cam(tensor2numpy(fake_A2A_heatmap[0]), self.img_size),
370
+ RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))),
371
+ cam(tensor2numpy(fake_A2B_heatmap[0]), self.img_size),
372
+ RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))),
373
+ cam(tensor2numpy(fake_A2B2A_heatmap[0]), self.img_size),
374
+ RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))), 0)), 1)
375
+
376
+ B2A = np.concatenate((B2A, np.concatenate((RGB2BGR(tensor2numpy(denorm(real_B[0]))),
377
+ cam(tensor2numpy(fake_B2B_heatmap[0]), self.img_size),
378
+ RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))),
379
+ cam(tensor2numpy(fake_B2A_heatmap[0]), self.img_size),
380
+ RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))),
381
+ cam(tensor2numpy(fake_B2A2B_heatmap[0]), self.img_size),
382
+ RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))), 0)), 1)
383
+
384
+ cv2.imwrite(os.path.join(self.result_dir, self.dataset, 'img', 'A2B_%07d.png' % step), A2B * 255.0)
385
+ cv2.imwrite(os.path.join(self.result_dir, self.dataset, 'img', 'B2A_%07d.png' % step), B2A * 255.0)
386
+ self.genA2B.train(), self.genB2A.train(), self.disGA.train(), self.disGB.train(), self.disLA.train(), self.disLB.train()
387
+
388
+ if step % self.save_freq == 0:
389
+ self.save(os.path.join(self.result_dir, self.dataset, 'model'), step)
390
+
391
+ if step % 1000 == 0:
392
+ params = {}
393
+
394
+ if len(self.gpu_ids) > 1:
395
+ params['genA2B'] = self.genA2B.module.state_dict()
396
+ params['genB2A'] = self.genB2A.module.state_dict()
397
+ params['disGA'] = self.disGA.module.state_dict()
398
+ params['disGB'] = self.disGB.module.state_dict()
399
+ params['disLA'] = self.disLA.module.state_dict()
400
+ params['disLB'] = self.disLB.module.state_dict()
401
+
402
+ else:
403
+ params['genA2B'] = self.genA2B.state_dict()
404
+ params['genB2A'] = self.genB2A.state_dict()
405
+ params['disGA'] = self.disGA.state_dict()
406
+ params['disGB'] = self.disGB.state_dict()
407
+ params['disLA'] = self.disLA.state_dict()
408
+ params['disLB'] = self.disLB.state_dict()
409
+ torch.save(params, os.path.join(self.result_dir, self.dataset + '_params_latest.pt'))
410
+
411
+ def save(self, dir, step):
412
+ params = {}
413
+
414
+ if len(self.gpu_ids) > 1:
415
+ params['genA2B'] = self.genA2B.module.state_dict()
416
+ params['genB2A'] = self.genB2A.module.state_dict()
417
+ params['disGA'] = self.disGA.module.state_dict()
418
+ params['disGB'] = self.disGB.module.state_dict()
419
+ params['disLA'] = self.disLA.module.state_dict()
420
+ params['disLB'] = self.disLB.module.state_dict()
421
+
422
+ else:
423
+ params['genA2B'] = self.genA2B.state_dict()
424
+ params['genB2A'] = self.genB2A.state_dict()
425
+ params['disGA'] = self.disGA.state_dict()
426
+ params['disGB'] = self.disGB.state_dict()
427
+ params['disLA'] = self.disLA.state_dict()
428
+ params['disLB'] = self.disLB.state_dict()
429
+ torch.save(params, os.path.join(dir, self.dataset + '_params_%07d.pt' % step))
430
+
431
+ def load(self, dir, step):
432
+ params = torch.load(os.path.join(dir, self.dataset + '_params_%07d.pt' % step))
433
+ self.genA2B.load_state_dict(params['genA2B'])
434
+ self.genB2A.load_state_dict(params['genB2A'])
435
+ self.disGA.load_state_dict(params['disGA'])
436
+ self.disGB.load_state_dict(params['disGB'])
437
+ self.disLA.load_state_dict(params['disLA'])
438
+ self.disLB.load_state_dict(params['disLB'])
439
+
440
+ def test(self):
441
+ model_list = glob(os.path.join(self.result_dir, self.dataset, 'model', '*.pt'))
442
+ if not len(model_list) == 0:
443
+ model_list.sort()
444
+ iter = int(model_list[-1].split('_')[-1].split('.')[0])
445
+ self.load(os.path.join(self.result_dir, self.dataset, 'model'), iter)
446
+ print(" [*] Load SUCCESS")
447
+ else:
448
+ print(" [*] Load FAILURE")
449
+ return
450
+
451
+ self.genA2B.eval(), self.genB2A.eval()
452
+ with torch.no_grad():
453
+ for n, (real_A, _) in enumerate(self.testA_loader):
454
+ real_A = real_A.to(self.device)
455
+
456
+ fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
457
+
458
+ fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(fake_A2B)
459
+
460
+ fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
461
+
462
+ A2B = np.concatenate((RGB2BGR(tensor2numpy(denorm(real_A[0]))),
463
+ cam(tensor2numpy(fake_A2A_heatmap[0]), self.img_size),
464
+ RGB2BGR(tensor2numpy(denorm(fake_A2A[0]))),
465
+ cam(tensor2numpy(fake_A2B_heatmap[0]), self.img_size),
466
+ RGB2BGR(tensor2numpy(denorm(fake_A2B[0]))),
467
+ cam(tensor2numpy(fake_A2B2A_heatmap[0]), self.img_size),
468
+ RGB2BGR(tensor2numpy(denorm(fake_A2B2A[0])))), 0)
469
+
470
+ cv2.imwrite(os.path.join(self.result_dir, self.dataset, 'test', 'A2B_%d.png' % (n + 1)), A2B * 255.0)
471
+
472
+ for n, (real_B, _) in enumerate(self.testB_loader):
473
+ real_B = real_B.to(self.device)
474
+
475
+ fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)
476
+
477
+ fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(fake_B2A)
478
+
479
+ fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)
480
+
481
+ B2A = np.concatenate((RGB2BGR(tensor2numpy(denorm(real_B[0]))),
482
+ cam(tensor2numpy(fake_B2B_heatmap[0]), self.img_size),
483
+ RGB2BGR(tensor2numpy(denorm(fake_B2B[0]))),
484
+ cam(tensor2numpy(fake_B2A_heatmap[0]), self.img_size),
485
+ RGB2BGR(tensor2numpy(denorm(fake_B2A[0]))),
486
+ cam(tensor2numpy(fake_B2A2B_heatmap[0]), self.img_size),
487
+ RGB2BGR(tensor2numpy(denorm(fake_B2A2B[0])))), 0)
488
+
489
+ cv2.imwrite(os.path.join(self.result_dir, self.dataset, 'test', 'B2A_%d.png' % (n + 1)), B2A * 255.0)
p2c/models/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .networks import ResnetGenerator
+ from .UGATIT_sadalin_hourglass import UgatitSadalinHourglass
+
p2c/models/face_features.py ADDED
@@ -0,0 +1,31 @@
+ import torch
+ import torch.nn.functional as F
+ from .mobilefacenet import MobileFaceNet
+
+
+ class FaceFeatures(object):
+     def __init__(self, weights_path, device):
+         self.device = device
+         self.model = MobileFaceNet(512).to(device)
+         self.model.load_state_dict(torch.load(weights_path))
+         self.model.eval()
+
+     def infer(self, batch_tensor):
+         # crop face
+         h, w = batch_tensor.shape[2:]
+         top = int(h / 2.1 * (0.8 - 0.33))
+         bottom = int(h - (h / 2.1 * 0.3))
+         size = bottom - top
+         left = int(w / 2 - size / 2)
+         right = left + size
+         batch_tensor = batch_tensor[:, :, top: bottom, left: right]
+
+         batch_tensor = F.interpolate(batch_tensor, size=[112, 112], mode='bilinear', align_corners=True)
+
+         features = self.model(batch_tensor)
+         return features
+
+     def cosine_distance(self, batch_tensor1, batch_tensor2):
+         feature1 = self.infer(batch_tensor1)
+         feature2 = self.infer(batch_tensor2)
+         return 1 - torch.cosine_similarity(feature1, feature2)
p2c/models/mobilefacenet.py ADDED
@@ -0,0 +1,258 @@
1
+ from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, ReLU, Sigmoid, Dropout, \
2
+ MaxPool2d, AdaptiveAvgPool2d, Sequential, Module
3
+ import torch
4
+ from collections import namedtuple
5
+
6
+
7
+ ################################## Original Arcface Model #############################################################
8
+
9
+ class Flatten(Module):
10
+ def forward(self, input):
11
+ return input.view(input.size(0), -1)
12
+
13
+
14
+ def l2_norm(input, axis=1):
15
+ norm = torch.norm(input, 2, axis, True)
16
+ output = torch.div(input, norm)
17
+ return output
18
+
19
+
20
+ class SEModule(Module):
21
+ def __init__(self, channels, reduction):
22
+ super(SEModule, self).__init__()
23
+ self.avg_pool = AdaptiveAvgPool2d(1)
24
+ self.fc1 = Conv2d(
25
+ channels, channels // reduction, kernel_size=1, padding=0, bias=False)
26
+ self.relu = ReLU(inplace=True)
27
+ self.fc2 = Conv2d(
28
+ channels // reduction, channels, kernel_size=1, padding=0, bias=False)
29
+ self.sigmoid = Sigmoid()
30
+
31
+ def forward(self, x):
32
+ module_input = x
33
+ x = self.avg_pool(x)
34
+ x = self.fc1(x)
35
+ x = self.relu(x)
36
+ x = self.fc2(x)
37
+ x = self.sigmoid(x)
38
+ return module_input * x
39
+
40
+
41
+ class bottleneck_IR(Module):
42
+ def __init__(self, in_channel, depth, stride):
43
+ super(bottleneck_IR, self).__init__()
44
+ if in_channel == depth:
45
+ self.shortcut_layer = MaxPool2d(1, stride)
46
+ else:
47
+ self.shortcut_layer = Sequential(
48
+ Conv2d(in_channel, depth, (1, 1), stride, bias=False), BatchNorm2d(depth))
49
+ self.res_layer = Sequential(
50
+ BatchNorm2d(in_channel),
51
+ Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth),
52
+ Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth))
53
+
54
+ def forward(self, x):
55
+ shortcut = self.shortcut_layer(x)
56
+ res = self.res_layer(x)
57
+ return res + shortcut
58
+
59
+
60
+ class bottleneck_IR_SE(Module):
61
+ def __init__(self, in_channel, depth, stride):
62
+ super(bottleneck_IR_SE, self).__init__()
63
+ if in_channel == depth:
64
+ self.shortcut_layer = MaxPool2d(1, stride)
65
+ else:
66
+ self.shortcut_layer = Sequential(
67
+ Conv2d(in_channel, depth, (1, 1), stride, bias=False),
68
+ BatchNorm2d(depth))
69
+ self.res_layer = Sequential(
70
+ BatchNorm2d(in_channel),
71
+ Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
72
+ PReLU(depth),
73
+ Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
74
+ BatchNorm2d(depth),
75
+ SEModule(depth, 16)
76
+ )
77
+
78
+ def forward(self, x):
79
+ shortcut = self.shortcut_layer(x)
80
+ res = self.res_layer(x)
81
+ return res + shortcut
82
+
83
+
84
+ class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
85
+ '''A named tuple describing a ResNet block.'''
86
+
87
+
88
+ def get_block(in_channel, depth, num_units, stride=2):
89
+ return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]
90
+
91
+
92
+ def get_blocks(num_layers):
93
+ if num_layers == 50:
94
+ blocks = [
95
+ get_block(in_channel=64, depth=64, num_units=3),
96
+ get_block(in_channel=64, depth=128, num_units=4),
97
+ get_block(in_channel=128, depth=256, num_units=14),
98
+ get_block(in_channel=256, depth=512, num_units=3)
99
+ ]
100
+ elif num_layers == 100:
101
+ blocks = [
102
+ get_block(in_channel=64, depth=64, num_units=3),
103
+ get_block(in_channel=64, depth=128, num_units=13),
104
+ get_block(in_channel=128, depth=256, num_units=30),
105
+ get_block(in_channel=256, depth=512, num_units=3)
106
+ ]
107
+ elif num_layers == 152:
108
+ blocks = [
109
+ get_block(in_channel=64, depth=64, num_units=3),
110
+ get_block(in_channel=64, depth=128, num_units=8),
111
+ get_block(in_channel=128, depth=256, num_units=36),
112
+ get_block(in_channel=256, depth=512, num_units=3)
113
+ ]
114
+ return blocks
115
+
116
+
117
+ class Backbone(Module):
118
+ def __init__(self, num_layers, drop_ratio, mode='ir'):
119
+ super(Backbone, self).__init__()
120
+ assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
121
+ assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
122
+ blocks = get_blocks(num_layers)
123
+ if mode == 'ir':
124
+ unit_module = bottleneck_IR
125
+ elif mode == 'ir_se':
126
+ unit_module = bottleneck_IR_SE
127
+ self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
128
+ BatchNorm2d(64),
129
+ PReLU(64))
130
+ self.output_layer = Sequential(BatchNorm2d(512),
131
+ Dropout(drop_ratio),
132
+ Flatten(),
133
+ Linear(512 * 7 * 7, 512),
134
+ BatchNorm1d(512))
135
+ modules = []
136
+ for block in blocks:
137
+ for bottleneck in block:
138
+ modules.append(
139
+ unit_module(bottleneck.in_channel,
140
+ bottleneck.depth,
141
+ bottleneck.stride))
142
+ self.body = Sequential(*modules)
143
+
144
+ def forward(self, x):
145
+ x = self.input_layer(x)
146
+ x = self.body(x)
147
+ x = self.output_layer(x)
148
+ return l2_norm(x)
149
+
150
+
151
+ ################################## MobileFaceNet #############################################################
152
+
153
+ class Conv_block(Module):
154
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
155
+ super(Conv_block, self).__init__()
156
+ self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding,
157
+ bias=False)
158
+ self.bn = BatchNorm2d(out_c)
159
+ self.prelu = PReLU(out_c)
160
+
161
+ def forward(self, x):
162
+ x = self.conv(x)
163
+ x = self.bn(x)
164
+ x = self.prelu(x)
165
+ return x
166
+
167
+
168
+ class Linear_block(Module):
169
+ def __init__(self, in_c, out_c, kernel=(1, 1), stride=(1, 1), padding=(0, 0), groups=1):
170
+ super(Linear_block, self).__init__()
171
+ self.conv = Conv2d(in_c, out_channels=out_c, kernel_size=kernel, groups=groups, stride=stride, padding=padding,
172
+ bias=False)
173
+ self.bn = BatchNorm2d(out_c)
174
+
175
+ def forward(self, x):
176
+ x = self.conv(x)
177
+ x = self.bn(x)
178
+ return x
179
+
180
+
181
+ class Depth_Wise(Module):
182
+ def __init__(self, in_c, out_c, residual=False, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=1):
183
+ super(Depth_Wise, self).__init__()
184
+ self.conv = Conv_block(in_c, out_c=groups, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
185
+ self.conv_dw = Conv_block(groups, groups, groups=groups, kernel=kernel, padding=padding, stride=stride)
186
+ self.project = Linear_block(groups, out_c, kernel=(1, 1), padding=(0, 0), stride=(1, 1))
187
+ self.residual = residual
188
+
189
+ def forward(self, x):
190
+ if self.residual:
191
+ short_cut = x
192
+ x = self.conv(x)
193
+ x = self.conv_dw(x)
194
+ x = self.project(x)
195
+ if self.residual:
196
+ output = short_cut + x
197
+ else:
198
+ output = x
199
+ return output
200
+
201
+
202
+ class Residual(Module):
203
+ def __init__(self, c, num_block, groups, kernel=(3, 3), stride=(1, 1), padding=(1, 1)):
204
+ super(Residual, self).__init__()
205
+ modules = []
206
+ for _ in range(num_block):
207
+ modules.append(
208
+ Depth_Wise(c, c, residual=True, kernel=kernel, padding=padding, stride=stride, groups=groups))
209
+ self.model = Sequential(*modules)
210
+
211
+ def forward(self, x):
212
+ return self.model(x)
213
+
214
+
215
+ class MobileFaceNet(Module):
216
+ def __init__(self, embedding_size):
217
+ super(MobileFaceNet, self).__init__()
218
+ self.conv1 = Conv_block(3, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1))
219
+ self.conv2_dw = Conv_block(64, 64, kernel=(3, 3), stride=(1, 1), padding=(1, 1), groups=64)
220
+ self.conv_23 = Depth_Wise(64, 64, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=128)
221
+ self.conv_3 = Residual(64, num_block=4, groups=128, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
222
+ self.conv_34 = Depth_Wise(64, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=256)
223
+ self.conv_4 = Residual(128, num_block=6, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
224
+ self.conv_45 = Depth_Wise(128, 128, kernel=(3, 3), stride=(2, 2), padding=(1, 1), groups=512)
225
+ self.conv_5 = Residual(128, num_block=2, groups=256, kernel=(3, 3), stride=(1, 1), padding=(1, 1))
226
+ self.conv_6_sep = Conv_block(128, 512, kernel=(1, 1), stride=(1, 1), padding=(0, 0))
227
+ self.conv_6_dw = Linear_block(512, 512, groups=512, kernel=(7, 7), stride=(1, 1), padding=(0, 0))
228
+ self.conv_6_flatten = Flatten()
229
+ self.linear = Linear(512, embedding_size, bias=False)
230
+ self.bn = BatchNorm1d(embedding_size)
231
+
232
+ def forward(self, x):
233
+ out = self.conv1(x)
234
+
235
+ out = self.conv2_dw(out)
236
+
237
+ out = self.conv_23(out)
238
+
239
+ out = self.conv_3(out)
240
+
241
+ out = self.conv_34(out)
242
+
243
+ out = self.conv_4(out)
244
+
245
+ out = self.conv_45(out)
246
+
247
+ out = self.conv_5(out)
248
+
249
+ out = self.conv_6_sep(out)
250
+
251
+ out = self.conv_6_dw(out)
252
+
253
+ out = self.conv_6_flatten(out)
254
+
255
+ out = self.linear(out)
256
+
257
+ out = self.bn(out)
258
+ return l2_norm(out)
p2c/models/model_mobilefacenet.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f3bbd745247b32641724bf6d7964df7fd94ea5a098fe16d692b412fe44cd59b
+ size 4938364
p2c/models/networks.py ADDED
@@ -0,0 +1,485 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torch.nn.parameter import Parameter
5
+
6
+
7
+ class ResnetGenerator(nn.Module):
8
+ def __init__(self, ngf=64, img_size=256, light=False):
9
+ super(ResnetGenerator, self).__init__()
10
+ self.light = light
11
+
12
+ self.ConvBlock1 = nn.Sequential(nn.ReflectionPad2d(3),
13
+ nn.Conv2d(3, ngf, kernel_size=7, stride=1, padding=0, bias=False),
14
+ nn.InstanceNorm2d(ngf),
15
+ nn.ReLU(True))
16
+
17
+ self.HourGlass1 = HourGlass(ngf, ngf)
18
+ self.HourGlass2 = HourGlass(ngf, ngf)
19
+
20
+ # Down-Sampling
21
+ self.DownBlock1 = nn.Sequential(nn.ReflectionPad2d(1),
22
+ nn.Conv2d(ngf, ngf*2, kernel_size=3, stride=2, padding=0, bias=False),
23
+ nn.InstanceNorm2d(ngf * 2),
24
+ nn.ReLU(True))
25
+
26
+ self.DownBlock2 = nn.Sequential(nn.ReflectionPad2d(1),
27
+ nn.Conv2d(ngf*2, ngf*4, kernel_size=3, stride=2, padding=0, bias=False),
28
+ nn.InstanceNorm2d(ngf*4),
29
+ nn.ReLU(True))
30
+
31
+ # Encoder Bottleneck
32
+ self.EncodeBlock1 = ResnetBlock(ngf*4)
33
+ self.EncodeBlock2 = ResnetBlock(ngf*4)
34
+ self.EncodeBlock3 = ResnetBlock(ngf*4)
35
+ self.EncodeBlock4 = ResnetBlock(ngf*4)
36
+
37
+ # Class Activation Map
38
+ self.gap_fc = nn.Linear(ngf*4, 1)
39
+ self.gmp_fc = nn.Linear(ngf*4, 1)
40
+ self.conv1x1 = nn.Conv2d(ngf*8, ngf*4, kernel_size=1, stride=1)
41
+ self.relu = nn.ReLU(True)
42
+
43
+ # Gamma, Beta block
44
+ if self.light:
45
+ self.FC = nn.Sequential(nn.Linear(ngf*4, ngf*4),
46
+ nn.ReLU(True),
47
+ nn.Linear(ngf*4, ngf*4),
48
+ nn.ReLU(True))
49
+ else:
50
+ self.FC = nn.Sequential(nn.Linear(img_size//4*img_size//4*ngf*4, ngf*4),
51
+ nn.ReLU(True),
52
+ nn.Linear(ngf*4, ngf*4),
53
+ nn.ReLU(True))
54
+
55
+ # Decoder Bottleneck
56
+ self.DecodeBlock1 = ResnetSoftAdaLINBlock(ngf*4)
57
+ self.DecodeBlock2 = ResnetSoftAdaLINBlock(ngf*4)
58
+ self.DecodeBlock3 = ResnetSoftAdaLINBlock(ngf*4)
59
+ self.DecodeBlock4 = ResnetSoftAdaLINBlock(ngf*4)
60
+
61
+ # Up-Sampling
62
+ self.UpBlock1 = nn.Sequential(nn.Upsample(scale_factor=2),
63
+ nn.ReflectionPad2d(1),
64
+ nn.Conv2d(ngf*4, ngf*2, kernel_size=3, stride=1, padding=0, bias=False),
65
+ LIN(ngf*2),
66
+ nn.ReLU(True))
67
+
68
+ self.UpBlock2 = nn.Sequential(nn.Upsample(scale_factor=2),
69
+ nn.ReflectionPad2d(1),
70
+ nn.Conv2d(ngf*2, ngf, kernel_size=3, stride=1, padding=0, bias=False),
71
+ LIN(ngf),
72
+ nn.ReLU(True))
73
+
74
+ self.HourGlass3 = HourGlass(ngf, ngf)
75
+ self.HourGlass4 = HourGlass(ngf, ngf, False)
76
+
77
+ self.ConvBlock2 = nn.Sequential(nn.ReflectionPad2d(3),
78
+ nn.Conv2d(3, 3, kernel_size=7, stride=1, padding=0, bias=False),
79
+ nn.Tanh())
80
+
81
+ def forward(self, x):
82
+ x = self.ConvBlock1(x)
83
+ x = self.HourGlass1(x)
84
+ x = self.HourGlass2(x)
85
+
86
+ x = self.DownBlock1(x)
87
+ x = self.DownBlock2(x)
88
+
89
+ x = self.EncodeBlock1(x)
90
+ content_features1 = F.adaptive_avg_pool2d(x, 1).view(x.shape[0], -1)
91
+ x = self.EncodeBlock2(x)
92
+ content_features2 = F.adaptive_avg_pool2d(x, 1).view(x.shape[0], -1)
93
+ x = self.EncodeBlock3(x)
94
+ content_features3 = F.adaptive_avg_pool2d(x, 1).view(x.shape[0], -1)
95
+ x = self.EncodeBlock4(x)
96
+ content_features4 = F.adaptive_avg_pool2d(x, 1).view(x.shape[0], -1)
97
+
98
+ gap = F.adaptive_avg_pool2d(x, 1)
99
+ gap_logit = self.gap_fc(gap.view(x.shape[0], -1))
100
+ gap_weight = list(self.gap_fc.parameters())[0]
101
+ gap = x * gap_weight.unsqueeze(2).unsqueeze(3)
102
+
103
+ gmp = F.adaptive_max_pool2d(x, 1)
104
+ gmp_logit = self.gmp_fc(gmp.view(x.shape[0], -1))
105
+ gmp_weight = list(self.gmp_fc.parameters())[0]
106
+ gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)
107
+
108
+ cam_logit = torch.cat([gap_logit, gmp_logit], 1)
109
+ x = torch.cat([gap, gmp], 1)
110
+ x = self.relu(self.conv1x1(x))
111
+
112
+ heatmap = torch.sum(x, dim=1, keepdim=True)
113
+
114
+ if self.light:
115
+ x_ = F.adaptive_avg_pool2d(x, 1)
116
+ style_features = self.FC(x_.view(x_.shape[0], -1))
117
+ else:
118
+ style_features = self.FC(x.view(x.shape[0], -1))
119
+
120
+ x = self.DecodeBlock1(x, content_features4, style_features)
121
+ x = self.DecodeBlock2(x, content_features3, style_features)
122
+ x = self.DecodeBlock3(x, content_features2, style_features)
123
+ x = self.DecodeBlock4(x, content_features1, style_features)
124
+
125
+ x = self.UpBlock1(x)
126
+ x = self.UpBlock2(x)
127
+
128
+ x = self.HourGlass3(x)
129
+ x = self.HourGlass4(x)
130
+ out = self.ConvBlock2(x)
131
+
132
+ return out, cam_logit, heatmap
133
+
134
+
135
+ class ConvBlock(nn.Module):
136
+ def __init__(self, dim_in, dim_out):
137
+ super(ConvBlock, self).__init__()
138
+ self.dim_out = dim_out
139
+
140
+ self.ConvBlock1 = nn.Sequential(nn.InstanceNorm2d(dim_in),
141
+ nn.ReLU(True),
142
+ nn.ReflectionPad2d(1),
143
+ nn.Conv2d(dim_in, dim_out//2, kernel_size=3, stride=1, bias=False))
144
+
145
+ self.ConvBlock2 = nn.Sequential(nn.InstanceNorm2d(dim_out//2),
146
+ nn.ReLU(True),
147
+ nn.ReflectionPad2d(1),
148
+ nn.Conv2d(dim_out//2, dim_out//4, kernel_size=3, stride=1, bias=False))
149
+
150
+ self.ConvBlock3 = nn.Sequential(nn.InstanceNorm2d(dim_out//4),
151
+ nn.ReLU(True),
152
+ nn.ReflectionPad2d(1),
153
+ nn.Conv2d(dim_out//4, dim_out//4, kernel_size=3, stride=1, bias=False))
154
+
155
+ self.ConvBlock4 = nn.Sequential(nn.InstanceNorm2d(dim_in),
156
+ nn.ReLU(True),
157
+ nn.Conv2d(dim_in, dim_out, kernel_size=1, stride=1, bias=False))
158
+
159
+ def forward(self, x):
160
+ residual = x
161
+
162
+ x1 = self.ConvBlock1(x)
163
+ x2 = self.ConvBlock2(x1)
164
+ x3 = self.ConvBlock3(x2)
165
+ out = torch.cat((x1, x2, x3), 1)
166
+
167
+ if residual.size(1) != self.dim_out:
168
+ residual = self.ConvBlock4(residual)
169
+
170
+ return residual + out
171
+
172
+
173
+ class HourGlass(nn.Module):
174
+ def __init__(self, dim_in, dim_out, use_res=True):
175
+ super(HourGlass, self).__init__()
176
+ self.use_res = use_res
177
+
178
+ self.HG = nn.Sequential(HourGlassBlock(dim_in, dim_out),
179
+ ConvBlock(dim_out, dim_out),
180
+ nn.Conv2d(dim_out, dim_out, kernel_size=1, stride=1, bias=False),
181
+ nn.InstanceNorm2d(dim_out),
182
+ nn.ReLU(True))
183
+
184
+ self.Conv1 = nn.Conv2d(dim_out, 3, kernel_size=1, stride=1)
185
+
186
+ if self.use_res:
187
+ self.Conv2 = nn.Conv2d(dim_out, dim_out, kernel_size=1, stride=1)
188
+ self.Conv3 = nn.Conv2d(3, dim_out, kernel_size=1, stride=1)
189
+
190
+ def forward(self, x):
191
+ ll = self.HG(x)
192
+ tmp_out = self.Conv1(ll)
193
+
194
+ if self.use_res:
195
+ ll = self.Conv2(ll)
196
+ tmp_out_ = self.Conv3(tmp_out)
197
+ return x + ll + tmp_out_
198
+
199
+ else:
200
+ return tmp_out
201
+
202
+
203
+ class HourGlassBlock(nn.Module):
204
+ def __init__(self, dim_in, dim_out):
205
+ super(HourGlassBlock, self).__init__()
206
+
207
+ self.ConvBlock1_1 = ConvBlock(dim_in, dim_out)
208
+ self.ConvBlock1_2 = ConvBlock(dim_out, dim_out)
209
+ self.ConvBlock2_1 = ConvBlock(dim_out, dim_out)
210
+ self.ConvBlock2_2 = ConvBlock(dim_out, dim_out)
211
+ self.ConvBlock3_1 = ConvBlock(dim_out, dim_out)
212
+ self.ConvBlock3_2 = ConvBlock(dim_out, dim_out)
213
+ self.ConvBlock4_1 = ConvBlock(dim_out, dim_out)
214
+ self.ConvBlock4_2 = ConvBlock(dim_out, dim_out)
215
+
216
+ self.ConvBlock5 = ConvBlock(dim_out, dim_out)
217
+
218
+ self.ConvBlock6 = ConvBlock(dim_out, dim_out)
219
+ self.ConvBlock7 = ConvBlock(dim_out, dim_out)
220
+ self.ConvBlock8 = ConvBlock(dim_out, dim_out)
221
+ self.ConvBlock9 = ConvBlock(dim_out, dim_out)
222
+
223
+ def forward(self, x):
224
+ skip1 = self.ConvBlock1_1(x)
225
+ down1 = F.avg_pool2d(x, 2)
226
+ down1 = self.ConvBlock1_2(down1)
227
+
228
+ skip2 = self.ConvBlock2_1(down1)
229
+ down2 = F.avg_pool2d(down1, 2)
230
+ down2 = self.ConvBlock2_2(down2)
231
+
232
+ skip3 = self.ConvBlock3_1(down2)
233
+ down3 = F.avg_pool2d(down2, 2)
234
+ down3 = self.ConvBlock3_2(down3)
235
+
236
+ skip4 = self.ConvBlock4_1(down3)
237
+ down4 = F.avg_pool2d(down3, 2)
238
+ down4 = self.ConvBlock4_2(down4)
239
+
240
+ center = self.ConvBlock5(down4)
241
+
242
+ up4 = self.ConvBlock6(center)
243
+ up4 = F.upsample(up4, scale_factor=2)
244
+ up4 = skip4 + up4
245
+
246
+ up3 = self.ConvBlock7(up4)
247
+ up3 = F.upsample(up3, scale_factor=2)
248
+ up3 = skip3 + up3
249
+
250
+ up2 = self.ConvBlock8(up3)
251
+ up2 = F.upsample(up2, scale_factor=2)
252
+ up2 = skip2 + up2
253
+
254
+ up1 = self.ConvBlock9(up2)
255
+ up1 = F.upsample(up1, scale_factor=2)
256
+ up1 = skip1 + up1
257
+
258
+ return up1
259
+
260
+
261
+ class ResnetBlock(nn.Module):
262
+ def __init__(self, dim, use_bias=False):
263
+ super(ResnetBlock, self).__init__()
264
+ conv_block = []
265
+ conv_block += [nn.ReflectionPad2d(1),
266
+ nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=0, bias=use_bias),
267
+ nn.InstanceNorm2d(dim),
268
+ nn.ReLU(True)]
269
+
270
+ conv_block += [nn.ReflectionPad2d(1),
271
+ nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=0, bias=use_bias),
272
+ nn.InstanceNorm2d(dim)]
273
+
274
+ self.conv_block = nn.Sequential(*conv_block)
275
+
276
+ def forward(self, x):
277
+ out = x + self.conv_block(x)
278
+ return out
279
+
280
+
281
+ class ResnetSoftAdaLINBlock(nn.Module):
282
+ def __init__(self, dim, use_bias=False):
283
+ super(ResnetSoftAdaLINBlock, self).__init__()
284
+ self.pad1 = nn.ReflectionPad2d(1)
285
+ self.conv1 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=0, bias=use_bias)
286
+ self.norm1 = SoftAdaLIN(dim)
287
+ self.relu1 = nn.ReLU(True)
288
+
289
+ self.pad2 = nn.ReflectionPad2d(1)
290
+ self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=0, bias=use_bias)
291
+ self.norm2 = SoftAdaLIN(dim)
292
+
293
+ def forward(self, x, content_features, style_features):
294
+ out = self.pad1(x)
295
+ out = self.conv1(out)
296
+ out = self.norm1(out, content_features, style_features)
297
+ out = self.relu1(out)
298
+
299
+ out = self.pad2(out)
300
+ out = self.conv2(out)
301
+ out = self.norm2(out, content_features, style_features)
302
+ return out + x
303
+
304
+
305
+ class ResnetAdaLINBlock(nn.Module):
306
+ def __init__(self, dim, use_bias=False):
307
+ super(ResnetAdaLINBlock, self).__init__()
308
+ self.pad1 = nn.ReflectionPad2d(1)
309
+ self.conv1 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=0, bias=use_bias)
310
+ self.norm1 = adaLIN(dim)
311
+ self.relu1 = nn.ReLU(True)
312
+
313
+ self.pad2 = nn.ReflectionPad2d(1)
314
+ self.conv2 = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=0, bias=use_bias)
315
+ self.norm2 = adaLIN(dim)
316
+
317
+ def forward(self, x, gamma, beta):
318
+ out = self.pad1(x)
319
+ out = self.conv1(out)
320
+ out = self.norm1(out, gamma, beta)
321
+ out = self.relu1(out)
322
+ out = self.pad2(out)
323
+ out = self.conv2(out)
324
+ out = self.norm2(out, gamma, beta)
325
+
326
+ return out + x
327
+
328
+
329
+ class SoftAdaLIN(nn.Module):
330
+ def __init__(self, num_features, eps=1e-5):
331
+ super(SoftAdaLIN, self).__init__()
332
+ self.norm = adaLIN(num_features, eps)
333
+
334
+ self.w_gamma = Parameter(torch.zeros(1, num_features))
335
+ self.w_beta = Parameter(torch.zeros(1, num_features))
336
+
337
+ self.c_gamma = nn.Sequential(nn.Linear(num_features, num_features),
338
+ nn.ReLU(True),
339
+ nn.Linear(num_features, num_features))
340
+ self.c_beta = nn.Sequential(nn.Linear(num_features, num_features),
341
+ nn.ReLU(True),
342
+ nn.Linear(num_features, num_features))
343
+ self.s_gamma = nn.Linear(num_features, num_features)
344
+ self.s_beta = nn.Linear(num_features, num_features)
345
+
346
+ def forward(self, x, content_features, style_features):
347
+ content_gamma, content_beta = self.c_gamma(content_features), self.c_beta(content_features)
348
+ style_gamma, style_beta = self.s_gamma(style_features), self.s_beta(style_features)
349
+
350
+ w_gamma, w_beta = self.w_gamma.expand(x.shape[0], -1), self.w_beta.expand(x.shape[0], -1)
351
+ soft_gamma = (1. - w_gamma) * style_gamma + w_gamma * content_gamma
352
+ soft_beta = (1. - w_beta) * style_beta + w_beta * content_beta
353
+
354
+ out = self.norm(x, soft_gamma, soft_beta)
355
+ return out
356
+
357
+
358
+ class adaLIN(nn.Module):
359
+ def __init__(self, num_features, eps=1e-5):
360
+ super(adaLIN, self).__init__()
361
+ self.eps = eps
362
+ self.rho = Parameter(torch.Tensor(1, num_features, 1, 1))
363
+ self.rho.data.fill_(0.9)
364
+
365
+ def forward(self, input, gamma, beta):
366
+ in_mean, in_var = torch.mean(input, dim=[2, 3], keepdim=True), torch.var(input, dim=[2, 3], keepdim=True)
367
+ out_in = (input - in_mean) / torch.sqrt(in_var + self.eps)
368
+ ln_mean, ln_var = torch.mean(input, dim=[1, 2, 3], keepdim=True), torch.var(input, dim=[1, 2, 3], keepdim=True)
369
+ out_ln = (input - ln_mean) / torch.sqrt(ln_var + self.eps)
370
+ out = self.rho.expand(input.shape[0], -1, -1, -1) * out_in + (1-self.rho.expand(input.shape[0], -1, -1, -1)) * out_ln
371
+ out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(2).unsqueeze(3)
372
+
373
+ return out
374
+
375
+
376
+ class LIN(nn.Module):
377
+ def __init__(self, num_features, eps=1e-5):
378
+ super(LIN, self).__init__()
379
+ self.eps = eps
380
+ self.rho = Parameter(torch.Tensor(1, num_features, 1, 1))
381
+ self.gamma = Parameter(torch.Tensor(1, num_features, 1, 1))
382
+ self.beta = Parameter(torch.Tensor(1, num_features, 1, 1))
383
+ self.rho.data.fill_(0.0)
384
+ self.gamma.data.fill_(1.0)
385
+ self.beta.data.fill_(0.0)
386
+
387
+ def forward(self, input):
388
+ in_mean, in_var = torch.mean(input, dim=[2, 3], keepdim=True), torch.var(input, dim=[2, 3], keepdim=True)
389
+ out_in = (input - in_mean) / torch.sqrt(in_var + self.eps)
390
+ ln_mean, ln_var = torch.mean(input, dim=[1, 2, 3], keepdim=True), torch.var(input, dim=[1, 2, 3], keepdim=True)
391
+ out_ln = (input - ln_mean) / torch.sqrt(ln_var + self.eps)
392
+ out = self.rho.expand(input.shape[0], -1, -1, -1) * out_in + (1-self.rho.expand(input.shape[0], -1, -1, -1)) * out_ln
393
+ out = out * self.gamma.expand(input.shape[0], -1, -1, -1) + self.beta.expand(input.shape[0], -1, -1, -1)
394
+
395
+ return out
396
+
397
+
398
+ class Discriminator(nn.Module):
399
+ def __init__(self, input_nc, ndf=64, n_layers=5):
400
+ super(Discriminator, self).__init__()
401
+ model = [nn.ReflectionPad2d(1),
402
+ nn.utils.spectral_norm(
403
+ nn.Conv2d(input_nc, ndf, kernel_size=4, stride=2, padding=0, bias=True)),
404
+ nn.LeakyReLU(0.2, True)]
405
+
406
+ for i in range(1, n_layers - 2):
407
+ mult = 2 ** (i - 1)
408
+ model += [nn.ReflectionPad2d(1),
409
+ nn.utils.spectral_norm(
410
+ nn.Conv2d(ndf * mult, ndf * mult * 2, kernel_size=4, stride=2, padding=0, bias=True)),
411
+ nn.LeakyReLU(0.2, True)]
412
+
413
+ mult = 2 ** (n_layers - 2 - 1)
414
+ model += [nn.ReflectionPad2d(1),
415
+ nn.utils.spectral_norm(
416
+ nn.Conv2d(ndf * mult, ndf * mult * 2, kernel_size=4, stride=1, padding=0, bias=True)),
417
+ nn.LeakyReLU(0.2, True)]
418
+
419
+ # Class Activation Map
420
+ mult = 2 ** (n_layers - 2)
421
+ self.gap_fc = nn.utils.spectral_norm(nn.Linear(ndf * mult, 1, bias=False))
422
+ self.gmp_fc = nn.utils.spectral_norm(nn.Linear(ndf * mult, 1, bias=False))
423
+ self.conv1x1 = nn.Conv2d(ndf * mult * 2, ndf * mult, kernel_size=1, stride=1, bias=True)
424
+ self.leaky_relu = nn.LeakyReLU(0.2, True)
425
+
426
+ self.pad = nn.ReflectionPad2d(1)
427
+ self.conv = nn.utils.spectral_norm(
428
+ nn.Conv2d(ndf * mult, 1, kernel_size=4, stride=1, padding=0, bias=False))
429
+
430
+ self.model = nn.Sequential(*model)
431
+
432
+ def forward(self, input):
433
+ x = self.model(input)
434
+
435
+ gap = torch.nn.functional.adaptive_avg_pool2d(x, 1)
436
+ gap_logit = self.gap_fc(gap.view(x.shape[0], -1))
437
+ gap_weight = list(self.gap_fc.parameters())[0]
438
+ gap = x * gap_weight.unsqueeze(2).unsqueeze(3)
439
+
440
+ gmp = torch.nn.functional.adaptive_max_pool2d(x, 1)
441
+ gmp_logit = self.gmp_fc(gmp.view(x.shape[0], -1))
442
+ gmp_weight = list(self.gmp_fc.parameters())[0]
443
+ gmp = x * gmp_weight.unsqueeze(2).unsqueeze(3)
444
+
445
+ cam_logit = torch.cat([gap_logit, gmp_logit], 1)
446
+ x = torch.cat([gap, gmp], 1)
447
+ x = self.leaky_relu(self.conv1x1(x))
448
+
449
+ heatmap = torch.sum(x, dim=1, keepdim=True)
450
+
451
+ x = self.pad(x)
452
+ out = self.conv(x)
453
+
454
+ return out, cam_logit, heatmap
455
+
456
+
457
+ class RhoClipper(object):
458
+ def __init__(self, min, max):
459
+ self.clip_min = min
460
+ self.clip_max = max
461
+ assert min < max
462
+
463
+ def __call__(self, module):
464
+ if hasattr(module, 'rho'):
465
+ w = module.rho.data
466
+ w = w.clamp(self.clip_min, self.clip_max)
467
+ module.rho.data = w
468
+
469
+
470
+ class WClipper(object):
471
+ def __init__(self, min, max):
472
+ self.clip_min = min
473
+ self.clip_max = max
474
+ assert min < max
475
+
476
+ def __call__(self, module):
477
+ if hasattr(module, 'w_gamma'):
478
+ w = module.w_gamma.data
479
+ w = w.clamp(self.clip_min, self.clip_max)
480
+ module.w_gamma.data = w
481
+
482
+ if hasattr(module, 'w_beta'):
483
+ w = module.w_beta.data
484
+ w = w.clamp(self.clip_min, self.clip_max)
485
+ module.w_beta.data = w
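Aside on the normalization blocks above: adaLIN blends per-channel instance-norm statistics with per-sample layer-norm statistics through the learned rho, then rescales with externally predicted gamma/beta (SoftAdaLIN predicts those from content and style features). A minimal standalone sketch of that blend, with invented tensor sizes, not taken from the repo:

import torch

x = torch.randn(2, 8, 16, 16)                       # (N, C, H, W) feature map
rho = torch.full((1, 8, 1, 1), 0.9)                 # same init value as adaLIN above
gamma, beta = torch.ones(2, 8), torch.zeros(2, 8)   # per-sample scale/shift, e.g. from SoftAdaLIN
eps = 1e-5

out_in = (x - x.mean(dim=[2, 3], keepdim=True)) / torch.sqrt(x.var(dim=[2, 3], keepdim=True) + eps)
out_ln = (x - x.mean(dim=[1, 2, 3], keepdim=True)) / torch.sqrt(x.var(dim=[1, 2, 3], keepdim=True) + eps)
out = rho * out_in + (1 - rho) * out_ln              # learned mix of instance norm and layer norm
out = out * gamma.unsqueeze(2).unsqueeze(3) + beta.unsqueeze(2).unsqueeze(3)
print(out.shape)                                     # torch.Size([2, 8, 16, 16])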
p2c/models/photo2cartoon_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542914cb8580cb733c7e914d22cc24ddabbbb207516d74ffc793f2a1b6c3eeb3
3
+ size 15290506
p2c/models/photo2cartoon_weights.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08c84ea4c62251c6157dbf1d3ef44d2549d6aa8c9ee72ec9e4b3089ce5d5f0f
3
+ size 144306956
p2c/predict.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cog
2
+ import cv2
3
+ import tempfile
4
+ import torch
5
+ import numpy as np
6
+ import os
7
+ from pathlib import Path
8
+ from utils import Preprocess
9
+ from models import ResnetGenerator
10
+
11
+
12
+ class Predictor(cog.Predictor):
13
+ def setup(self):
14
+ pass
15
+
16
+ @cog.input("photo", type=Path, help="portrait photo (size < 1M)")
17
+ def predict(self, photo):
18
+ img = cv2.cvtColor(cv2.imread(str(photo)), cv2.COLOR_BGR2RGB)
19
+ out_path = gen_cartoon(img)
20
+ return out_path
21
+
22
+
23
+ def gen_cartoon(img):
24
+ pre = Preprocess()
25
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
26
+ net = ResnetGenerator(ngf=32, img_size=256, light=True).to(device)
27
+
28
+ assert os.path.exists(
29
+ './models/photo2cartoon_weights.pt'), "[Step1: load weights] Can not find 'photo2cartoon_weights.pt' in folder 'models!!!'"
30
+ params = torch.load('./models/photo2cartoon_weights.pt', map_location=device)
31
+ net.load_state_dict(params['genA2B'])
32
+
33
+ # face alignment and segmentation
34
+ face_rgba = pre.process(img)
35
+ if face_rgba is None:
36
+ return None
37
+
38
+ face_rgba = cv2.resize(face_rgba, (256, 256), interpolation=cv2.INTER_AREA)
39
+ face = face_rgba[:, :, :3].copy()
40
+ mask = face_rgba[:, :, 3][:, :, np.newaxis].copy() / 255.
41
+ face = (face * mask + (1 - mask) * 255) / 127.5 - 1
42
+
43
+ face = np.transpose(face[np.newaxis, :, :, :], (0, 3, 1, 2)).astype(np.float32)
44
+ face = torch.from_numpy(face).to(device)
45
+
46
+ # inference
47
+ with torch.no_grad():
48
+ cartoon = net(face)[0][0]
49
+
50
+ # post-process
51
+ cartoon = np.transpose(cartoon.cpu().numpy(), (1, 2, 0))
52
+ cartoon = (cartoon + 1) * 127.5
53
+ cartoon = (cartoon * mask + 255 * (1 - mask)).astype(np.uint8)
54
+ cartoon = cv2.cvtColor(cartoon, cv2.COLOR_RGB2BGR)
55
+ out_path = Path(tempfile.mkdtemp()) / "out.png"
56
+ cv2.imwrite(str(out_path), cartoon)
57
+ return out_path
p2c/test.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+ from models import ResnetGenerator
6
+ import argparse
7
+ from utils import Preprocess
8
+
9
+
10
+ parser = argparse.ArgumentParser()
11
+ parser.add_argument('--photo_path', type=str, help='input photo path')
12
+ parser.add_argument('--save_path', type=str, help='cartoon save path')
13
+ args = parser.parse_args()
14
+
15
+ os.makedirs(os.path.dirname(args.save_path) or '.', exist_ok=True)  # dirname is '' when save_path is a bare filename
16
+
17
+ class Photo2Cartoon:
18
+ def __init__(self):
19
+ self.pre = Preprocess()
20
+ self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
21
+ self.net = ResnetGenerator(ngf=32, img_size=256, light=True).to(self.device)
22
+
23
+ assert os.path.exists('./models/photo2cartoon_weights.pt'), "[Step1: load weights] Can not find 'photo2cartoon_weights.pt' in folder 'models!!!'"
24
+ params = torch.load('./models/photo2cartoon_weights.pt', map_location=self.device)
25
+ self.net.load_state_dict(params['genA2B'])
26
+ print('[Step1: load weights] success!')
27
+
28
+ def inference(self, img):
29
+ # face alignment and segmentation
30
+ face_rgba = self.pre.process(img)
31
+ if face_rgba is None:
32
+ print('[Step2: face detect] cannot detect a face!')
33
+ return None
34
+
35
+ print('[Step2: face detect] success!')
36
+ face_rgba = cv2.resize(face_rgba, (256, 256), interpolation=cv2.INTER_AREA)
37
+ face = face_rgba[:, :, :3].copy()
38
+ mask = face_rgba[:, :, 3][:, :, np.newaxis].copy() / 255.
39
+ face = (face*mask + (1-mask)*255) / 127.5 - 1
40
+
41
+ face = np.transpose(face[np.newaxis, :, :, :], (0, 3, 1, 2)).astype(np.float32)
42
+ face = torch.from_numpy(face).to(self.device)
43
+
44
+ # inference
45
+ with torch.no_grad():
46
+ cartoon = self.net(face)[0][0]
47
+
48
+ # post-process
49
+ cartoon = np.transpose(cartoon.cpu().numpy(), (1, 2, 0))
50
+ cartoon = (cartoon + 1) * 127.5
51
+ cartoon = (cartoon * mask + 255 * (1 - mask)).astype(np.uint8)
52
+ cartoon = cv2.cvtColor(cartoon, cv2.COLOR_RGB2BGR)
53
+ print('[Step3: photo to cartoon] success!')
54
+ return cartoon
55
+
56
+
57
+ if __name__ == '__main__':
58
+ img = cv2.cvtColor(cv2.imread(args.photo_path), cv2.COLOR_BGR2RGB)
59
+ c2p = Photo2Cartoon()
60
+ cartoon = c2p.inference(img)
61
+ if cartoon is not None:
62
+ cv2.imwrite(args.save_path, cartoon)
63
+ print('Cartoon portrait has been saved successfully!')
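The .pt weights referenced by the assert are included in this commit under p2c/models, so the script above can be run from the p2c directory roughly as follows (image paths are placeholders):

python test.py --photo_path ./images/photo_test.jpg --save_path ./images/cartoon_result.png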
p2c/test_onnx.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import onnxruntime
5
+ from utils import Preprocess
6
+
7
+
8
+ class Photo2Cartoon:
9
+ def __init__(self):
10
+ self.pre = Preprocess()
11
+ curPath = os.path.abspath(os.path.dirname(__file__))
12
+ # assert os.path.exists('./models/photo2cartoon_weights.onnx'), "[Step1: load weights] Can not find 'photo2cartoon_weights.onnx' in folder 'models!!!'"
13
+ self.session = onnxruntime.InferenceSession(os.path.join(curPath, 'models/photo2cartoon_weights.onnx'))
14
+ print('[Step1: load weights] success!')
15
+
16
+ def inference(self, in_path):
17
+ img = cv2.cvtColor(cv2.imread(in_path), cv2.COLOR_BGR2RGB)
18
+ # face alignment and segmentation
19
+ face_rgba = self.pre.process(img)
20
+ if face_rgba is None:
21
+ print('[Step2: face detect] cannot detect a face!')
22
+ return None
23
+
24
+ print('[Step2: face detect] success!')
25
+ face_rgba = cv2.resize(face_rgba, (256, 256), interpolation=cv2.INTER_AREA)
26
+ face = face_rgba[:, :, :3].copy()
27
+ mask = face_rgba[:, :, 3][:, :, np.newaxis].copy() / 255.
28
+ face = (face * mask + (1 - mask) * 255) / 127.5 - 1
29
+
30
+ face = np.transpose(face[np.newaxis, :, :, :], (0, 3, 1, 2)).astype(np.float32)
31
+
32
+ # inference
33
+ cartoon = self.session.run(['output'], input_feed={'input': face})
34
+
35
+ # post-process
36
+ cartoon = np.transpose(cartoon[0][0], (1, 2, 0))
37
+ cartoon = (cartoon + 1) * 127.5
38
+ cartoon = (cartoon * mask + 255 * (1 - mask)).astype(np.uint8)
39
+ #cartoon = cv2.cvtColor(cartoon, cv2.COLOR_RGB2BGR)
40
+
41
+ print('[Step3: photo to cartoon] success!')
42
+ return cartoon
43
+
44
+
45
+ if __name__ == '__main__':
46
+ c2p = Photo2Cartoon()
47
+ cartoon = c2p.inference('')
48
+ if cartoon is not None:
49
+ print('Cartoon portrait has been generated successfully!')
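A minimal driver sketch for the ONNX path above, assuming it is run from the p2c directory; inference() returns an RGB uint8 array, or None when no face is detected (the input filename is a placeholder):

import PIL.Image
from test_onnx import Photo2Cartoon

p2c = Photo2Cartoon()
cartoon = p2c.inference('portrait.jpg')   # placeholder path to a portrait photo
if cartoon is not None:
    PIL.Image.fromarray(cartoon).save('cartoon.png')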
p2c/train.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import UgatitSadalinHourglass
2
+ import argparse
3
+ import shutil
4
+ from utils import *
5
+
6
+
7
+ def parse_args():
8
+ """parsing and configuration"""
9
+ desc = "photo2cartoon"
10
+ parser = argparse.ArgumentParser(description=desc)
11
+ parser.add_argument('--phase', type=str, default='train', help='[train / test]')
12
+ parser.add_argument('--light', type=str2bool, default=True, help='[U-GAT-IT full version / U-GAT-IT light version]')
13
+ parser.add_argument('--dataset', type=str, default='photo2cartoon', help='dataset name')
14
+
15
+ parser.add_argument('--iteration', type=int, default=1000000, help='The number of training iterations')
16
+ parser.add_argument('--batch_size', type=int, default=1, help='The size of batch size')
17
+ parser.add_argument('--print_freq', type=int, default=1000, help='The number of image print freq')
18
+ parser.add_argument('--save_freq', type=int, default=1000, help='The number of model save freq')
19
+ parser.add_argument('--decay_flag', type=str2bool, default=True, help='The decay_flag')
20
+
21
+ parser.add_argument('--lr', type=float, default=0.0001, help='The learning rate')
22
+ parser.add_argument('--adv_weight', type=int, default=1, help='Weight for GAN')
23
+ parser.add_argument('--cycle_weight', type=int, default=50, help='Weight for Cycle')
24
+ parser.add_argument('--identity_weight', type=int, default=10, help='Weight for Identity')
25
+ parser.add_argument('--cam_weight', type=int, default=1000, help='Weight for CAM')
26
+ parser.add_argument('--faceid_weight', type=int, default=1, help='Weight for Face ID')
27
+
28
+ parser.add_argument('--ch', type=int, default=32, help='base channel number per layer')
29
+ parser.add_argument('--n_dis', type=int, default=6, help='The number of discriminator layer')
30
+
31
+ parser.add_argument('--img_size', type=int, default=256, help='The size of image')
32
+ parser.add_argument('--img_ch', type=int, default=3, help='The size of image channel')
33
+
34
+ # parser.add_argument('--device', type=str, default='cuda:0', help='Set gpu mode: [cpu, cuda]')
35
+ parser.add_argument('--gpu_ids', type=int, default=[0], nargs='+', help='Set [0, 1, 2, 3] for multi-gpu training')
36
+ parser.add_argument('--benchmark_flag', type=str2bool, default=False)
37
+ parser.add_argument('--resume', type=str2bool, default=False)
38
+ parser.add_argument('--rho_clipper', type=float, default=1.0)
39
+ parser.add_argument('--w_clipper', type=float, default=1.0)
40
+ parser.add_argument('--pretrained_weights', type=str, default='', help='pretrained weight path')
41
+
42
+ args = parser.parse_args()
43
+ args.result_dir = './experiment/{}-size{}-ch{}-{}-lr{}-adv{}-cyc{}-id{}-identity{}-cam{}'.format(
44
+ os.path.basename(__file__)[:-3],
45
+ args.img_size,
46
+ args.ch,
47
+ args.light,
48
+ args.lr,
49
+ args.adv_weight,
50
+ args.cycle_weight,
51
+ args.faceid_weight,
52
+ args.identity_weight,
53
+ args.cam_weight)
54
+
55
+ return check_args(args)
56
+
57
+
58
+ def check_args(args):
59
+ check_folder(os.path.join(args.result_dir, args.dataset, 'model'))
60
+ check_folder(os.path.join(args.result_dir, args.dataset, 'img'))
61
+ check_folder(os.path.join(args.result_dir, args.dataset, 'test'))
62
+ shutil.copy(__file__, args.result_dir)
63
+ return args
64
+
65
+
66
+ def main():
67
+ args = parse_args()
68
+ if args is None:
69
+ exit()
70
+
71
+ gan = UgatitSadalinHourglass(args)
72
+ gan.build_model()
73
+
74
+ if args.phase == 'train':
75
+ gan.train()
76
+ print(" [*] Training finished!")
77
+
78
+ if args.phase == 'test':
79
+ gan.test()
80
+ print(" [*] Test finished!")
81
+
82
+
83
+ if __name__ == '__main__':
84
+ main()
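Sticking to the defaults defined above, a typical training invocation might look like this (the dataset folder layout expected by UgatitSadalinHourglass is not part of this commit):

python train.py --dataset photo2cartoon --batch_size 1 --gpu_ids 0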
p2c/utils/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .preprocess import Preprocess
2
+ from .utils import *
p2c/utils/face_detect.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import math
3
+ import numpy as np
4
+ import face_alignment
5
+
6
+
7
+ class FaceDetect:
8
+ def __init__(self, device, detector):
9
+ # landmarks are detected with the face_alignment library; set device='cuda' to use a GPU.
10
+ self.fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, device=device, face_detector=detector)
11
+
12
+ def align(self, image):
13
+ landmarks = self.__get_max_face_landmarks(image)
14
+
15
+ if landmarks is None:
16
+ return None
17
+
18
+ else:
19
+ return self.__rotate(image, landmarks)
20
+
21
+ def __get_max_face_landmarks(self, image):
22
+ preds = self.fa.get_landmarks(image)
23
+ if preds is None:
24
+ return None
25
+
26
+ elif len(preds) == 1:
27
+ return preds[0]
28
+
29
+ else:
30
+ # find max face
31
+ areas = []
32
+ for pred in preds:
33
+ landmarks_top = np.min(pred[:, 1])
34
+ landmarks_bottom = np.max(pred[:, 1])
35
+ landmarks_left = np.min(pred[:, 0])
36
+ landmarks_right = np.max(pred[:, 0])
37
+ areas.append((landmarks_bottom - landmarks_top) * (landmarks_right - landmarks_left))
38
+ max_face_index = np.argmax(areas)
39
+ return preds[max_face_index]
40
+
41
+ @staticmethod
42
+ def __rotate(image, landmarks):
43
+ # rotation angle
44
+ left_eye_corner = landmarks[36]
45
+ right_eye_corner = landmarks[45]
46
+ radian = np.arctan((left_eye_corner[1] - right_eye_corner[1]) / (left_eye_corner[0] - right_eye_corner[0]))
47
+
48
+ # image size after rotating
49
+ height, width, _ = image.shape
50
+ cos = math.cos(radian)
51
+ sin = math.sin(radian)
52
+ new_w = int(width * abs(cos) + height * abs(sin))
53
+ new_h = int(width * abs(sin) + height * abs(cos))
54
+
55
+ # translation
56
+ Tx = new_w // 2 - width // 2
57
+ Ty = new_h // 2 - height // 2
58
+
59
+ # affine matrix
60
+ M = np.array([[cos, sin, (1 - cos) * width / 2. - sin * height / 2. + Tx],
61
+ [-sin, cos, sin * width / 2. + (1 - cos) * height / 2. + Ty]])
62
+
63
+ image_rotate = cv2.warpAffine(image, M, (new_w, new_h), borderValue=(255, 255, 255))
64
+
65
+ landmarks = np.concatenate([landmarks, np.ones((landmarks.shape[0], 1))], axis=1)
66
+ landmarks_rotate = np.dot(M, landmarks.T).T
67
+ return image_rotate, landmarks_rotate
68
+
69
+
70
+ if __name__ == '__main__':
71
+ img = cv2.cvtColor(cv2.imread('3989161_1.jpg'), cv2.COLOR_BGR2RGB)
72
+ fd = FaceDetect(device='cpu', detector='dlib')  # __init__ also requires a detector: 'dlib' or 'sfd'
73
+ face_info = fd.align(img)
74
+ if face_info is not None:
75
+ image_align, landmarks_align = face_info
76
+
77
+ for i in range(landmarks_align.shape[0]):
78
+ cv2.circle(image_align, (int(landmarks_align[i][0]), int(landmarks_align[i][1])), 2, (255, 0, 0), -1)
79
+
80
+ cv2.imwrite('image_align.png', cv2.cvtColor(image_align, cv2.COLOR_RGB2BGR))
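A quick, illustrative check of the affine matrix built in __rotate: it should map the original image center to the center of the enlarged canvas (all numbers below are invented):

import numpy as np

width, height, radian = 400, 300, 0.1
cos, sin = np.cos(radian), np.sin(radian)
new_w = int(width * abs(cos) + height * abs(sin))
new_h = int(width * abs(sin) + height * abs(cos))
Tx, Ty = new_w // 2 - width // 2, new_h // 2 - height // 2
M = np.array([[cos, sin, (1 - cos) * width / 2. - sin * height / 2. + Tx],
              [-sin, cos, sin * width / 2. + (1 - cos) * height / 2. + Ty]])
print(M @ np.array([width / 2., height / 2., 1.]))   # approximately [new_w / 2, new_h / 2]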
p2c/utils/face_seg.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import tensorflow as tf
5
+ from tensorflow.python.platform import gfile
6
+
7
+
8
+ curPath = os.path.abspath(os.path.dirname(__file__))
9
+
10
+
11
+ class FaceSeg:
12
+ def __init__(self, model_path=os.path.join(curPath, 'seg_model_384.pb')):
13
+ config = tf.compat.v1.ConfigProto()
14
+ config.gpu_options.allow_growth = True
15
+ self._graph = tf.Graph()
16
+ self._sess = tf.compat.v1.Session(config=config, graph=self._graph)
17
+
18
+ self.pb_file_path = model_path
19
+ self._restore_from_pb()
20
+ self.input_op = self._sess.graph.get_tensor_by_name('input_1:0')
21
+ self.output_op = self._sess.graph.get_tensor_by_name('sigmoid/Sigmoid:0')
22
+
23
+ def _restore_from_pb(self):
24
+ with self._sess.as_default():
25
+ with self._graph.as_default():
26
+ with gfile.FastGFile(self.pb_file_path, 'rb') as f:
27
+ graph_def = tf.compat.v1.GraphDef()
28
+ graph_def.ParseFromString(f.read())
29
+ tf.import_graph_def(graph_def, name='')
30
+
31
+ def input_transform(self, image):
32
+ image = cv2.resize(image, (384, 384), interpolation=cv2.INTER_AREA)
33
+ image_input = (image / 255.)[np.newaxis, :, :, :]
34
+ return image_input
35
+
36
+ def output_transform(self, output, shape):
37
+ output = cv2.resize(output, (shape[1], shape[0]))
38
+ image_output = (output * 255).astype(np.uint8)
39
+ return image_output
40
+
41
+ def get_mask(self, image):
42
+ image_input = self.input_transform(image)
43
+ output = self._sess.run(self.output_op, feed_dict={self.input_op: image_input})[0]
44
+ return self.output_transform(output, shape=image.shape[:2])
p2c/utils/preprocess.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .face_detect import FaceDetect
2
+ from .face_seg import FaceSeg
3
+ import numpy as np
4
+
5
+
6
+ class Preprocess:
7
+ def __init__(self, device='cpu', detector='dlib'):
8
+ self.detect = FaceDetect(device, detector) # device = 'cpu' or 'cuda', detector = 'dlib' or 'sfd'
9
+ self.segment = FaceSeg()
10
+
11
+ def process(self, image):
12
+ face_info = self.detect.align(image)
13
+ if face_info is None:
14
+ return None
15
+ image_align, landmarks_align = face_info
16
+
17
+ face = self.__crop(image_align, landmarks_align)
18
+ mask = self.segment.get_mask(face)
19
+ return np.dstack((face, mask))
20
+
21
+ @staticmethod
22
+ def __crop(image, landmarks):
23
+ landmarks_top = np.min(landmarks[:, 1])
24
+ landmarks_bottom = np.max(landmarks[:, 1])
25
+ landmarks_left = np.min(landmarks[:, 0])
26
+ landmarks_right = np.max(landmarks[:, 0])
27
+
28
+ # expand bbox
29
+ top = int(landmarks_top - 0.8 * (landmarks_bottom - landmarks_top))
30
+ bottom = int(landmarks_bottom + 0.3 * (landmarks_bottom - landmarks_top))
31
+ left = int(landmarks_left - 0.3 * (landmarks_right - landmarks_left))
32
+ right = int(landmarks_right + 0.3 * (landmarks_right - landmarks_left))
33
+
34
+ if bottom - top > right - left:
35
+ left -= ((bottom - top) - (right - left)) // 2
36
+ right = left + (bottom - top)
37
+ else:
38
+ top -= ((right - left) - (bottom - top)) // 2
39
+ bottom = top + (right - left)
40
+
41
+ image_crop = np.ones((bottom - top + 1, right - left + 1, 3), np.uint8) * 255
42
+
43
+ h, w = image.shape[:2]
44
+ left_white = max(0, -left)
45
+ left = max(0, left)
46
+ right = min(right, w-1)
47
+ right_white = left_white + (right-left)
48
+ top_white = max(0, -top)
49
+ top = max(0, top)
50
+ bottom = min(bottom, h-1)
51
+ bottom_white = top_white + (bottom - top)
52
+
53
+ image_crop[top_white:bottom_white+1, left_white:right_white+1] = image[top:bottom+1, left:right+1].copy()
54
+ return image_crop
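To make the bounding-box expansion in __crop concrete, here is a worked example with invented landmark extents (top=100, bottom=300, left=120, right=280):

top = int(100 - 0.8 * (300 - 100))      # -60: extra headroom above the face
bottom = int(300 + 0.3 * (300 - 100))   # 360
left = int(120 - 0.3 * (280 - 120))     # 72
right = int(280 + 0.3 * (280 - 120))    # 328
# height (420) exceeds width (256), so the box is widened into a square:
left -= ((bottom - top) - (right - left)) // 2   # 72 - 82 = -10
right = left + (bottom - top)                    # 410
# negative coordinates fall outside the image and are padded with white.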
p2c/utils/seg_model_384.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a04bc2032b54013d2ae994b34d22518144276f1cbdd2d8cbb1a4a28f50285f
3
+ size 32477258
p2c/utils/utils.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import torch
4
+ import numpy as np
5
+ from scipy import misc
6
+
7
+
8
+ def load_test_data(image_path, size=256):
9
+ img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
10
+ if img is None:
11
+ return None
12
+
13
+ h, w, c = img.shape
14
+ if img.shape[2] == 4:
15
+ white = np.ones((h, w, 3), np.uint8) * 255
16
+ img_rgb = img[:, :, :3].copy()
17
+ mask = img[:, :, 3].copy()
18
+ mask = (mask / 255).astype(np.uint8)
19
+ img = (img_rgb * mask[:, :, np.newaxis]).astype(np.uint8) + white * (1 - mask[:, :, np.newaxis])
20
+
21
+ img = cv2.resize(img, (size, size), cv2.INTER_AREA)
22
+ img = RGB2BGR(img)
23
+
24
+ img = np.expand_dims(img, axis=0)
25
+ img = preprocessing(img)
26
+ return img
27
+
28
+
29
+ def preprocessing(x):
30
+ x = x/127.5 - 1
31
+ # -1 ~ 1
32
+ return x
33
+
34
+
35
+ def save_images(images, size, image_path):
36
+ return imsave(inverse_transform(images), size, image_path)
37
+
38
+
39
+ def inverse_transform(images):
40
+ return (images+1.) / 2
41
+
42
+
43
+ def imsave(images, size, path):
44
+ return cv2.imwrite(path, (merge(images, size) * 255).astype(np.uint8))  # scipy.misc.imsave was removed from SciPy; save via OpenCV
45
+
46
+
47
+ def merge(images, size):
48
+ h, w = images.shape[1], images.shape[2]
49
+ img = np.zeros((h * size[0], w * size[1], 3))
50
+ for idx, image in enumerate(images):
51
+ i = idx % size[1]
52
+ j = idx // size[1]
53
+ img[h*j:h*(j+1), w*i:w*(i+1), :] = image
54
+
55
+ return img
56
+
57
+
58
+ def check_folder(log_dir):
59
+ if not os.path.exists(log_dir):
60
+ os.makedirs(log_dir)
61
+ return log_dir
62
+
63
+
64
+ def str2bool(x):
65
+ return x.lower() == 'true'  # only the string 'true' (case-insensitive) maps to True
66
+
67
+
68
+ def cam(x, size=256):
69
+ x = x - np.min(x)
70
+ cam_img = x / np.max(x)
71
+ cam_img = np.uint8(255 * cam_img)
72
+ cam_img = cv2.resize(cam_img, (size, size))
73
+ cam_img = cv2.applyColorMap(cam_img, cv2.COLORMAP_JET)
74
+ return cam_img / 255.0
75
+
76
+
77
+ def imagenet_norm(x):
78
+ mean = [0.485, 0.456, 0.406]
79
+ std = [0.229, 0.224, 0.225]  # standard ImageNet std
80
+ mean = torch.FloatTensor(mean).unsqueeze(0).unsqueeze(2).unsqueeze(3).to(x.device)
81
+ std = torch.FloatTensor(std).unsqueeze(0).unsqueeze(2).unsqueeze(3).to(x.device)
82
+ return (x - mean) / std
83
+
84
+
85
+ def denorm(x):
86
+ return x * 0.5 + 0.5
87
+
88
+
89
+ def tensor2numpy(x):
90
+ return x.detach().cpu().numpy().transpose(1, 2, 0)
91
+
92
+
93
+ def RGB2BGR(x):
94
+ return cv2.cvtColor(x, cv2.COLOR_RGB2BGR)
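For reference, preprocessing() and inverse_transform() in utils.py above are simple affine maps; an illustrative round trip:

import numpy as np

x = np.array([0., 127.5, 255.])
y = x / 127.5 - 1       # preprocessing(): maps [0, 255] to [-1, 1]
print((y + 1.) / 2)     # inverse_transform(): back to [0, 1] -> [0.  0.5 1. ]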
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ cmake
2
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ opencv-python-headless==4.5.5.62
2
+ Pillow==9.0.1
3
+ scipy==1.7.3
4
+ tensorflow-gpu==1.14.0
5
+ scikit-image==0.14.5
6
+ onnxruntime
7
+ face-alignment
8
+ dlib
9
+