Johannes Kolbe committed
Commit: ff2b8e3
Parent(s): 3b72cdb

add original sefa files back in
- SessionState.py +129 -0
- interface.py +128 -0
- models/__init__.py +114 -0
- models/pggan_discriminator.py +402 -0
- models/pggan_generator.py +338 -0
- models/stylegan2_discriminator.py +468 -0
- models/stylegan2_generator.py +996 -0
- models/stylegan_discriminator.py +530 -0
- models/stylegan_generator.py +869 -0
- models/sync_op.py +18 -0
- sefa.py +145 -0
- utils.py +509 -0
SessionState.py
ADDED
@@ -0,0 +1,129 @@
"""Adds pre-session state to StreamLit.

This file is borrowed from
https://gist.github.com/tvst/036da038ab3e999a64497f42de966a92
"""

# pylint: disable=protected-access

try:
    import streamlit.ReportThread as ReportThread
    from streamlit.server.Server import Server
except ModuleNotFoundError:
    # Streamlit >= 0.65.0
    import streamlit.report_thread as ReportThread
    from streamlit.server.server import Server


class SessionState(object):
    """Hack to add per-session state to Streamlit.

    Usage
    -----

    >>> import SessionState
    >>>
    >>> session_state = SessionState.get(user_name='', favorite_color='black')
    >>> session_state.user_name
    ''
    >>> session_state.user_name = 'Mary'
    >>> session_state.favorite_color
    'black'

    Since you set user_name above, next time your script runs this will be the
    result:
    >>> session_state = get(user_name='', favorite_color='black')
    >>> session_state.user_name
    'Mary'

    """

    def __init__(self, **kwargs):
        """A new SessionState object.

        Parameters
        ----------
        **kwargs : any
            Default values for the session state.

        Example
        -------
        >>> session_state = SessionState(user_name='', favorite_color='black')
        >>> session_state.user_name = 'Mary'
        ''
        >>> session_state.favorite_color
        'black'

        """
        for key, val in kwargs.items():
            setattr(self, key, val)


def get(**kwargs):
    """Gets a SessionState object for the current session.

    Creates a new object if necessary.

    Parameters
    ----------
    **kwargs : any
        Default values you want to add to the session state, if we're creating a
        new one.

    Example
    -------
    >>> session_state = get(user_name='', favorite_color='black')
    >>> session_state.user_name
    ''
    >>> session_state.user_name = 'Mary'
    >>> session_state.favorite_color
    'black'

    Since you set user_name above, next time your script runs this will be the
    result:
    >>> session_state = get(user_name='', favorite_color='black')
    >>> session_state.user_name
    'Mary'

    """
    # Hack to get the session object from Streamlit.

    ctx = ReportThread.get_report_ctx()

    this_session = None

    current_server = Server.get_current()
    if hasattr(current_server, '_session_infos'):
        # Streamlit < 0.56
        session_infos = Server.get_current()._session_infos.values()
    else:
        session_infos = Server.get_current()._session_info_by_id.values()

    for session_info in session_infos:
        s = session_info.session
        if (
                # Streamlit < 0.54.0
                (hasattr(s, '_main_dg') and s._main_dg == ctx.main_dg)
                or
                # Streamlit >= 0.54.0
                (not hasattr(s, '_main_dg') and s.enqueue == ctx.enqueue)
                or
                # Streamlit >= 0.65.2
                (not hasattr(s, '_main_dg') and
                 s._uploaded_file_mgr == ctx.uploaded_file_mgr)
        ):
            this_session = s

    if this_session is None:
        raise RuntimeError(
            "Oh noes. Couldn't get your Streamlit Session object. "
            'Are you doing something fancy with threads?')

    # Got the session object! Now let's attach some state into it.

    if not hasattr(this_session, '_custom_session_state'):
        this_session._custom_session_state = SessionState(**kwargs)

    return this_session._custom_session_state

# pylint: enable=protected-access
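
Note: a minimal hypothetical usage sketch (not part of this commit), assuming the script is launched through `streamlit run` so that a report context exists; the pattern mirrors the docstring above.

    # Hypothetical example, not part of the commit.
    import streamlit as st
    import SessionState

    state = SessionState.get(counter=0)   # created once per browser session
    if st.button('Increment'):
        state.counter += 1
    st.write(f'Counter for this session: {state.counter}')
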
interface.py
ADDED
@@ -0,0 +1,128 @@
# python 3.7
"""Demo."""

import numpy as np
import torch
import streamlit as st
import SessionState

from models import parse_gan_type
from utils import to_tensor
from utils import postprocess
from utils import load_generator
from utils import factorize_weight


@st.cache(allow_output_mutation=True, show_spinner=False)
def get_model(model_name):
    """Gets model by name."""
    return load_generator(model_name)


@st.cache(allow_output_mutation=True, show_spinner=False)
def factorize_model(model, layer_idx):
    """Factorizes semantics from target layers of the given model."""
    return factorize_weight(model, layer_idx)


def sample(model, gan_type, num=1):
    """Samples latent codes."""
    codes = torch.randn(num, model.z_space_dim).cuda()
    if gan_type == 'pggan':
        codes = model.layer0.pixel_norm(codes)
    elif gan_type == 'stylegan':
        codes = model.mapping(codes)['w']
        codes = model.truncation(codes,
                                 trunc_psi=0.7,
                                 trunc_layers=8)
    elif gan_type == 'stylegan2':
        codes = model.mapping(codes)['w']
        codes = model.truncation(codes,
                                 trunc_psi=0.5,
                                 trunc_layers=18)
    codes = codes.detach().cpu().numpy()
    return codes


@st.cache(allow_output_mutation=True, show_spinner=False)
def synthesize(model, gan_type, code):
    """Synthesizes an image with the give code."""
    if gan_type == 'pggan':
        image = model(to_tensor(code))['image']
    elif gan_type in ['stylegan', 'stylegan2']:
        image = model.synthesis(to_tensor(code))['image']
    image = postprocess(image)[0]
    return image


def main():
    """Main function (loop for StreamLit)."""
    st.title('Closed-Form Factorization of Latent Semantics in GANs')
    st.sidebar.title('Options')
    reset = st.sidebar.button('Reset')

    model_name = st.sidebar.selectbox(
        'Model to Interpret',
        ['stylegan_animeface512', 'stylegan_car512', 'stylegan_cat256',
         'pggan_celebahq1024'])

    model = get_model(model_name)
    gan_type = parse_gan_type(model)
    layer_idx = st.sidebar.selectbox(
        'Layers to Interpret',
        ['all', '0-1', '2-5', '6-13'])
    layers, boundaries, eigen_values = factorize_model(model, layer_idx)

    num_semantics = st.sidebar.number_input(
        'Number of semantics', value=10, min_value=0, max_value=None, step=1)
    steps = {sem_idx: 0 for sem_idx in range(num_semantics)}
    if gan_type == 'pggan':
        max_step = 5.0
    elif gan_type == 'stylegan':
        max_step = 2.0
    elif gan_type == 'stylegan2':
        max_step = 15.0
    for sem_idx in steps:
        eigen_value = eigen_values[sem_idx]
        steps[sem_idx] = st.sidebar.slider(
            f'Semantic {sem_idx:03d} (eigen value: {eigen_value:.3f})',
            value=0.0,
            min_value=-max_step,
            max_value=max_step,
            step=0.04 * max_step if not reset else 0.0)

    image_placeholder = st.empty()
    button_placeholder = st.empty()

    try:
        base_codes = np.load(f'latent_codes/{model_name}_latents.npy')
    except FileNotFoundError:
        base_codes = sample(model, gan_type)

    state = SessionState.get(model_name=model_name,
                             code_idx=0,
                             codes=base_codes[0:1])
    if state.model_name != model_name:
        state.model_name = model_name
        state.code_idx = 0
        state.codes = base_codes[0:1]

    if button_placeholder.button('Random', key=0):
        state.code_idx += 1
        if state.code_idx < base_codes.shape[0]:
            state.codes = base_codes[state.code_idx][np.newaxis]
        else:
            state.codes = sample(model, gan_type)

    code = state.codes.copy()
    for sem_idx, step in steps.items():
        if gan_type == 'pggan':
            code += boundaries[sem_idx:sem_idx + 1] * step
        elif gan_type in ['stylegan', 'stylegan2']:
            code[:, layers, :] += boundaries[sem_idx:sem_idx + 1] * step
    image = synthesize(model, gan_type, code)
    image_placeholder.image(image / 255.0)


if __name__ == '__main__':
    main()
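
Note: the demo above is meant to be launched with `streamlit run interface.py`. Below is a hypothetical headless sketch (not part of this commit) of the same editing step, reusing the helpers imported and defined above; it assumes a CUDA device, a StyleGAN checkpoint reachable by `load_generator`, and tolerates the `st.cache` warning emitted outside a Streamlit run.

    # Hypothetical example, not part of the commit.
    model = load_generator('stylegan_animeface512')
    gan_type = parse_gan_type(model)
    layers, boundaries, eigen_values = factorize_weight(model, 'all')

    code = sample(model, gan_type, num=1)          # latent code(s) to edit
    code[:, layers, :] += boundaries[0:1] * 1.5    # move along the top semantic
    image = synthesize(model, gan_type, code)      # post-processed image array
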
models/__init__.py
ADDED
@@ -0,0 +1,114 @@
# python3.7
"""Collects all available models together."""

from .model_zoo import MODEL_ZOO
from .pggan_generator import PGGANGenerator
from .pggan_discriminator import PGGANDiscriminator
from .stylegan_generator import StyleGANGenerator
from .stylegan_discriminator import StyleGANDiscriminator
from .stylegan2_generator import StyleGAN2Generator
from .stylegan2_discriminator import StyleGAN2Discriminator

__all__ = [
    'MODEL_ZOO', 'PGGANGenerator', 'PGGANDiscriminator', 'StyleGANGenerator',
    'StyleGANDiscriminator', 'StyleGAN2Generator', 'StyleGAN2Discriminator',
    'build_generator', 'build_discriminator', 'build_model'
]

_GAN_TYPES_ALLOWED = ['pggan', 'stylegan', 'stylegan2']
_MODULES_ALLOWED = ['generator', 'discriminator']


def build_generator(gan_type, resolution, **kwargs):
    """Builds generator by GAN type.

    Args:
        gan_type: GAN type to which the generator belong.
        resolution: Synthesis resolution.
        **kwargs: Additional arguments to build the generator.

    Raises:
        ValueError: If the `gan_type` is not supported.
        NotImplementedError: If the `gan_type` is not implemented.
    """
    if gan_type not in _GAN_TYPES_ALLOWED:
        raise ValueError(f'Invalid GAN type: `{gan_type}`!\n'
                         f'Types allowed: {_GAN_TYPES_ALLOWED}.')

    if gan_type == 'pggan':
        return PGGANGenerator(resolution, **kwargs)
    if gan_type == 'stylegan':
        return StyleGANGenerator(resolution, **kwargs)
    if gan_type == 'stylegan2':
        return StyleGAN2Generator(resolution, **kwargs)
    raise NotImplementedError(f'Unsupported GAN type `{gan_type}`!')


def build_discriminator(gan_type, resolution, **kwargs):
    """Builds discriminator by GAN type.

    Args:
        gan_type: GAN type to which the discriminator belong.
        resolution: Synthesis resolution.
        **kwargs: Additional arguments to build the discriminator.

    Raises:
        ValueError: If the `gan_type` is not supported.
        NotImplementedError: If the `gan_type` is not implemented.
    """
    if gan_type not in _GAN_TYPES_ALLOWED:
        raise ValueError(f'Invalid GAN type: `{gan_type}`!\n'
                         f'Types allowed: {_GAN_TYPES_ALLOWED}.')

    if gan_type == 'pggan':
        return PGGANDiscriminator(resolution, **kwargs)
    if gan_type == 'stylegan':
        return StyleGANDiscriminator(resolution, **kwargs)
    if gan_type == 'stylegan2':
        return StyleGAN2Discriminator(resolution, **kwargs)
    raise NotImplementedError(f'Unsupported GAN type `{gan_type}`!')


def build_model(gan_type, module, resolution, **kwargs):
    """Builds a GAN module (generator/discriminator/etc).

    Args:
        gan_type: GAN type to which the model belong.
        module: GAN module to build, such as generator or discrimiantor.
        resolution: Synthesis resolution.
        **kwargs: Additional arguments to build the discriminator.

    Raises:
        ValueError: If the `module` is not supported.
        NotImplementedError: If the `module` is not implemented.
    """
    if module not in _MODULES_ALLOWED:
        raise ValueError(f'Invalid module: `{module}`!\n'
                         f'Modules allowed: {_MODULES_ALLOWED}.')

    if module == 'generator':
        return build_generator(gan_type, resolution, **kwargs)
    if module == 'discriminator':
        return build_discriminator(gan_type, resolution, **kwargs)
    raise NotImplementedError(f'Unsupported module `{module}`!')


def parse_gan_type(module):
    """Parses GAN type of a given module.

    Args:
        module: The module to parse GAN type from.

    Returns:
        A string, indicating the GAN type.

    Raises:
        ValueError: If the GAN type is unknown.
    """
    if isinstance(module, (PGGANGenerator, PGGANDiscriminator)):
        return 'pggan'
    if isinstance(module, (StyleGANGenerator, StyleGANDiscriminator)):
        return 'stylegan'
    if isinstance(module, (StyleGAN2Generator, StyleGAN2Discriminator)):
        return 'stylegan2'
    raise ValueError(f'Unable to parse GAN type from type `{type(module)}`!')
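
Note: a small hypothetical usage sketch (not part of this commit) for the factory helpers above; it assumes the full `models` package (including `model_zoo.py`, which this commit does not add) is importable.

    # Hypothetical example, not part of the commit.
    from models import build_model, parse_gan_type

    generator = build_model('pggan', 'generator', resolution=128)
    discriminator = build_model('pggan', 'discriminator', resolution=128)
    assert parse_gan_type(generator) == 'pggan'
    assert parse_gan_type(discriminator) == 'pggan'
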
models/pggan_discriminator.py
ADDED
@@ -0,0 +1,402 @@
# python3.7
"""Contains the implementation of discriminator described in PGGAN.

Paper: https://arxiv.org/pdf/1710.10196.pdf

Official TensorFlow implementation:
https://github.com/tkarras/progressive_growing_of_gans
"""

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = ['PGGANDiscriminator']

# Resolutions allowed.
_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024]

# Initial resolution.
_INIT_RES = 4

# Default gain factor for weight scaling.
_WSCALE_GAIN = np.sqrt(2.0)


class PGGANDiscriminator(nn.Module):
    """Defines the discriminator network in PGGAN.

    NOTE: The discriminator takes images with `RGB` channel order and pixel
    range [-1, 1] as inputs.

    Settings for the network:

    (1) resolution: The resolution of the input image.
    (2) image_channels: Number of channels of the input image. (default: 3)
    (3) label_size: Size of the additional label for conditional generation.
        (default: 0)
    (4) fused_scale: Whether to fused `conv2d` and `downsample` together,
        resulting in `conv2d` with strides. (default: False)
    (5) use_wscale: Whether to use weight scaling. (default: True)
    (6) minibatch_std_group_size: Group size for the minibatch standard
        deviation layer. 0 means disable. (default: 16)
    (7) fmaps_base: Factor to control number of feature maps for each layer.
        (default: 16 << 10)
    (8) fmaps_max: Maximum number of feature maps in each layer. (default: 512)
    """

    def __init__(self,
                 resolution,
                 image_channels=3,
                 label_size=0,
                 fused_scale=False,
                 use_wscale=True,
                 minibatch_std_group_size=16,
                 fmaps_base=16 << 10,
                 fmaps_max=512):
        """Initializes with basic settings.

        Raises:
            ValueError: If the `resolution` is not supported.
        """
        super().__init__()

        if resolution not in _RESOLUTIONS_ALLOWED:
            raise ValueError(f'Invalid resolution: `{resolution}`!\n'
                             f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.')

        self.init_res = _INIT_RES
        self.init_res_log2 = int(np.log2(self.init_res))
        self.resolution = resolution
        self.final_res_log2 = int(np.log2(self.resolution))
        self.image_channels = image_channels
        self.label_size = label_size
        self.fused_scale = fused_scale
        self.use_wscale = use_wscale
        self.minibatch_std_group_size = minibatch_std_group_size
        self.fmaps_base = fmaps_base
        self.fmaps_max = fmaps_max

        # Level of detail (used for progressive training).
        self.register_buffer('lod', torch.zeros(()))
        self.pth_to_tf_var_mapping = {'lod': 'lod'}

        for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1):
            res = 2 ** res_log2
            block_idx = self.final_res_log2 - res_log2

            # Input convolution layer for each resolution.
            self.add_module(
                f'input{block_idx}',
                ConvBlock(in_channels=self.image_channels,
                          out_channels=self.get_nf(res),
                          kernel_size=1,
                          padding=0,
                          use_wscale=self.use_wscale))
            self.pth_to_tf_var_mapping[f'input{block_idx}.weight'] = (
                f'FromRGB_lod{block_idx}/weight')
            self.pth_to_tf_var_mapping[f'input{block_idx}.bias'] = (
                f'FromRGB_lod{block_idx}/bias')

            # Convolution block for each resolution (except the last one).
            if res != self.init_res:
                self.add_module(
                    f'layer{2 * block_idx}',
                    ConvBlock(in_channels=self.get_nf(res),
                              out_channels=self.get_nf(res),
                              use_wscale=self.use_wscale))
                tf_layer0_name = 'Conv0'
                self.add_module(
                    f'layer{2 * block_idx + 1}',
                    ConvBlock(in_channels=self.get_nf(res),
                              out_channels=self.get_nf(res // 2),
                              downsample=True,
                              fused_scale=self.fused_scale,
                              use_wscale=self.use_wscale))
                tf_layer1_name = 'Conv1_down' if self.fused_scale else 'Conv1'

            # Convolution block for last resolution.
            else:
                self.add_module(
                    f'layer{2 * block_idx}',
                    ConvBlock(
                        in_channels=self.get_nf(res),
                        out_channels=self.get_nf(res),
                        use_wscale=self.use_wscale,
                        minibatch_std_group_size=self.minibatch_std_group_size))
                tf_layer0_name = 'Conv'
                self.add_module(
                    f'layer{2 * block_idx + 1}',
                    DenseBlock(in_channels=self.get_nf(res) * res * res,
                               out_channels=self.get_nf(res // 2),
                               use_wscale=self.use_wscale))
                tf_layer1_name = 'Dense0'

            self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = (
                f'{res}x{res}/{tf_layer0_name}/weight')
            self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = (
                f'{res}x{res}/{tf_layer0_name}/bias')
            self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = (
                f'{res}x{res}/{tf_layer1_name}/weight')
            self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = (
                f'{res}x{res}/{tf_layer1_name}/bias')

        # Final dense block.
        self.add_module(
            f'layer{2 * block_idx + 2}',
            DenseBlock(in_channels=self.get_nf(res // 2),
                       out_channels=1 + self.label_size,
                       use_wscale=self.use_wscale,
                       wscale_gain=1.0,
                       activation_type='linear'))
        self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.weight'] = (
            f'{res}x{res}/Dense1/weight')
        self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.bias'] = (
            f'{res}x{res}/Dense1/bias')

        self.downsample = DownsamplingLayer()

    def get_nf(self, res):
        """Gets number of feature maps according to current resolution."""
        return min(self.fmaps_base // res, self.fmaps_max)

    def forward(self, image, lod=None, **_unused_kwargs):
        expected_shape = (self.image_channels, self.resolution, self.resolution)
        if image.ndim != 4 or image.shape[1:] != expected_shape:
            raise ValueError(f'The input tensor should be with shape '
                             f'[batch_size, channel, height, width], where '
                             f'`channel` equals to {self.image_channels}, '
                             f'`height`, `width` equal to {self.resolution}!\n'
                             f'But `{image.shape}` is received!')

        lod = self.lod.cpu().tolist() if lod is None else lod
        if lod + self.init_res_log2 > self.final_res_log2:
            raise ValueError(f'Maximum level-of-detail (lod) is '
                             f'{self.final_res_log2 - self.init_res_log2}, '
                             f'but `{lod}` is received!')

        lod = self.lod.cpu().tolist()
        for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1):
            block_idx = current_lod = self.final_res_log2 - res_log2
            if current_lod <= lod < current_lod + 1:
                x = self.__getattr__(f'input{block_idx}')(image)
            elif current_lod - 1 < lod < current_lod:
                alpha = lod - np.floor(lod)
                x = (self.__getattr__(f'input{block_idx}')(image) * alpha +
                     x * (1 - alpha))
            if lod < current_lod + 1:
                x = self.__getattr__(f'layer{2 * block_idx}')(x)
                x = self.__getattr__(f'layer{2 * block_idx + 1}')(x)
            if lod > current_lod:
                image = self.downsample(image)
        x = self.__getattr__(f'layer{2 * block_idx + 2}')(x)
        return x


class MiniBatchSTDLayer(nn.Module):
    """Implements the minibatch standard deviation layer."""

    def __init__(self, group_size=16, epsilon=1e-8):
        super().__init__()
        self.group_size = group_size
        self.epsilon = epsilon

    def forward(self, x):
        if self.group_size <= 1:
            return x
        group_size = min(self.group_size, x.shape[0])  # [NCHW]
        y = x.view(group_size, -1, x.shape[1], x.shape[2], x.shape[3])  # [GMCHW]
        y = y - torch.mean(y, dim=0, keepdim=True)  # [GMCHW]
        y = torch.mean(y ** 2, dim=0)  # [MCHW]
        y = torch.sqrt(y + self.epsilon)  # [MCHW]
        y = torch.mean(y, dim=[1, 2, 3], keepdim=True)  # [M111]
        y = y.repeat(group_size, 1, x.shape[2], x.shape[3])  # [N1HW]
        return torch.cat([x, y], dim=1)


class DownsamplingLayer(nn.Module):
    """Implements the downsampling layer.

    Basically, this layer can be used to downsample feature maps with average
    pooling.
    """

    def __init__(self, scale_factor=2):
        super().__init__()
        self.scale_factor = scale_factor

    def forward(self, x):
        if self.scale_factor <= 1:
            return x
        return F.avg_pool2d(x,
                            kernel_size=self.scale_factor,
                            stride=self.scale_factor,
                            padding=0)


class ConvBlock(nn.Module):
    """Implements the convolutional block.

    Basically, this block executes minibatch standard deviation layer (if
    needed), convolutional layer, activation layer, and downsampling layer (
    if needed) in sequence.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=1,
                 add_bias=True,
                 downsample=False,
                 fused_scale=False,
                 use_wscale=True,
                 wscale_gain=_WSCALE_GAIN,
                 activation_type='lrelu',
                 minibatch_std_group_size=0):
        """Initializes with block settings.

        Args:
            in_channels: Number of channels of the input tensor.
            out_channels: Number of channels of the output tensor.
            kernel_size: Size of the convolutional kernels. (default: 3)
            stride: Stride parameter for convolution operation. (default: 1)
            padding: Padding parameter for convolution operation. (default: 1)
            add_bias: Whether to add bias onto the convolutional result.
                (default: True)
            downsample: Whether to downsample the result after convolution.
                (default: False)
            fused_scale: Whether to fused `conv2d` and `downsample` together,
                resulting in `conv2d` with strides. (default: False)
            use_wscale: Whether to use weight scaling. (default: True)
            wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
            activation_type: Type of activation. Support `linear` and `lrelu`.
                (default: `lrelu`)
            minibatch_std_group_size: Group size for the minibatch standard
                deviation layer. 0 means disable. (default: 0)

        Raises:
            NotImplementedError: If the `activation_type` is not supported.
        """
        super().__init__()

        if minibatch_std_group_size > 1:
            in_channels = in_channels + 1
            self.mbstd = MiniBatchSTDLayer(group_size=minibatch_std_group_size)
        else:
            self.mbstd = nn.Identity()

        if downsample and not fused_scale:
            self.downsample = DownsamplingLayer()
        else:
            self.downsample = nn.Identity()

        if downsample and fused_scale:
            self.use_stride = True
            self.stride = 2
            self.padding = 1
        else:
            self.use_stride = False
            self.stride = stride
            self.padding = padding

        weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
        fan_in = kernel_size * kernel_size * in_channels
        wscale = wscale_gain / np.sqrt(fan_in)
        if use_wscale:
            self.weight = nn.Parameter(torch.randn(*weight_shape))
            self.wscale = wscale
        else:
            self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale)
            self.wscale = 1.0

        if add_bias:
            self.bias = nn.Parameter(torch.zeros(out_channels))
        else:
            self.bias = None

        if activation_type == 'linear':
            self.activate = nn.Identity()
        elif activation_type == 'lrelu':
            self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        else:
            raise NotImplementedError(f'Not implemented activation function: '
                                      f'`{activation_type}`!')

    def forward(self, x):
        x = self.mbstd(x)
        weight = self.weight * self.wscale
        if self.use_stride:
            weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0.0)
            weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] +
                      weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1]) * 0.25
        x = F.conv2d(x,
                     weight=weight,
                     bias=self.bias,
                     stride=self.stride,
                     padding=self.padding)
        x = self.activate(x)
        x = self.downsample(x)
        return x


class DenseBlock(nn.Module):
    """Implements the dense block.

    Basically, this block executes fully-connected layer, and activation layer.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 add_bias=True,
                 use_wscale=True,
                 wscale_gain=_WSCALE_GAIN,
                 activation_type='lrelu'):
        """Initializes with block settings.

        Args:
            in_channels: Number of channels of the input tensor.
            out_channels: Number of channels of the output tensor.
            add_bias: Whether to add bias onto the fully-connected result.
                (default: True)
            use_wscale: Whether to use weight scaling. (default: True)
            wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
            activation_type: Type of activation. Support `linear` and `lrelu`.
                (default: `lrelu`)

        Raises:
            NotImplementedError: If the `activation_type` is not supported.
        """
        super().__init__()
        weight_shape = (out_channels, in_channels)
        wscale = wscale_gain / np.sqrt(in_channels)
        if use_wscale:
            self.weight = nn.Parameter(torch.randn(*weight_shape))
            self.wscale = wscale
        else:
            self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale)
            self.wscale = 1.0

        if add_bias:
            self.bias = nn.Parameter(torch.zeros(out_channels))
        else:
            self.bias = None

        if activation_type == 'linear':
            self.activate = nn.Identity()
        elif activation_type == 'lrelu':
            self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        else:
            raise NotImplementedError(f'Not implemented activation function: '
                                      f'`{activation_type}`!')

    def forward(self, x):
        if x.ndim != 2:
            x = x.view(x.shape[0], -1)
        x = F.linear(x, weight=self.weight * self.wscale, bias=self.bias)
        x = self.activate(x)
        return x
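
Note: a hypothetical shape-check sketch (not part of this commit) for the discriminator above; the smallest allowed resolution keeps it fast on CPU. With the default `lod` buffer at 0, the full-resolution path is used and the output holds one logit per image.

    # Hypothetical example, not part of the commit.
    import torch
    from models.pggan_discriminator import PGGANDiscriminator

    disc = PGGANDiscriminator(resolution=8)
    images = torch.randn(4, 3, 8, 8)   # RGB images, pixel range [-1, 1]
    scores = disc(images)
    print(scores.shape)                # expected: torch.Size([4, 1])
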
models/pggan_generator.py
ADDED
@@ -0,0 +1,338 @@
# python3.7
"""Contains the implementation of generator described in PGGAN.

Paper: https://arxiv.org/pdf/1710.10196.pdf

Official TensorFlow implementation:
https://github.com/tkarras/progressive_growing_of_gans
"""

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = ['PGGANGenerator']

# Resolutions allowed.
_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024]

# Initial resolution.
_INIT_RES = 4

# Default gain factor for weight scaling.
_WSCALE_GAIN = np.sqrt(2.0)


class PGGANGenerator(nn.Module):
    """Defines the generator network in PGGAN.

    NOTE: The synthesized images are with `RGB` channel order and pixel range
    [-1, 1].

    Settings for the network:

    (1) resolution: The resolution of the output image.
    (2) z_space_dim: The dimension of the latent space, Z. (default: 512)
    (3) image_channels: Number of channels of the output image. (default: 3)
    (4) final_tanh: Whether to use `tanh` to control the final pixel range.
        (default: False)
    (5) label_size: Size of the additional label for conditional generation.
        (default: 0)
    (6) fused_scale: Whether to fused `upsample` and `conv2d` together,
        resulting in `conv2d_transpose`. (default: False)
    (7) use_wscale: Whether to use weight scaling. (default: True)
    (8) fmaps_base: Factor to control number of feature maps for each layer.
        (default: 16 << 10)
    (9) fmaps_max: Maximum number of feature maps in each layer. (default: 512)
    """

    def __init__(self,
                 resolution,
                 z_space_dim=512,
                 image_channels=3,
                 final_tanh=False,
                 label_size=0,
                 fused_scale=False,
                 use_wscale=True,
                 fmaps_base=16 << 10,
                 fmaps_max=512):
        """Initializes with basic settings.

        Raises:
            ValueError: If the `resolution` is not supported.
        """
        super().__init__()

        if resolution not in _RESOLUTIONS_ALLOWED:
            raise ValueError(f'Invalid resolution: `{resolution}`!\n'
                             f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.')

        self.init_res = _INIT_RES
        self.init_res_log2 = int(np.log2(self.init_res))
        self.resolution = resolution
        self.final_res_log2 = int(np.log2(self.resolution))
        self.z_space_dim = z_space_dim
        self.image_channels = image_channels
        self.final_tanh = final_tanh
        self.label_size = label_size
        self.fused_scale = fused_scale
        self.use_wscale = use_wscale
        self.fmaps_base = fmaps_base
        self.fmaps_max = fmaps_max

        # Number of convolutional layers.
        self.num_layers = (self.final_res_log2 - self.init_res_log2 + 1) * 2

        # Level of detail (used for progressive training).
        self.register_buffer('lod', torch.zeros(()))
        self.pth_to_tf_var_mapping = {'lod': 'lod'}

        for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1):
            res = 2 ** res_log2
            block_idx = res_log2 - self.init_res_log2

            # First convolution layer for each resolution.
            if res == self.init_res:
                self.add_module(
                    f'layer{2 * block_idx}',
                    ConvBlock(in_channels=self.z_space_dim + self.label_size,
                              out_channels=self.get_nf(res),
                              kernel_size=self.init_res,
                              padding=self.init_res - 1,
                              use_wscale=self.use_wscale))
                tf_layer_name = 'Dense'
            else:
                self.add_module(
                    f'layer{2 * block_idx}',
                    ConvBlock(in_channels=self.get_nf(res // 2),
                              out_channels=self.get_nf(res),
                              upsample=True,
                              fused_scale=self.fused_scale,
                              use_wscale=self.use_wscale))
                tf_layer_name = 'Conv0_up' if self.fused_scale else 'Conv0'
            self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = (
                f'{res}x{res}/{tf_layer_name}/weight')
            self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = (
                f'{res}x{res}/{tf_layer_name}/bias')

            # Second convolution layer for each resolution.
            self.add_module(
                f'layer{2 * block_idx + 1}',
                ConvBlock(in_channels=self.get_nf(res),
                          out_channels=self.get_nf(res),
                          use_wscale=self.use_wscale))
            tf_layer_name = 'Conv' if res == self.init_res else 'Conv1'
            self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = (
                f'{res}x{res}/{tf_layer_name}/weight')
            self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = (
                f'{res}x{res}/{tf_layer_name}/bias')

            # Output convolution layer for each resolution.
            self.add_module(
                f'output{block_idx}',
                ConvBlock(in_channels=self.get_nf(res),
                          out_channels=self.image_channels,
                          kernel_size=1,
                          padding=0,
                          use_wscale=self.use_wscale,
                          wscale_gain=1.0,
                          activation_type='linear'))
            self.pth_to_tf_var_mapping[f'output{block_idx}.weight'] = (
                f'ToRGB_lod{self.final_res_log2 - res_log2}/weight')
            self.pth_to_tf_var_mapping[f'output{block_idx}.bias'] = (
                f'ToRGB_lod{self.final_res_log2 - res_log2}/bias')

        self.upsample = UpsamplingLayer()
        self.final_activate = nn.Tanh() if self.final_tanh else nn.Identity()

    def get_nf(self, res):
        """Gets number of feature maps according to current resolution."""
        return min(self.fmaps_base // res, self.fmaps_max)

    def forward(self, z, label=None, lod=None, **_unused_kwargs):
        if z.ndim != 2 or z.shape[1] != self.z_space_dim:
            raise ValueError(f'Input latent code should be with shape '
                             f'[batch_size, latent_dim], where '
                             f'`latent_dim` equals to {self.z_space_dim}!\n'
                             f'But `{z.shape}` is received!')
        z = self.layer0.pixel_norm(z)
        if self.label_size:
            if label is None:
                raise ValueError(f'Model requires an additional label '
                                 f'(with size {self.label_size}) as input, '
                                 f'but no label is received!')
            if label.ndim != 2 or label.shape != (z.shape[0], self.label_size):
                raise ValueError(f'Input label should be with shape '
                                 f'[batch_size, label_size], where '
                                 f'`batch_size` equals to that of '
                                 f'latent codes ({z.shape[0]}) and '
                                 f'`label_size` equals to {self.label_size}!\n'
                                 f'But `{label.shape}` is received!')
            z = torch.cat((z, label), dim=1)

        lod = self.lod.cpu().tolist() if lod is None else lod
        if lod + self.init_res_log2 > self.final_res_log2:
            raise ValueError(f'Maximum level-of-detail (lod) is '
                             f'{self.final_res_log2 - self.init_res_log2}, '
                             f'but `{lod}` is received!')

        x = z.view(z.shape[0], self.z_space_dim + self.label_size, 1, 1)
        for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1):
            current_lod = self.final_res_log2 - res_log2
            if lod < current_lod + 1:
                block_idx = res_log2 - self.init_res_log2
                x = self.__getattr__(f'layer{2 * block_idx}')(x)
                x = self.__getattr__(f'layer{2 * block_idx + 1}')(x)
            if current_lod - 1 < lod <= current_lod:
                image = self.__getattr__(f'output{block_idx}')(x)
            elif current_lod < lod < current_lod + 1:
                alpha = np.ceil(lod) - lod
                image = (self.__getattr__(f'output{block_idx}')(x) * alpha +
                         self.upsample(image) * (1 - alpha))
            elif lod >= current_lod + 1:
                image = self.upsample(image)
        image = self.final_activate(image)

        results = {
            'z': z,
            'label': label,
            'image': image,
        }
        return results


class PixelNormLayer(nn.Module):
    """Implements pixel-wise feature vector normalization layer."""

    def __init__(self, epsilon=1e-8):
        super().__init__()
        self.eps = epsilon

    def forward(self, x):
        norm = torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + self.eps)
        return x / norm


class UpsamplingLayer(nn.Module):
    """Implements the upsampling layer.

    Basically, this layer can be used to upsample feature maps with nearest
    neighbor interpolation.
    """

    def __init__(self, scale_factor=2):
        super().__init__()
        self.scale_factor = scale_factor

    def forward(self, x):
        if self.scale_factor <= 1:
            return x
        return F.interpolate(x, scale_factor=self.scale_factor, mode='nearest')


class ConvBlock(nn.Module):
    """Implements the convolutional block.

    Basically, this block executes pixel-wise normalization layer, upsampling
    layer (if needed), convolutional layer, and activation layer in sequence.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=1,
                 add_bias=True,
                 upsample=False,
                 fused_scale=False,
                 use_wscale=True,
                 wscale_gain=_WSCALE_GAIN,
                 activation_type='lrelu'):
        """Initializes with block settings.

        Args:
            in_channels: Number of channels of the input tensor.
            out_channels: Number of channels of the output tensor.
            kernel_size: Size of the convolutional kernels. (default: 3)
            stride: Stride parameter for convolution operation. (default: 1)
            padding: Padding parameter for convolution operation. (default: 1)
            add_bias: Whether to add bias onto the convolutional result.
                (default: True)
            upsample: Whether to upsample the input tensor before convolution.
                (default: False)
            fused_scale: Whether to fused `upsample` and `conv2d` together,
                resulting in `conv2d_transpose`. (default: False)
            use_wscale: Whether to use weight scaling. (default: True)
            wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
            activation_type: Type of activation. Support `linear` and `lrelu`.
                (default: `lrelu`)

        Raises:
            NotImplementedError: If the `activation_type` is not supported.
        """
        super().__init__()

        self.pixel_norm = PixelNormLayer()

        if upsample and not fused_scale:
            self.upsample = UpsamplingLayer()
        else:
            self.upsample = nn.Identity()

        if upsample and fused_scale:
            self.use_conv2d_transpose = True
            weight_shape = (in_channels, out_channels, kernel_size, kernel_size)
            self.stride = 2
            self.padding = 1
        else:
            self.use_conv2d_transpose = False
            weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
            self.stride = stride
            self.padding = padding

        fan_in = kernel_size * kernel_size * in_channels
        wscale = wscale_gain / np.sqrt(fan_in)
        if use_wscale:
            self.weight = nn.Parameter(torch.randn(*weight_shape))
            self.wscale = wscale
        else:
            self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale)
            self.wscale = 1.0

        if add_bias:
            self.bias = nn.Parameter(torch.zeros(out_channels))
        else:
            self.bias = None

        if activation_type == 'linear':
            self.activate = nn.Identity()
        elif activation_type == 'lrelu':
            self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        else:
            raise NotImplementedError(f'Not implemented activation function: '
                                      f'`{activation_type}`!')

    def forward(self, x):
        x = self.pixel_norm(x)
        x = self.upsample(x)
        weight = self.weight * self.wscale
        if self.use_conv2d_transpose:
            weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0.0)
            weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] +
                      weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1])
            x = F.conv_transpose2d(x,
                                   weight=weight,
                                   bias=self.bias,
                                   stride=self.stride,
                                   padding=self.padding)
        else:
            x = F.conv2d(x,
                         weight=weight,
                         bias=self.bias,
                         stride=self.stride,
                         padding=self.padding)
        x = self.activate(x)
        return x
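
Note: a hypothetical smoke-test sketch (not part of this commit) for the generator above, mirroring the discriminator example; the forward pass returns a dict holding the latent code, the label, and the synthesized image.

    # Hypothetical example, not part of the commit.
    import torch
    from models.pggan_generator import PGGANGenerator

    gen = PGGANGenerator(resolution=8)
    z = torch.randn(4, gen.z_space_dim)
    outputs = gen(z)
    print(outputs['image'].shape)      # expected: torch.Size([4, 3, 8, 8])
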
models/stylegan2_discriminator.py
ADDED
@@ -0,0 +1,468 @@
1 |
+
# python3.7
|
2 |
+
"""Contains the implementation of discriminator described in StyleGAN2.
|
3 |
+
|
4 |
+
Compared to that of StyleGAN, the discriminator in StyleGAN2 mainly adds skip
|
5 |
+
connections, increases model size and disables progressive growth. This script
|
6 |
+
ONLY supports config F in the original paper.
|
7 |
+
|
8 |
+
Paper: https://arxiv.org/pdf/1912.04958.pdf
|
9 |
+
|
10 |
+
Official TensorFlow implementation: https://github.com/NVlabs/stylegan2
|
11 |
+
"""
|
12 |
+
|
13 |
+
import numpy as np
|
14 |
+
|
15 |
+
import torch
|
16 |
+
import torch.nn as nn
|
17 |
+
import torch.nn.functional as F
|
18 |
+
|
19 |
+
__all__ = ['StyleGAN2Discriminator']
|
20 |
+
|
21 |
+
# Resolutions allowed.
|
22 |
+
_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024]
|
23 |
+
|
24 |
+
# Initial resolution.
|
25 |
+
_INIT_RES = 4
|
26 |
+
|
27 |
+
# Architectures allowed.
|
28 |
+
_ARCHITECTURES_ALLOWED = ['resnet', 'skip', 'origin']
|
29 |
+
|
30 |
+
# Default gain factor for weight scaling.
|
31 |
+
_WSCALE_GAIN = 1.0
|
32 |
+
|
33 |
+
|
34 |
+
class StyleGAN2Discriminator(nn.Module):
|
35 |
+
"""Defines the discriminator network in StyleGAN2.
|
36 |
+
|
37 |
+
NOTE: The discriminator takes images with `RGB` channel order and pixel
|
38 |
+
range [-1, 1] as inputs.
|
39 |
+
|
40 |
+
Settings for the network:
|
41 |
+
|
42 |
+
(1) resolution: The resolution of the input image.
|
43 |
+
(2) image_channels: Number of channels of the input image. (default: 3)
|
44 |
+
(3) label_size: Size of the additional label for conditional generation.
|
45 |
+
(default: 0)
|
46 |
+
(4) architecture: Type of architecture. Support `origin`, `skip`, and
|
47 |
+
`resnet`. (default: `resnet`)
|
48 |
+
(5) use_wscale: Whether to use weight scaling. (default: True)
|
49 |
+
(6) minibatch_std_group_size: Group size for the minibatch standard
|
50 |
+
deviation layer. 0 means disable. (default: 4)
|
51 |
+
(7) minibatch_std_channels: Number of new channels after the minibatch
|
52 |
+
standard deviation layer. (default: 1)
|
53 |
+
(8) fmaps_base: Factor to control number of feature maps for each layer.
|
54 |
+
(default: 32 << 10)
|
55 |
+
(9) fmaps_max: Maximum number of feature maps in each layer. (default: 512)
|
56 |
+
"""
|
57 |
+
|
58 |
+
def __init__(self,
|
59 |
+
resolution,
|
60 |
+
image_channels=3,
|
61 |
+
label_size=0,
|
62 |
+
architecture='resnet',
|
63 |
+
use_wscale=True,
|
64 |
+
minibatch_std_group_size=4,
|
65 |
+
minibatch_std_channels=1,
|
66 |
+
fmaps_base=32 << 10,
|
67 |
+
fmaps_max=512):
|
68 |
+
"""Initializes with basic settings.
|
69 |
+
|
70 |
+
Raises:
|
71 |
+
ValueError: If the `resolution` is not supported, or `architecture`
|
72 |
+
is not supported.
|
73 |
+
"""
|
74 |
+
super().__init__()
|
75 |
+
|
76 |
+
if resolution not in _RESOLUTIONS_ALLOWED:
|
77 |
+
raise ValueError(f'Invalid resolution: `{resolution}`!\n'
|
78 |
+
f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.')
|
79 |
+
if architecture not in _ARCHITECTURES_ALLOWED:
|
80 |
+
raise ValueError(f'Invalid architecture: `{architecture}`!\n'
|
81 |
+
f'Architectures allowed: '
|
82 |
+
f'{_ARCHITECTURES_ALLOWED}.')
|
83 |
+
|
84 |
+
self.init_res = _INIT_RES
|
85 |
+
self.init_res_log2 = int(np.log2(self.init_res))
|
86 |
+
self.resolution = resolution
|
87 |
+
self.final_res_log2 = int(np.log2(self.resolution))
|
88 |
+
self.image_channels = image_channels
|
89 |
+
self.label_size = label_size
|
90 |
+
self.architecture = architecture
|
91 |
+
self.use_wscale = use_wscale
|
92 |
+
self.minibatch_std_group_size = minibatch_std_group_size
|
93 |
+
self.minibatch_std_channels = minibatch_std_channels
|
94 |
+
self.fmaps_base = fmaps_base
|
95 |
+
self.fmaps_max = fmaps_max
|
96 |
+
|
97 |
+
self.pth_to_tf_var_mapping = {}
|
98 |
+
for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1):
|
99 |
+
res = 2 ** res_log2
|
100 |
+
block_idx = self.final_res_log2 - res_log2
|
101 |
+
|
102 |
+
# Input convolution layer for each resolution (if needed).
|
103 |
+
if res_log2 == self.final_res_log2 or self.architecture == 'skip':
|
104 |
+
self.add_module(
|
105 |
+
f'input{block_idx}',
|
106 |
+
ConvBlock(in_channels=self.image_channels,
|
107 |
+
out_channels=self.get_nf(res),
|
108 |
+
kernel_size=1,
|
109 |
+
use_wscale=self.use_wscale))
|
110 |
+
self.pth_to_tf_var_mapping[f'input{block_idx}.weight'] = (
|
111 |
+
f'{res}x{res}/FromRGB/weight')
|
112 |
+
self.pth_to_tf_var_mapping[f'input{block_idx}.bias'] = (
|
113 |
+
f'{res}x{res}/FromRGB/bias')
|
114 |
+
|
115 |
+
# Convolution block for each resolution (except the last one).
|
116 |
+
if res != self.init_res:
|
117 |
+
self.add_module(
|
118 |
+
f'layer{2 * block_idx}',
|
119 |
+
ConvBlock(in_channels=self.get_nf(res),
|
120 |
+
out_channels=self.get_nf(res),
|
121 |
+
use_wscale=self.use_wscale))
|
122 |
+
tf_layer0_name = 'Conv0'
|
123 |
+
self.add_module(
|
124 |
+
f'layer{2 * block_idx + 1}',
|
125 |
+
ConvBlock(in_channels=self.get_nf(res),
|
126 |
+
out_channels=self.get_nf(res // 2),
|
127 |
+
scale_factor=2,
|
128 |
+
use_wscale=self.use_wscale))
|
129 |
+
tf_layer1_name = 'Conv1_down'
|
130 |
+
|
131 |
+
if self.architecture == 'resnet':
|
132 |
+
layer_name = f'skip_layer{block_idx}'
|
133 |
+
self.add_module(
|
134 |
+
layer_name,
|
135 |
+
ConvBlock(in_channels=self.get_nf(res),
|
136 |
+
out_channels=self.get_nf(res // 2),
|
137 |
+
kernel_size=1,
|
138 |
+
add_bias=False,
|
139 |
+
scale_factor=2,
|
140 |
+
use_wscale=self.use_wscale,
|
141 |
+
activation_type='linear'))
|
142 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
143 |
+
f'{res}x{res}/Skip/weight')
|
144 |
+
|
145 |
+
# Convolution block for last resolution.
|
146 |
+
else:
|
147 |
+
self.add_module(
|
148 |
+
f'layer{2 * block_idx}',
|
149 |
+
ConvBlock(in_channels=self.get_nf(res),
|
150 |
+
out_channels=self.get_nf(res),
|
151 |
+
use_wscale=self.use_wscale,
|
152 |
+
minibatch_std_group_size=minibatch_std_group_size,
|
153 |
+
minibatch_std_channels=minibatch_std_channels))
|
154 |
+
tf_layer0_name = 'Conv'
|
155 |
+
self.add_module(
|
156 |
+
f'layer{2 * block_idx + 1}',
|
157 |
+
DenseBlock(in_channels=self.get_nf(res) * res * res,
|
158 |
+
out_channels=self.get_nf(res // 2),
|
159 |
+
use_wscale=self.use_wscale))
|
160 |
+
tf_layer1_name = 'Dense0'
|
161 |
+
|
162 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = (
|
163 |
+
f'{res}x{res}/{tf_layer0_name}/weight')
|
164 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = (
|
165 |
+
f'{res}x{res}/{tf_layer0_name}/bias')
|
166 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = (
|
167 |
+
f'{res}x{res}/{tf_layer1_name}/weight')
|
168 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = (
|
169 |
+
f'{res}x{res}/{tf_layer1_name}/bias')
|
170 |
+
|
171 |
+
# Final dense block.
|
172 |
+
self.add_module(
|
173 |
+
f'layer{2 * block_idx + 2}',
|
174 |
+
DenseBlock(in_channels=self.get_nf(res // 2),
|
175 |
+
out_channels=max(self.label_size, 1),
|
176 |
+
use_wscale=self.use_wscale,
|
177 |
+
activation_type='linear'))
|
178 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.weight'] = (
|
179 |
+
f'Output/weight')
|
180 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.bias'] = (
|
181 |
+
f'Output/bias')
|
182 |
+
|
183 |
+
if self.architecture == 'skip':
|
184 |
+
self.downsample = DownsamplingLayer()
|
185 |
+
|
186 |
+
def get_nf(self, res):
|
187 |
+
"""Gets number of feature maps according to current resolution."""
|
188 |
+
return min(self.fmaps_base // res, self.fmaps_max)
|
189 |
+
|
190 |
+
def forward(self, image, label=None, **_unused_kwargs):
|
191 |
+
expected_shape = (self.image_channels, self.resolution, self.resolution)
|
192 |
+
if image.ndim != 4 or image.shape[1:] != expected_shape:
|
193 |
+
raise ValueError(f'The input tensor should be with shape '
|
194 |
+
f'[batch_size, channel, height, width], where '
|
195 |
+
f'`channel` equals to {self.image_channels}, '
|
196 |
+
f'`height`, `width` equal to {self.resolution}!\n'
|
197 |
+
f'But `{image.shape}` is received!')
|
198 |
+
if self.label_size:
|
199 |
+
if label is None:
|
200 |
+
raise ValueError(f'Model requires an additional label '
|
201 |
+
f'(with size {self.label_size}) as inputs, '
|
202 |
+
f'but no label is received!')
|
203 |
+
batch_size = image.shape[0]
|
204 |
+
if label.ndim != 2 or label.shape != (batch_size, self.label_size):
|
205 |
+
raise ValueError(f'Input label should be with shape '
|
206 |
+
f'[batch_size, label_size], where '
|
207 |
+
f'`batch_size` equals to that of '
|
208 |
+
f'images ({image.shape[0]}) and '
|
209 |
+
f'`label_size` equals to {self.label_size}!\n'
|
210 |
+
f'But `{label.shape}` is received!')
|
211 |
+
|
212 |
+
x = self.input0(image)
|
213 |
+
for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1):
|
214 |
+
block_idx = self.final_res_log2 - res_log2
|
215 |
+
if self.architecture == 'skip' and block_idx > 0:
|
216 |
+
image = self.downsample(image)
|
217 |
+
x = x + self.__getattr__(f'input{block_idx}')(image)
|
218 |
+
if self.architecture == 'resnet' and res_log2 != self.init_res_log2:
|
219 |
+
residual = self.__getattr__(f'skip_layer{block_idx}')(x)
|
220 |
+
x = self.__getattr__(f'layer{2 * block_idx}')(x)
|
221 |
+
x = self.__getattr__(f'layer{2 * block_idx + 1}')(x)
|
222 |
+
if self.architecture == 'resnet' and res_log2 != self.init_res_log2:
|
223 |
+
x = (x + residual) / np.sqrt(2.0)
|
224 |
+
x = self.__getattr__(f'layer{2 * block_idx + 2}')(x)
|
225 |
+
|
226 |
+
if self.label_size:
|
227 |
+
x = torch.sum(x * label, dim=1, keepdim=True)
|
228 |
+
return x
|
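A minimal usage sketch for the discriminator assembled above (illustrative only, not part of the committed file; the class name `StyleGAN2Discriminator` and its constructor defaults are assumed from the file name and the attributes used above, and the weights here are randomly initialized rather than pretrained):

import torch

D = StyleGAN2Discriminator(resolution=256)    # assumed constructor; see the settings documented earlier in this file
images = torch.randn(4, 3, 256, 256)          # NCHW, expected pixel range [-1, 1]
scores = D(images)                            # shape [4, 1] when label_size == 0
print(scores.shape)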
229 |
+
|
230 |
+
|
231 |
+
class MiniBatchSTDLayer(nn.Module):
|
232 |
+
"""Implements the minibatch standard deviation layer."""
|
233 |
+
|
234 |
+
def __init__(self, group_size=4, new_channels=1, epsilon=1e-8):
|
235 |
+
super().__init__()
|
236 |
+
self.group_size = group_size
|
237 |
+
self.new_channels = new_channels
|
238 |
+
self.epsilon = epsilon
|
239 |
+
|
240 |
+
def forward(self, x):
|
241 |
+
if self.group_size <= 1:
|
242 |
+
return x
|
243 |
+
ng = min(self.group_size, x.shape[0])
|
244 |
+
nc = self.new_channels
|
245 |
+
temp_c = x.shape[1] // nc # [NCHW]
|
246 |
+
y = x.view(ng, -1, nc, temp_c, x.shape[2], x.shape[3]) # [GMncHW]
|
247 |
+
y = y - torch.mean(y, dim=0, keepdim=True) # [GMncHW]
|
248 |
+
y = torch.mean(y ** 2, dim=0) # [MncHW]
|
249 |
+
y = torch.sqrt(y + self.epsilon) # [MncHW]
|
250 |
+
y = torch.mean(y, dim=[2, 3, 4], keepdim=True) # [Mn111]
|
251 |
+
y = torch.mean(y, dim=2) # [Mn11]
|
252 |
+
y = y.repeat(ng, 1, x.shape[2], x.shape[3]) # [NnHW]
|
253 |
+
return torch.cat([x, y], dim=1)
|
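A small sketch of what `MiniBatchSTDLayer` does at runtime (illustrative, not part of the committed file): it appends `new_channels` extra feature maps holding the average within-group standard deviation, so the discriminator can detect a collapse in sample diversity.

import torch

mbstd = MiniBatchSTDLayer(group_size=4, new_channels=1)
x = torch.randn(8, 16, 4, 4)     # batch of 8 samples, 16 channels
y = mbstd(x)
print(y.shape)                   # torch.Size([8, 17, 4, 4]): one std channel appended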
254 |
+
|
255 |
+
|
256 |
+
class DownsamplingLayer(nn.Module):
|
257 |
+
"""Implements the downsampling layer.
|
258 |
+
|
259 |
+
This layer can also be used as filtering by setting `scale_factor` as 1.
|
260 |
+
"""
|
261 |
+
|
262 |
+
def __init__(self, scale_factor=2, kernel=(1, 3, 3, 1), extra_padding=0):
|
263 |
+
super().__init__()
|
264 |
+
assert scale_factor >= 1
|
265 |
+
self.scale_factor = scale_factor
|
266 |
+
|
267 |
+
if extra_padding != 0:
|
268 |
+
assert scale_factor == 1
|
269 |
+
|
270 |
+
if kernel is None:
|
271 |
+
kernel = np.ones((scale_factor), dtype=np.float32)
|
272 |
+
else:
|
273 |
+
kernel = np.array(kernel, dtype=np.float32)
|
274 |
+
assert kernel.ndim == 1
|
275 |
+
kernel = np.outer(kernel, kernel)
|
276 |
+
kernel = kernel / np.sum(kernel)
|
277 |
+
assert kernel.ndim == 2
|
278 |
+
assert kernel.shape[0] == kernel.shape[1]
|
279 |
+
kernel = kernel[np.newaxis, np.newaxis]
|
280 |
+
self.register_buffer('kernel', torch.from_numpy(kernel))
|
281 |
+
self.kernel = self.kernel.flip(0, 1)
|
282 |
+
padding = kernel.shape[2] - scale_factor + extra_padding
|
283 |
+
self.padding = ((padding + 1) // 2, padding // 2,
|
284 |
+
(padding + 1) // 2, padding // 2)
|
285 |
+
|
286 |
+
def forward(self, x):
|
287 |
+
assert x.ndim == 4
|
288 |
+
channels = x.shape[1]
|
289 |
+
x = x.view(-1, 1, x.shape[2], x.shape[3])
|
290 |
+
x = F.pad(x, self.padding, mode='constant', value=0)
|
291 |
+
x = F.conv2d(x, self.kernel, stride=self.scale_factor)
|
292 |
+
x = x.view(-1, channels, x.shape[2], x.shape[3])
|
293 |
+
return x
|
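For reference, a quick shape check of `DownsamplingLayer` with its default binomial filter (illustrative, not part of the committed file):

import torch

down = DownsamplingLayer(scale_factor=2)   # default (1, 3, 3, 1) filtering kernel
x = torch.randn(2, 8, 64, 64)
y = down(x)
print(y.shape)                             # torch.Size([2, 8, 32, 32])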
294 |
+
|
295 |
+
|
296 |
+
class ConvBlock(nn.Module):
|
297 |
+
"""Implements the convolutional block.
|
298 |
+
|
299 |
+
Basically, this block executes minibatch standard deviation layer (if
|
300 |
+
needed), filtering layer (if needed), convolutional layer, and activation
|
301 |
+
layer in sequence.
|
302 |
+
"""
|
303 |
+
|
304 |
+
def __init__(self,
|
305 |
+
in_channels,
|
306 |
+
out_channels,
|
307 |
+
kernel_size=3,
|
308 |
+
add_bias=True,
|
309 |
+
scale_factor=1,
|
310 |
+
filtering_kernel=(1, 3, 3, 1),
|
311 |
+
use_wscale=True,
|
312 |
+
wscale_gain=_WSCALE_GAIN,
|
313 |
+
lr_mul=1.0,
|
314 |
+
activation_type='lrelu',
|
315 |
+
minibatch_std_group_size=0,
|
316 |
+
minibatch_std_channels=1):
|
317 |
+
"""Initializes with block settings.
|
318 |
+
|
319 |
+
Args:
|
320 |
+
in_channels: Number of channels of the input tensor.
|
321 |
+
out_channels: Number of channels of the output tensor.
|
322 |
+
kernel_size: Size of the convolutional kernels. (default: 3)
|
323 |
+
add_bias: Whether to add bias onto the convolutional result.
|
324 |
+
(default: True)
|
325 |
+
scale_factor: Scale factor for downsampling. `1` means skip
|
326 |
+
downsampling. (default: 1)
|
327 |
+
filtering_kernel: Kernel used for filtering before downsampling.
|
328 |
+
(default: (1, 3, 3, 1))
|
329 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
330 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
331 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
332 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
333 |
+
(default: `lrelu`)
|
334 |
+
minibatch_std_group_size: Group size for the minibatch standard
|
335 |
+
deviation layer. 0 means disable. (default: 0)
|
336 |
+
minibatch_std_channels: Number of new channels after the minibatch
|
337 |
+
standard deviation layer. (default: 1)
|
338 |
+
|
339 |
+
Raises:
|
340 |
+
NotImplementedError: If the `activation_type` is not supported.
|
341 |
+
"""
|
342 |
+
super().__init__()
|
343 |
+
|
344 |
+
if minibatch_std_group_size > 1:
|
345 |
+
in_channels = in_channels + minibatch_std_channels
|
346 |
+
self.mbstd = MiniBatchSTDLayer(group_size=minibatch_std_group_size,
|
347 |
+
new_channels=minibatch_std_channels)
|
348 |
+
else:
|
349 |
+
self.mbstd = nn.Identity()
|
350 |
+
|
351 |
+
if scale_factor > 1:
|
352 |
+
extra_padding = kernel_size - scale_factor
|
353 |
+
self.filter = DownsamplingLayer(scale_factor=1,
|
354 |
+
kernel=filtering_kernel,
|
355 |
+
extra_padding=extra_padding)
|
356 |
+
self.stride = scale_factor
|
357 |
+
self.padding = 0 # Padding is done in `DownsamplingLayer`.
|
358 |
+
else:
|
359 |
+
self.filter = nn.Identity()
|
360 |
+
assert kernel_size % 2 == 1
|
361 |
+
self.stride = 1
|
362 |
+
self.padding = kernel_size // 2
|
363 |
+
|
364 |
+
weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
|
365 |
+
fan_in = kernel_size * kernel_size * in_channels
|
366 |
+
wscale = wscale_gain / np.sqrt(fan_in)
|
367 |
+
if use_wscale:
|
368 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
|
369 |
+
self.wscale = wscale * lr_mul
|
370 |
+
else:
|
371 |
+
self.weight = nn.Parameter(
|
372 |
+
torch.randn(*weight_shape) * wscale / lr_mul)
|
373 |
+
self.wscale = lr_mul
|
374 |
+
|
375 |
+
if add_bias:
|
376 |
+
self.bias = nn.Parameter(torch.zeros(out_channels))
|
377 |
+
else:
|
378 |
+
self.bias = None
|
379 |
+
self.bscale = lr_mul
|
380 |
+
|
381 |
+
if activation_type == 'linear':
|
382 |
+
self.activate = nn.Identity()
|
383 |
+
self.activate_scale = 1.0
|
384 |
+
elif activation_type == 'lrelu':
|
385 |
+
self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
|
386 |
+
self.activate_scale = np.sqrt(2.0)
|
387 |
+
else:
|
388 |
+
raise NotImplementedError(f'Not implemented activation function: '
|
389 |
+
f'`{activation_type}`!')
|
390 |
+
|
391 |
+
def forward(self, x):
|
392 |
+
x = self.mbstd(x)
|
393 |
+
x = self.filter(x)
|
394 |
+
weight = self.weight * self.wscale
|
395 |
+
bias = self.bias * self.bscale if self.bias is not None else None
|
396 |
+
x = F.conv2d(x,
|
397 |
+
weight=weight,
|
398 |
+
bias=bias,
|
399 |
+
stride=self.stride,
|
400 |
+
padding=self.padding)
|
401 |
+
x = self.activate(x) * self.activate_scale
|
402 |
+
return x
|
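The equalized-learning-rate trick used by `ConvBlock` above (and by `DenseBlock` below) can be summarized in a few lines. This is only a sketch; the gain constant `_WSCALE_GAIN` is defined elsewhere in this file, so the value 1.0 below is a placeholder assumption.

import numpy as np
import torch

out_c, in_c, k = 64, 32, 3
weight = torch.randn(out_c, in_c, k, k)   # parameters are stored at unit variance
fan_in = k * k * in_c
wscale = 1.0 / np.sqrt(fan_in)            # gain / sqrt(fan_in), with the gain assumed to be 1.0 here
runtime_weight = weight * wscale          # the tensor that F.conv2d actually receives
print(runtime_weight.std())               # roughly 1 / sqrt(fan_in)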
403 |
+
|
404 |
+
|
405 |
+
class DenseBlock(nn.Module):
|
406 |
+
"""Implements the dense block.
|
407 |
+
|
408 |
+
Basically, this block executes fully-connected layer and activation layer.
|
409 |
+
"""
|
410 |
+
|
411 |
+
def __init__(self,
|
412 |
+
in_channels,
|
413 |
+
out_channels,
|
414 |
+
add_bias=True,
|
415 |
+
use_wscale=True,
|
416 |
+
wscale_gain=_WSCALE_GAIN,
|
417 |
+
lr_mul=1.0,
|
418 |
+
activation_type='lrelu'):
|
419 |
+
"""Initializes with block settings.
|
420 |
+
|
421 |
+
Args:
|
422 |
+
in_channels: Number of channels of the input tensor.
|
423 |
+
out_channels: Number of channels of the output tensor.
|
424 |
+
add_bias: Whether to add bias onto the fully-connected result.
|
425 |
+
(default: True)
|
426 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
427 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
428 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
429 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
430 |
+
(default: `lrelu`)
|
431 |
+
|
432 |
+
Raises:
|
433 |
+
NotImplementedError: If the `activation_type` is not supported.
|
434 |
+
"""
|
435 |
+
super().__init__()
|
436 |
+
weight_shape = (out_channels, in_channels)
|
437 |
+
wscale = wscale_gain / np.sqrt(in_channels)
|
438 |
+
if use_wscale:
|
439 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
|
440 |
+
self.wscale = wscale * lr_mul
|
441 |
+
else:
|
442 |
+
self.weight = nn.Parameter(
|
443 |
+
torch.randn(*weight_shape) * wscale / lr_mul)
|
444 |
+
self.wscale = lr_mul
|
445 |
+
|
446 |
+
if add_bias:
|
447 |
+
self.bias = nn.Parameter(torch.zeros(out_channels))
|
448 |
+
else:
|
449 |
+
self.bias = None
|
450 |
+
self.bscale = lr_mul
|
451 |
+
|
452 |
+
if activation_type == 'linear':
|
453 |
+
self.activate = nn.Identity()
|
454 |
+
self.activate_scale = 1.0
|
455 |
+
elif activation_type == 'lrelu':
|
456 |
+
self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
|
457 |
+
self.activate_scale = np.sqrt(2.0)
|
458 |
+
else:
|
459 |
+
raise NotImplementedError(f'Not implemented activation function: '
|
460 |
+
f'`{activation_type}`!')
|
461 |
+
|
462 |
+
def forward(self, x):
|
463 |
+
if x.ndim != 2:
|
464 |
+
x = x.view(x.shape[0], -1)
|
465 |
+
bias = self.bias * self.bscale if self.bias is not None else None
|
466 |
+
x = F.linear(x, weight=self.weight * self.wscale, bias=bias)
|
467 |
+
x = self.activate(x) * self.activate_scale
|
468 |
+
return x
|
models/stylegan2_generator.py
ADDED
@@ -0,0 +1,996 @@
1 |
+
# python3.7
|
2 |
+
"""Contains the implementation of generator described in StyleGAN2.
|
3 |
+
|
4 |
+
Compared to that of StyleGAN, the generator in StyleGAN2 mainly introduces style
|
5 |
+
demodulation, adds skip connections, increases model size, and disables
|
6 |
+
progressive growth. This script ONLY supports config F in the original paper.
|
7 |
+
|
8 |
+
Paper: https://arxiv.org/pdf/1912.04958.pdf
|
9 |
+
|
10 |
+
Official TensorFlow implementation: https://github.com/NVlabs/stylegan2
|
11 |
+
"""
|
12 |
+
|
13 |
+
import numpy as np
|
14 |
+
|
15 |
+
import torch
|
16 |
+
import torch.nn as nn
|
17 |
+
import torch.nn.functional as F
|
18 |
+
|
19 |
+
from .sync_op import all_gather
|
20 |
+
|
21 |
+
__all__ = ['StyleGAN2Generator']
|
22 |
+
|
23 |
+
# Resolutions allowed.
|
24 |
+
_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024]
|
25 |
+
|
26 |
+
# Initial resolution.
|
27 |
+
_INIT_RES = 4
|
28 |
+
|
29 |
+
# Architectures allowed.
|
30 |
+
_ARCHITECTURES_ALLOWED = ['resnet', 'skip', 'origin']
|
31 |
+
|
32 |
+
# Default gain factor for weight scaling.
|
33 |
+
_WSCALE_GAIN = 1.0
|
34 |
+
|
35 |
+
|
36 |
+
class StyleGAN2Generator(nn.Module):
|
37 |
+
"""Defines the generator network in StyleGAN2.
|
38 |
+
|
39 |
+
NOTE: The synthesized images are with `RGB` channel order and pixel range
|
40 |
+
[-1, 1].
|
41 |
+
|
42 |
+
Settings for the mapping network:
|
43 |
+
|
44 |
+
(1) z_space_dim: Dimension of the input latent space, Z. (default: 512)
|
45 |
+
(2) w_space_dim: Dimension of the output latent space, W. (default: 512)
|
46 |
+
(3) label_size: Size of the additional label for conditional generation.
|
47 |
+
(default: 0)
|
48 |
+
(4) mapping_layers: Number of layers of the mapping network. (default: 8)
|
49 |
+
(5) mapping_fmaps: Number of hidden channels of the mapping network.
|
50 |
+
(default: 512)
|
51 |
+
(6) mapping_lr_mul: Learning rate multiplier for the mapping network.
|
52 |
+
(default: 0.01)
|
53 |
+
(7) repeat_w: Repeat w-code for different layers.
|
54 |
+
|
55 |
+
Settings for the synthesis network:
|
56 |
+
|
57 |
+
(1) resolution: The resolution of the output image.
|
58 |
+
(2) image_channels: Number of channels of the output image. (default: 3)
|
59 |
+
(3) final_tanh: Whether to use `tanh` to control the final pixel range.
|
60 |
+
(default: False)
|
61 |
+
(4) const_input: Whether to use a constant in the first convolutional layer.
|
62 |
+
(default: True)
|
63 |
+
(5) architecture: Type of architecture. Support `origin`, `skip`, and
|
64 |
+
`resnet`. (default: `resnet`)
|
65 |
+
(6) fused_modulate: Whether to fuse `style_modulate` and `conv2d` together.
|
66 |
+
(default: True)
|
67 |
+
(7) demodulate: Whether to perform style demodulation. (default: True)
|
68 |
+
(8) use_wscale: Whether to use weight scaling. (default: True)
|
69 |
+
(9) fmaps_base: Factor to control number of feature maps for each layer.
|
70 |
+
(default: 32 << 10)
|
71 |
+
(10) fmaps_max: Maximum number of feature maps in each layer. (default: 512)
|
72 |
+
"""
|
73 |
+
|
74 |
+
def __init__(self,
|
75 |
+
resolution,
|
76 |
+
z_space_dim=512,
|
77 |
+
w_space_dim=512,
|
78 |
+
label_size=0,
|
79 |
+
mapping_layers=8,
|
80 |
+
mapping_fmaps=512,
|
81 |
+
mapping_lr_mul=0.01,
|
82 |
+
repeat_w=True,
|
83 |
+
image_channels=3,
|
84 |
+
final_tanh=False,
|
85 |
+
const_input=True,
|
86 |
+
architecture='skip',
|
87 |
+
fused_modulate=True,
|
88 |
+
demodulate=True,
|
89 |
+
use_wscale=True,
|
90 |
+
fmaps_base=32 << 10,
|
91 |
+
fmaps_max=512):
|
92 |
+
"""Initializes with basic settings.
|
93 |
+
|
94 |
+
Raises:
|
95 |
+
ValueError: If the `resolution` is not supported, or `architecture`
|
96 |
+
is not supported.
|
97 |
+
"""
|
98 |
+
super().__init__()
|
99 |
+
|
100 |
+
if resolution not in _RESOLUTIONS_ALLOWED:
|
101 |
+
raise ValueError(f'Invalid resolution: `{resolution}`!\n'
|
102 |
+
f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.')
|
103 |
+
if architecture not in _ARCHITECTURES_ALLOWED:
|
104 |
+
raise ValueError(f'Invalid architecture: `{architecture}`!\n'
|
105 |
+
f'Architectures allowed: '
|
106 |
+
f'{_ARCHITECTURES_ALLOWED}.')
|
107 |
+
|
108 |
+
self.init_res = _INIT_RES
|
109 |
+
self.resolution = resolution
|
110 |
+
self.z_space_dim = z_space_dim
|
111 |
+
self.w_space_dim = w_space_dim
|
112 |
+
self.label_size = label_size
|
113 |
+
self.mapping_layers = mapping_layers
|
114 |
+
self.mapping_fmaps = mapping_fmaps
|
115 |
+
self.mapping_lr_mul = mapping_lr_mul
|
116 |
+
self.repeat_w = repeat_w
|
117 |
+
self.image_channels = image_channels
|
118 |
+
self.final_tanh = final_tanh
|
119 |
+
self.const_input = const_input
|
120 |
+
self.architecture = architecture
|
121 |
+
self.fused_modulate = fused_modulate
|
122 |
+
self.demodulate = demodulate
|
123 |
+
self.use_wscale = use_wscale
|
124 |
+
self.fmaps_base = fmaps_base
|
125 |
+
self.fmaps_max = fmaps_max
|
126 |
+
|
127 |
+
self.num_layers = int(np.log2(self.resolution // self.init_res * 2)) * 2
|
128 |
+
|
129 |
+
if self.repeat_w:
|
130 |
+
self.mapping_space_dim = self.w_space_dim
|
131 |
+
else:
|
132 |
+
self.mapping_space_dim = self.w_space_dim * self.num_layers
|
133 |
+
self.mapping = MappingModule(input_space_dim=self.z_space_dim,
|
134 |
+
hidden_space_dim=self.mapping_fmaps,
|
135 |
+
final_space_dim=self.mapping_space_dim,
|
136 |
+
label_size=self.label_size,
|
137 |
+
num_layers=self.mapping_layers,
|
138 |
+
use_wscale=self.use_wscale,
|
139 |
+
lr_mul=self.mapping_lr_mul)
|
140 |
+
|
141 |
+
self.truncation = TruncationModule(w_space_dim=self.w_space_dim,
|
142 |
+
num_layers=self.num_layers,
|
143 |
+
repeat_w=self.repeat_w)
|
144 |
+
|
145 |
+
self.synthesis = SynthesisModule(resolution=self.resolution,
|
146 |
+
init_resolution=self.init_res,
|
147 |
+
w_space_dim=self.w_space_dim,
|
148 |
+
image_channels=self.image_channels,
|
149 |
+
final_tanh=self.final_tanh,
|
150 |
+
const_input=self.const_input,
|
151 |
+
architecture=self.architecture,
|
152 |
+
fused_modulate=self.fused_modulate,
|
153 |
+
demodulate=self.demodulate,
|
154 |
+
use_wscale=self.use_wscale,
|
155 |
+
fmaps_base=self.fmaps_base,
|
156 |
+
fmaps_max=self.fmaps_max)
|
157 |
+
|
158 |
+
self.pth_to_tf_var_mapping = {}
|
159 |
+
for key, val in self.mapping.pth_to_tf_var_mapping.items():
|
160 |
+
self.pth_to_tf_var_mapping[f'mapping.{key}'] = val
|
161 |
+
for key, val in self.truncation.pth_to_tf_var_mapping.items():
|
162 |
+
self.pth_to_tf_var_mapping[f'truncation.{key}'] = val
|
163 |
+
for key, val in self.synthesis.pth_to_tf_var_mapping.items():
|
164 |
+
self.pth_to_tf_var_mapping[f'synthesis.{key}'] = val
|
165 |
+
|
166 |
+
def forward(self,
|
167 |
+
z,
|
168 |
+
label=None,
|
169 |
+
w_moving_decay=0.995,
|
170 |
+
style_mixing_prob=0.9,
|
171 |
+
trunc_psi=None,
|
172 |
+
trunc_layers=None,
|
173 |
+
randomize_noise=False,
|
174 |
+
**_unused_kwargs):
|
175 |
+
mapping_results = self.mapping(z, label)
|
176 |
+
w = mapping_results['w']
|
177 |
+
|
178 |
+
if self.training and w_moving_decay < 1:
|
179 |
+
batch_w_avg = all_gather(w).mean(dim=0)
|
180 |
+
self.truncation.w_avg.copy_(
|
181 |
+
self.truncation.w_avg * w_moving_decay +
|
182 |
+
batch_w_avg * (1 - w_moving_decay))
|
183 |
+
|
184 |
+
if self.training and style_mixing_prob > 0:
|
185 |
+
new_z = torch.randn_like(z)
|
186 |
+
new_w = self.mapping(new_z, label)['w']
|
187 |
+
if np.random.uniform() < style_mixing_prob:
|
188 |
+
mixing_cutoff = np.random.randint(1, self.num_layers)
|
189 |
+
w = self.truncation(w)
|
190 |
+
new_w = self.truncation(new_w)
|
191 |
+
w[:, :mixing_cutoff] = new_w[:, :mixing_cutoff]
|
192 |
+
|
193 |
+
wp = self.truncation(w, trunc_psi, trunc_layers)
|
194 |
+
synthesis_results = self.synthesis(wp, randomize_noise)
|
195 |
+
|
196 |
+
return {**mapping_results, **synthesis_results}
|
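An illustrative end-to-end call of the generator defined above (a sketch only, not part of the committed file; the weights are randomly initialized rather than pretrained):

import torch

G = StyleGAN2Generator(resolution=256)
G.eval()                                   # skip the w_avg update and style-mixing branches
z = torch.randn(4, G.z_space_dim)
with torch.no_grad():
    outputs = G(z, trunc_psi=0.7, trunc_layers=8)
print(outputs['image'].shape)              # torch.Size([4, 3, 256, 256])
print(outputs['w'].shape)                  # torch.Size([4, 512])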
197 |
+
|
198 |
+
|
199 |
+
class MappingModule(nn.Module):
|
200 |
+
"""Implements the latent space mapping module.
|
201 |
+
|
202 |
+
Basically, this module executes several dense layers in sequence.
|
203 |
+
"""
|
204 |
+
|
205 |
+
def __init__(self,
|
206 |
+
input_space_dim=512,
|
207 |
+
hidden_space_dim=512,
|
208 |
+
final_space_dim=512,
|
209 |
+
label_size=0,
|
210 |
+
num_layers=8,
|
211 |
+
normalize_input=True,
|
212 |
+
use_wscale=True,
|
213 |
+
lr_mul=0.01):
|
214 |
+
super().__init__()
|
215 |
+
|
216 |
+
self.input_space_dim = input_space_dim
|
217 |
+
self.hidden_space_dim = hidden_space_dim
|
218 |
+
self.final_space_dim = final_space_dim
|
219 |
+
self.label_size = label_size
|
220 |
+
self.num_layers = num_layers
|
221 |
+
self.normalize_input = normalize_input
|
222 |
+
self.use_wscale = use_wscale
|
223 |
+
self.lr_mul = lr_mul
|
224 |
+
|
225 |
+
self.norm = PixelNormLayer() if self.normalize_input else nn.Identity()
|
226 |
+
|
227 |
+
self.pth_to_tf_var_mapping = {}
|
228 |
+
for i in range(num_layers):
|
229 |
+
dim_mul = 2 if label_size else 1
|
230 |
+
in_channels = (input_space_dim * dim_mul if i == 0 else
|
231 |
+
hidden_space_dim)
|
232 |
+
out_channels = (final_space_dim if i == (num_layers - 1) else
|
233 |
+
hidden_space_dim)
|
234 |
+
self.add_module(f'dense{i}',
|
235 |
+
DenseBlock(in_channels=in_channels,
|
236 |
+
out_channels=out_channels,
|
237 |
+
use_wscale=self.use_wscale,
|
238 |
+
lr_mul=self.lr_mul))
|
239 |
+
self.pth_to_tf_var_mapping[f'dense{i}.weight'] = f'Dense{i}/weight'
|
240 |
+
self.pth_to_tf_var_mapping[f'dense{i}.bias'] = f'Dense{i}/bias'
|
241 |
+
if label_size:
|
242 |
+
self.label_weight = nn.Parameter(
|
243 |
+
torch.randn(label_size, input_space_dim))
|
244 |
+
self.pth_to_tf_var_mapping[f'label_weight'] = f'LabelConcat/weight'
|
245 |
+
|
246 |
+
def forward(self, z, label=None):
|
247 |
+
if z.ndim != 2 or z.shape[1] != self.input_space_dim:
|
248 |
+
raise ValueError(f'Input latent code should be with shape '
|
249 |
+
f'[batch_size, input_dim], where '
|
250 |
+
f'`input_dim` equals to {self.input_space_dim}!\n'
|
251 |
+
f'But `{z.shape}` is received!')
|
252 |
+
if self.label_size:
|
253 |
+
if label is None:
|
254 |
+
raise ValueError(f'Model requires an additional label '
|
255 |
+
f'(with size {self.label_size}) as input, '
|
256 |
+
f'but no label is received!')
|
257 |
+
if label.ndim != 2 or label.shape != (z.shape[0], self.label_size):
|
258 |
+
raise ValueError(f'Input label should be with shape '
|
259 |
+
f'[batch_size, label_size], where '
|
260 |
+
f'`batch_size` equals to that of '
|
261 |
+
f'latent codes ({z.shape[0]}) and '
|
262 |
+
f'`label_size` equals to {self.label_size}!\n'
|
263 |
+
f'But `{label.shape}` is received!')
|
264 |
+
embedding = torch.matmul(label, self.label_weight)
|
265 |
+
z = torch.cat((z, embedding), dim=1)
|
266 |
+
|
267 |
+
z = self.norm(z)
|
268 |
+
w = z
|
269 |
+
for i in range(self.num_layers):
|
270 |
+
w = self.__getattr__(f'dense{i}')(w)
|
271 |
+
results = {
|
272 |
+
'z': z,
|
273 |
+
'label': label,
|
274 |
+
'w': w,
|
275 |
+
}
|
276 |
+
if self.label_size:
|
277 |
+
results['embedding'] = embedding
|
278 |
+
return results
|
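A stand-alone shape check for the mapping module (illustrative, not part of the committed file); it is normally constructed and called by `StyleGAN2Generator` above.

import torch

mapping = MappingModule(input_space_dim=512, final_space_dim=512, num_layers=8)
z = torch.randn(4, 512)
out = mapping(z)
print(out['z'].shape, out['w'].shape)      # both torch.Size([4, 512]); 'z' is the pixel-normalized input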
279 |
+
|
280 |
+
|
281 |
+
class TruncationModule(nn.Module):
|
282 |
+
"""Implements the truncation module.
|
283 |
+
|
284 |
+
Truncation is executed as follows:
|
285 |
+
|
286 |
+
For layers in range [0, truncation_layers), the truncated w-code is computed
|
287 |
+
as
|
288 |
+
|
289 |
+
w_new = w_avg + (w - w_avg) * truncation_psi
|
290 |
+
|
291 |
+
To disable truncation, please set
|
292 |
+
(1) truncation_psi = 1.0 (None) OR
|
293 |
+
(2) truncation_layers = 0 (None)
|
294 |
+
|
295 |
+
NOTE: The returned tensor is layer-wise style codes.
|
296 |
+
"""
|
297 |
+
|
298 |
+
def __init__(self, w_space_dim, num_layers, repeat_w=True):
|
299 |
+
super().__init__()
|
300 |
+
|
301 |
+
self.num_layers = num_layers
|
302 |
+
self.w_space_dim = w_space_dim
|
303 |
+
self.repeat_w = repeat_w
|
304 |
+
|
305 |
+
if self.repeat_w:
|
306 |
+
self.register_buffer('w_avg', torch.zeros(w_space_dim))
|
307 |
+
else:
|
308 |
+
self.register_buffer('w_avg', torch.zeros(num_layers * w_space_dim))
|
309 |
+
self.pth_to_tf_var_mapping = {'w_avg': 'dlatent_avg'}
|
310 |
+
|
311 |
+
def forward(self, w, trunc_psi=None, trunc_layers=None):
|
312 |
+
if w.ndim == 2:
|
313 |
+
if self.repeat_w and w.shape[1] == self.w_space_dim:
|
314 |
+
w = w.view(-1, 1, self.w_space_dim)
|
315 |
+
wp = w.repeat(1, self.num_layers, 1)
|
316 |
+
else:
|
317 |
+
assert w.shape[1] == self.w_space_dim * self.num_layers
|
318 |
+
wp = w.view(-1, self.num_layers, self.w_space_dim)
|
319 |
+
else:
|
320 |
+
wp = w
|
321 |
+
assert wp.ndim == 3
|
322 |
+
assert wp.shape[1:] == (self.num_layers, self.w_space_dim)
|
323 |
+
|
324 |
+
trunc_psi = 1.0 if trunc_psi is None else trunc_psi
|
325 |
+
trunc_layers = 0 if trunc_layers is None else trunc_layers
|
326 |
+
if trunc_psi < 1.0 and trunc_layers > 0:
|
327 |
+
layer_idx = np.arange(self.num_layers).reshape(1, -1, 1)
|
328 |
+
coefs = np.ones_like(layer_idx, dtype=np.float32)
|
329 |
+
coefs[layer_idx < trunc_layers] *= trunc_psi
|
330 |
+
coefs = torch.from_numpy(coefs).to(wp)
|
331 |
+
w_avg = self.w_avg.view(1, -1, self.w_space_dim)
|
332 |
+
wp = w_avg + (wp - w_avg) * coefs
|
333 |
+
return wp
|
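A small numeric sketch of the truncation rule documented above, w_new = w_avg + (w - w_avg) * trunc_psi, applied to the first `trunc_layers` layers only (illustrative, not part of the committed file):

import torch

trunc = TruncationModule(w_space_dim=512, num_layers=14)   # w_avg starts as zeros
w = torch.randn(2, 512)
wp = trunc(w, trunc_psi=0.5, trunc_layers=8)
print(wp.shape)                       # torch.Size([2, 14, 512])
print((wp[:, 0] / w).mean().item())   # 0.5: early layers are pulled towards w_avg (zero here)
print((wp[:, 10] / w).mean().item())  # 1.0: layers >= trunc_layers are left untouched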
334 |
+
|
335 |
+
|
336 |
+
class SynthesisModule(nn.Module):
|
337 |
+
"""Implements the image synthesis module.
|
338 |
+
|
339 |
+
Basically, this module executes several convolutional layers in sequence.
|
340 |
+
"""
|
341 |
+
|
342 |
+
def __init__(self,
|
343 |
+
resolution=1024,
|
344 |
+
init_resolution=4,
|
345 |
+
w_space_dim=512,
|
346 |
+
image_channels=3,
|
347 |
+
final_tanh=False,
|
348 |
+
const_input=True,
|
349 |
+
architecture='skip',
|
350 |
+
fused_modulate=True,
|
351 |
+
demodulate=True,
|
352 |
+
use_wscale=True,
|
353 |
+
fmaps_base=32 << 10,
|
354 |
+
fmaps_max=512):
|
355 |
+
super().__init__()
|
356 |
+
|
357 |
+
self.init_res = init_resolution
|
358 |
+
self.init_res_log2 = int(np.log2(self.init_res))
|
359 |
+
self.resolution = resolution
|
360 |
+
self.final_res_log2 = int(np.log2(self.resolution))
|
361 |
+
self.w_space_dim = w_space_dim
|
362 |
+
self.image_channels = image_channels
|
363 |
+
self.final_tanh = final_tanh
|
364 |
+
self.const_input = const_input
|
365 |
+
self.architecture = architecture
|
366 |
+
self.fused_modulate = fused_modulate
|
367 |
+
self.demodulate = demodulate
|
368 |
+
self.use_wscale = use_wscale
|
369 |
+
self.fmaps_base = fmaps_base
|
370 |
+
self.fmaps_max = fmaps_max
|
371 |
+
|
372 |
+
self.num_layers = (self.final_res_log2 - self.init_res_log2 + 1) * 2
|
373 |
+
|
374 |
+
self.pth_to_tf_var_mapping = {}
|
375 |
+
for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1):
|
376 |
+
res = 2 ** res_log2
|
377 |
+
block_idx = res_log2 - self.init_res_log2
|
378 |
+
|
379 |
+
# First convolution layer for each resolution.
|
380 |
+
if res == self.init_res:
|
381 |
+
if self.const_input:
|
382 |
+
self.add_module(f'early_layer',
|
383 |
+
InputBlock(init_resolution=self.init_res,
|
384 |
+
channels=self.get_nf(res)))
|
385 |
+
self.pth_to_tf_var_mapping[f'early_layer.const'] = (
|
386 |
+
f'{res}x{res}/Const/const')
|
387 |
+
else:
|
388 |
+
self.add_module(f'early_layer',
|
389 |
+
DenseBlock(in_channels=self.w_space_dim,
|
390 |
+
out_channels=self.get_nf(res),
|
391 |
+
use_wscale=self.use_wscale))
|
392 |
+
self.pth_to_tf_var_mapping[f'early_layer.weight'] = (
|
393 |
+
f'{res}x{res}/Dense/weight')
|
394 |
+
self.pth_to_tf_var_mapping[f'early_layer.bias'] = (
|
395 |
+
f'{res}x{res}/Dense/bias')
|
396 |
+
else:
|
397 |
+
layer_name = f'layer{2 * block_idx - 1}'
|
398 |
+
self.add_module(
|
399 |
+
layer_name,
|
400 |
+
ModulateConvBlock(in_channels=self.get_nf(res // 2),
|
401 |
+
out_channels=self.get_nf(res),
|
402 |
+
resolution=res,
|
403 |
+
w_space_dim=self.w_space_dim,
|
404 |
+
scale_factor=2,
|
405 |
+
fused_modulate=self.fused_modulate,
|
406 |
+
demodulate=self.demodulate,
|
407 |
+
use_wscale=self.use_wscale))
|
408 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
409 |
+
f'{res}x{res}/Conv0_up/weight')
|
410 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = (
|
411 |
+
f'{res}x{res}/Conv0_up/bias')
|
412 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = (
|
413 |
+
f'{res}x{res}/Conv0_up/mod_weight')
|
414 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = (
|
415 |
+
f'{res}x{res}/Conv0_up/mod_bias')
|
416 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.noise_strength'] = (
|
417 |
+
f'{res}x{res}/Conv0_up/noise_strength')
|
418 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.noise'] = (
|
419 |
+
f'noise{2 * block_idx - 1}')
|
420 |
+
|
421 |
+
if self.architecture == 'resnet':
|
422 |
+
layer_name = f'layer{2 * block_idx - 1}'
|
423 |
+
self.add_module(
|
424 |
+
layer_name,
|
425 |
+
ConvBlock(in_channels=self.get_nf(res // 2),
|
426 |
+
out_channels=self.get_nf(res),
|
427 |
+
kernel_size=1,
|
428 |
+
add_bias=False,
|
429 |
+
scale_factor=2,
|
430 |
+
use_wscale=self.use_wscale,
|
431 |
+
activation_type='linear'))
|
432 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
433 |
+
f'{res}x{res}/Skip/weight')
|
434 |
+
|
435 |
+
# Second convolution layer for each resolution.
|
436 |
+
layer_name = f'layer{2 * block_idx}'
|
437 |
+
self.add_module(
|
438 |
+
layer_name,
|
439 |
+
ModulateConvBlock(in_channels=self.get_nf(res),
|
440 |
+
out_channels=self.get_nf(res),
|
441 |
+
resolution=res,
|
442 |
+
w_space_dim=self.w_space_dim,
|
443 |
+
fused_modulate=self.fused_modulate,
|
444 |
+
demodulate=self.demodulate,
|
445 |
+
use_wscale=self.use_wscale))
|
446 |
+
tf_layer_name = 'Conv' if res == self.init_res else 'Conv1'
|
447 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
448 |
+
f'{res}x{res}/{tf_layer_name}/weight')
|
449 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = (
|
450 |
+
f'{res}x{res}/{tf_layer_name}/bias')
|
451 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = (
|
452 |
+
f'{res}x{res}/{tf_layer_name}/mod_weight')
|
453 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = (
|
454 |
+
f'{res}x{res}/{tf_layer_name}/mod_bias')
|
455 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.noise_strength'] = (
|
456 |
+
f'{res}x{res}/{tf_layer_name}/noise_strength')
|
457 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.noise'] = (
|
458 |
+
f'noise{2 * block_idx}')
|
459 |
+
|
460 |
+
# Output convolution layer for each resolution (if needed).
|
461 |
+
if res_log2 == self.final_res_log2 or self.architecture == 'skip':
|
462 |
+
layer_name = f'output{block_idx}'
|
463 |
+
self.add_module(
|
464 |
+
layer_name,
|
465 |
+
ModulateConvBlock(in_channels=self.get_nf(res),
|
466 |
+
out_channels=image_channels,
|
467 |
+
resolution=res,
|
468 |
+
w_space_dim=self.w_space_dim,
|
469 |
+
kernel_size=1,
|
470 |
+
fused_modulate=self.fused_modulate,
|
471 |
+
demodulate=False,
|
472 |
+
use_wscale=self.use_wscale,
|
473 |
+
add_noise=False,
|
474 |
+
activation_type='linear'))
|
475 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
476 |
+
f'{res}x{res}/ToRGB/weight')
|
477 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = (
|
478 |
+
f'{res}x{res}/ToRGB/bias')
|
479 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = (
|
480 |
+
f'{res}x{res}/ToRGB/mod_weight')
|
481 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = (
|
482 |
+
f'{res}x{res}/ToRGB/mod_bias')
|
483 |
+
|
484 |
+
if self.architecture == 'skip':
|
485 |
+
self.upsample = UpsamplingLayer()
|
486 |
+
self.final_activate = nn.Tanh() if final_tanh else nn.Identity()
|
487 |
+
|
488 |
+
def get_nf(self, res):
|
489 |
+
"""Gets number of feature maps according to current resolution."""
|
490 |
+
return min(self.fmaps_base // res, self.fmaps_max)
|
491 |
+
|
492 |
+
def forward(self, wp, randomize_noise=False):
|
493 |
+
if wp.ndim != 3 or wp.shape[1:] != (self.num_layers, self.w_space_dim):
|
494 |
+
raise ValueError(f'Input tensor should be with shape '
|
495 |
+
f'[batch_size, num_layers, w_space_dim], where '
|
496 |
+
f'`num_layers` equals to {self.num_layers}, and '
|
497 |
+
f'`w_space_dim` equals to {self.w_space_dim}!\n'
|
498 |
+
f'But `{wp.shape}` is received!')
|
499 |
+
|
500 |
+
results = {'wp': wp}
|
501 |
+
x = self.early_layer(wp[:, 0])
|
502 |
+
if self.architecture == 'origin':
|
503 |
+
for layer_idx in range(self.num_layers - 1):
|
504 |
+
x, style = self.__getattr__(f'layer{layer_idx}')(
|
505 |
+
x, wp[:, layer_idx], randomize_noise)
|
506 |
+
results[f'style{layer_idx:02d}'] = style
|
507 |
+
image, style = self.__getattr__(f'output{layer_idx // 2}')(
|
508 |
+
x, wp[:, layer_idx + 1])
|
509 |
+
results[f'output_style{layer_idx // 2}'] = style
|
510 |
+
elif self.architecture == 'skip':
|
511 |
+
for layer_idx in range(self.num_layers - 1):
|
512 |
+
x, style = self.__getattr__(f'layer{layer_idx}')(
|
513 |
+
x, wp[:, layer_idx], randomize_noise)
|
514 |
+
results[f'style{layer_idx:02d}'] = style
|
515 |
+
if layer_idx % 2 == 0:
|
516 |
+
temp, style = self.__getattr__(f'output{layer_idx // 2}')(
|
517 |
+
x, wp[:, layer_idx + 1])
|
518 |
+
results[f'output_style{layer_idx // 2}'] = style
|
519 |
+
if layer_idx == 0:
|
520 |
+
image = temp
|
521 |
+
else:
|
522 |
+
image = temp + self.upsample(image)
|
523 |
+
elif self.architecture == 'resnet':
|
524 |
+
x, style = self.layer0(x, wp[:, 0], randomize_noise)
|
525 |
+
results[f'style00'] = style
|
526 |
+
for layer_idx in range(1, self.num_layers - 1, 2):
|
527 |
+
residual = self.__getattr__(f'skip_layer{layer_idx // 2}')(x)
|
528 |
+
x, style = self.__getattr__(f'layer{layer_idx}')(
|
529 |
+
x, wp[:, layer_idx], randomize_noise)
|
530 |
+
results[f'style{layer_idx:02d}'] = style
|
531 |
+
x, style = self.__getattr__(f'layer{layer_idx + 1}')(
|
532 |
+
x, wp[:, layer_idx + 1], randomize_noise)
|
533 |
+
results[f'style{layer_idx + 1:02d}'] = style
|
534 |
+
x = (x + residual) / np.sqrt(2.0)
|
535 |
+
image, style = self.__getattr__(f'output{layer_idx // 2 + 1}')(
|
536 |
+
x, wp[:, layer_idx + 2])
|
537 |
+
results[f'output_style{layer_idx // 2}'] = style
|
538 |
+
results['image'] = self.final_activate(image)
|
539 |
+
return results
|
540 |
+
|
541 |
+
|
542 |
+
class PixelNormLayer(nn.Module):
|
543 |
+
"""Implements pixel-wise feature vector normalization layer."""
|
544 |
+
|
545 |
+
def __init__(self, dim=1, epsilon=1e-8):
|
546 |
+
super().__init__()
|
547 |
+
self.dim = dim
|
548 |
+
self.eps = epsilon
|
549 |
+
|
550 |
+
def forward(self, x):
|
551 |
+
norm = torch.sqrt(
|
552 |
+
torch.mean(x ** 2, dim=self.dim, keepdim=True) + self.eps)
|
553 |
+
return x / norm
|
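A quick check of the effect of this normalization (illustrative, not part of the committed file): after the layer, every sample has unit root-mean-square along the normalized dimension.

import torch

norm = PixelNormLayer(dim=1)
x = torch.randn(4, 512) * 3.0
y = norm(x)
print((y ** 2).mean(dim=1))   # close to 1.0 for every row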
554 |
+
|
555 |
+
|
556 |
+
class UpsamplingLayer(nn.Module):
|
557 |
+
"""Implements the upsampling layer.
|
558 |
+
|
559 |
+
This layer can also be used as filtering by setting `scale_factor` as 1.
|
560 |
+
"""
|
561 |
+
|
562 |
+
def __init__(self,
|
563 |
+
scale_factor=2,
|
564 |
+
kernel=(1, 3, 3, 1),
|
565 |
+
extra_padding=0,
|
566 |
+
kernel_gain=None):
|
567 |
+
super().__init__()
|
568 |
+
assert scale_factor >= 1
|
569 |
+
self.scale_factor = scale_factor
|
570 |
+
|
571 |
+
if extra_padding != 0:
|
572 |
+
assert scale_factor == 1
|
573 |
+
|
574 |
+
if kernel is None:
|
575 |
+
kernel = np.ones((scale_factor), dtype=np.float32)
|
576 |
+
else:
|
577 |
+
kernel = np.array(kernel, dtype=np.float32)
|
578 |
+
assert kernel.ndim == 1
|
579 |
+
kernel = np.outer(kernel, kernel)
|
580 |
+
kernel = kernel / np.sum(kernel)
|
581 |
+
if kernel_gain is None:
|
582 |
+
kernel = kernel * (scale_factor ** 2)
|
583 |
+
else:
|
584 |
+
assert kernel_gain > 0
|
585 |
+
kernel = kernel * (kernel_gain ** 2)
|
586 |
+
assert kernel.ndim == 2
|
587 |
+
assert kernel.shape[0] == kernel.shape[1]
|
588 |
+
kernel = kernel[np.newaxis, np.newaxis]
|
589 |
+
self.register_buffer('kernel', torch.from_numpy(kernel))
|
590 |
+
self.kernel = self.kernel.flip(0, 1)
|
591 |
+
|
592 |
+
self.upsample_padding = (0, scale_factor - 1, # Width padding.
|
593 |
+
0, 0, # Width.
|
594 |
+
0, scale_factor - 1, # Height padding.
|
595 |
+
0, 0, # Height.
|
596 |
+
0, 0, # Channel.
|
597 |
+
0, 0) # Batch size.
|
598 |
+
|
599 |
+
padding = kernel.shape[2] - scale_factor + extra_padding
|
600 |
+
self.padding = ((padding + 1) // 2 + scale_factor - 1, padding // 2,
|
601 |
+
(padding + 1) // 2 + scale_factor - 1, padding // 2)
|
602 |
+
|
603 |
+
def forward(self, x):
|
604 |
+
assert x.ndim == 4
|
605 |
+
channels = x.shape[1]
|
606 |
+
if self.scale_factor > 1:
|
607 |
+
x = x.view(-1, channels, x.shape[2], 1, x.shape[3], 1)
|
608 |
+
x = F.pad(x, self.upsample_padding, mode='constant', value=0)
|
609 |
+
x = x.view(-1, channels, x.shape[2] * self.scale_factor,
|
610 |
+
x.shape[4] * self.scale_factor)
|
611 |
+
x = x.view(-1, 1, x.shape[2], x.shape[3])
|
612 |
+
x = F.pad(x, self.padding, mode='constant', value=0)
|
613 |
+
x = F.conv2d(x, self.kernel, stride=1)
|
614 |
+
x = x.view(-1, channels, x.shape[2], x.shape[3])
|
615 |
+
return x
|
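And the matching shape check for `UpsamplingLayer` (illustrative, not part of the committed file):

import torch

up = UpsamplingLayer(scale_factor=2)   # default (1, 3, 3, 1) filter, gain of scale_factor ** 2
x = torch.randn(2, 8, 32, 32)
y = up(x)
print(y.shape)                         # torch.Size([2, 8, 64, 64])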
616 |
+
|
617 |
+
|
618 |
+
class InputBlock(nn.Module):
|
619 |
+
"""Implements the input block.
|
620 |
+
|
621 |
+
Basically, this block starts from a const input, which is with shape
|
622 |
+
`(channels, init_resolution, init_resolution)`.
|
623 |
+
"""
|
624 |
+
|
625 |
+
def __init__(self, init_resolution, channels):
|
626 |
+
super().__init__()
|
627 |
+
self.const = nn.Parameter(
|
628 |
+
torch.randn(1, channels, init_resolution, init_resolution))
|
629 |
+
|
630 |
+
def forward(self, w):
|
631 |
+
x = self.const.repeat(w.shape[0], 1, 1, 1)
|
632 |
+
return x
|
633 |
+
|
634 |
+
|
635 |
+
class ConvBlock(nn.Module):
|
636 |
+
"""Implements the convolutional block (no style modulation).
|
637 |
+
|
638 |
+
Basically, this block executes convolutional layer, filtering layer (if
|
639 |
+
needed), and activation layer in sequence.
|
640 |
+
|
641 |
+
NOTE: This block is particularly used for skip-connection branch in the
|
642 |
+
`resnet` structure.
|
643 |
+
"""
|
644 |
+
|
645 |
+
def __init__(self,
|
646 |
+
in_channels,
|
647 |
+
out_channels,
|
648 |
+
kernel_size=3,
|
649 |
+
add_bias=True,
|
650 |
+
scale_factor=1,
|
651 |
+
filtering_kernel=(1, 3, 3, 1),
|
652 |
+
use_wscale=True,
|
653 |
+
wscale_gain=_WSCALE_GAIN,
|
654 |
+
lr_mul=1.0,
|
655 |
+
activation_type='lrelu'):
|
656 |
+
"""Initializes with block settings.
|
657 |
+
|
658 |
+
Args:
|
659 |
+
in_channels: Number of channels of the input tensor.
|
660 |
+
out_channels: Number of channels of the output tensor.
|
661 |
+
kernel_size: Size of the convolutional kernels. (default: 3)
|
662 |
+
add_bias: Whether to add bias onto the convolutional result.
|
663 |
+
(default: True)
|
664 |
+
scale_factor: Scale factor for upsampling. `1` means skip
|
665 |
+
upsampling. (default: 1)
|
666 |
+
filtering_kernel: Kernel used for filtering after upsampling.
|
667 |
+
(default: (1, 3, 3, 1))
|
668 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
669 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
670 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
671 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
672 |
+
(default: `lrelu`)
|
673 |
+
|
674 |
+
Raises:
|
675 |
+
NotImplementedError: If the `activation_type` is not supported.
|
676 |
+
"""
|
677 |
+
super().__init__()
|
678 |
+
|
679 |
+
if scale_factor > 1:
|
680 |
+
self.use_conv2d_transpose = True
|
681 |
+
extra_padding = scale_factor - kernel_size
|
682 |
+
self.filter = UpsamplingLayer(scale_factor=1,
|
683 |
+
kernel=filtering_kernel,
|
684 |
+
extra_padding=extra_padding,
|
685 |
+
kernel_gain=scale_factor)
|
686 |
+
self.stride = scale_factor
|
687 |
+
self.padding = 0 # Padding is done in `UpsamplingLayer`.
|
688 |
+
else:
|
689 |
+
self.use_conv2d_transpose = False
|
690 |
+
assert kernel_size % 2 == 1
|
691 |
+
self.stride = 1
|
692 |
+
self.padding = kernel_size // 2
|
693 |
+
|
694 |
+
weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
|
695 |
+
fan_in = kernel_size * kernel_size * in_channels
|
696 |
+
wscale = wscale_gain / np.sqrt(fan_in)
|
697 |
+
if use_wscale:
|
698 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
|
699 |
+
self.wscale = wscale * lr_mul
|
700 |
+
else:
|
701 |
+
self.weight = nn.Parameter(
|
702 |
+
torch.randn(*weight_shape) * wscale / lr_mul)
|
703 |
+
self.wscale = lr_mul
|
704 |
+
|
705 |
+
if add_bias:
|
706 |
+
self.bias = nn.Parameter(torch.zeros(out_channels))
|
707 |
+
else:
|
708 |
+
self.bias = None
|
709 |
+
self.bscale = lr_mul
|
710 |
+
|
711 |
+
if activation_type == 'linear':
|
712 |
+
self.activate = nn.Identity()
|
713 |
+
self.activate_scale = 1.0
|
714 |
+
elif activation_type == 'lrelu':
|
715 |
+
self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
|
716 |
+
self.activate_scale = np.sqrt(2.0)
|
717 |
+
else:
|
718 |
+
raise NotImplementedError(f'Not implemented activation function: '
|
719 |
+
f'`{activation_type}`!')
|
720 |
+
|
721 |
+
def forward(self, x):
|
722 |
+
weight = self.weight * self.wscale
|
723 |
+
bias = self.bias * self.bscale if self.bias is not None else None
|
724 |
+
if self.use_conv2d_transpose:
|
725 |
+
weight = weight.permute(1, 0, 2, 3).flip(2, 3)
|
726 |
+
x = F.conv_transpose2d(x,
|
727 |
+
weight=weight,
|
728 |
+
bias=bias,
|
729 |
+
stride=self.stride,
|
730 |
+
padding=self.padding)
|
731 |
+
x = self.filter(x)
|
732 |
+
else:
|
733 |
+
x = F.conv2d(x,
|
734 |
+
weight=weight,
|
735 |
+
bias=bias,
|
736 |
+
stride=self.stride,
|
737 |
+
padding=self.padding)
|
738 |
+
x = self.activate(x) * self.activate_scale
|
739 |
+
return x
|
740 |
+
|
741 |
+
|
742 |
+
class ModulateConvBlock(nn.Module):
|
743 |
+
"""Implements the convolutional block with style modulation."""
|
744 |
+
|
745 |
+
def __init__(self,
|
746 |
+
in_channels,
|
747 |
+
out_channels,
|
748 |
+
resolution,
|
749 |
+
w_space_dim,
|
750 |
+
kernel_size=3,
|
751 |
+
add_bias=True,
|
752 |
+
scale_factor=1,
|
753 |
+
filtering_kernel=(1, 3, 3, 1),
|
754 |
+
fused_modulate=True,
|
755 |
+
demodulate=True,
|
756 |
+
use_wscale=True,
|
757 |
+
wscale_gain=_WSCALE_GAIN,
|
758 |
+
lr_mul=1.0,
|
759 |
+
add_noise=True,
|
760 |
+
activation_type='lrelu',
|
761 |
+
epsilon=1e-8):
|
762 |
+
"""Initializes with block settings.
|
763 |
+
|
764 |
+
Args:
|
765 |
+
in_channels: Number of channels of the input tensor.
|
766 |
+
out_channels: Number of channels of the output tensor.
|
767 |
+
resolution: Resolution of the output tensor.
|
768 |
+
w_space_dim: Dimension of W space for style modulation.
|
769 |
+
kernel_size: Size of the convolutional kernels. (default: 3)
|
770 |
+
add_bias: Whether to add bias onto the convolutional result.
|
771 |
+
(default: True)
|
772 |
+
scale_factor: Scale factor for upsampling. `1` means skip
|
773 |
+
upsampling. (default: 1)
|
774 |
+
filtering_kernel: Kernel used for filtering after upsampling.
|
775 |
+
(default: (1, 3, 3, 1))
|
776 |
+
fused_modulate: Whether to fuse `style_modulate` and `conv2d`
|
777 |
+
together. (default: True)
|
778 |
+
demodulate: Whether to perform style demodulation. (default: True)
|
779 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
780 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
781 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
782 |
+
add_noise: Whether to add noise onto the output tensor. (default:
|
783 |
+
True)
|
784 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
785 |
+
(default: `lrelu`)
|
786 |
+
epsilon: Small number to avoid `divide by zero`. (default: 1e-8)
|
787 |
+
|
788 |
+
Raises:
|
789 |
+
NotImplementedError: If the `activation_type` is not supported.
|
790 |
+
"""
|
791 |
+
super().__init__()
|
792 |
+
|
793 |
+
self.res = resolution
|
794 |
+
self.in_c = in_channels
|
795 |
+
self.out_c = out_channels
|
796 |
+
self.ksize = kernel_size
|
797 |
+
self.eps = epsilon
|
798 |
+
|
799 |
+
if scale_factor > 1:
|
800 |
+
self.use_conv2d_transpose = True
|
801 |
+
extra_padding = scale_factor - kernel_size
|
802 |
+
self.filter = UpsamplingLayer(scale_factor=1,
|
803 |
+
kernel=filtering_kernel,
|
804 |
+
extra_padding=extra_padding,
|
805 |
+
kernel_gain=scale_factor)
|
806 |
+
self.stride = scale_factor
|
807 |
+
self.padding = 0 # Padding is done in `UpsamplingLayer`.
|
808 |
+
else:
|
809 |
+
self.use_conv2d_transpose = False
|
810 |
+
assert kernel_size % 2 == 1
|
811 |
+
self.stride = 1
|
812 |
+
self.padding = kernel_size // 2
|
813 |
+
|
814 |
+
weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
|
815 |
+
fan_in = kernel_size * kernel_size * in_channels
|
816 |
+
wscale = wscale_gain / np.sqrt(fan_in)
|
817 |
+
if use_wscale:
|
818 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
|
819 |
+
self.wscale = wscale * lr_mul
|
820 |
+
else:
|
821 |
+
self.weight = nn.Parameter(
|
822 |
+
torch.randn(*weight_shape) * wscale / lr_mul)
|
823 |
+
self.wscale = lr_mul
|
824 |
+
|
825 |
+
self.style = DenseBlock(in_channels=w_space_dim,
|
826 |
+
out_channels=in_channels,
|
827 |
+
additional_bias=1.0,
|
828 |
+
use_wscale=use_wscale,
|
829 |
+
activation_type='linear')
|
830 |
+
|
831 |
+
self.fused_modulate = fused_modulate
|
832 |
+
self.demodulate = demodulate
|
833 |
+
|
834 |
+
if add_bias:
|
835 |
+
self.bias = nn.Parameter(torch.zeros(out_channels))
|
836 |
+
else:
|
837 |
+
self.bias = None
|
838 |
+
self.bscale = lr_mul
|
839 |
+
|
840 |
+
if activation_type == 'linear':
|
841 |
+
self.activate = nn.Identity()
|
842 |
+
self.activate_scale = 1.0
|
843 |
+
elif activation_type == 'lrelu':
|
844 |
+
self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
|
845 |
+
self.activate_scale = np.sqrt(2.0)
|
846 |
+
else:
|
847 |
+
raise NotImplementedError(f'Not implemented activation function: '
|
848 |
+
f'`{activation_type}`!')
|
849 |
+
|
850 |
+
self.add_noise = add_noise
|
851 |
+
if self.add_noise:
|
852 |
+
self.register_buffer('noise', torch.randn(1, 1, self.res, self.res))
|
853 |
+
self.noise_strength = nn.Parameter(torch.zeros(()))
|
854 |
+
|
855 |
+
def forward(self, x, w, randomize_noise=False):
|
856 |
+
batch = x.shape[0]
|
857 |
+
|
858 |
+
weight = self.weight * self.wscale
|
859 |
+
weight = weight.permute(2, 3, 1, 0)
|
860 |
+
|
861 |
+
# Style modulation.
|
862 |
+
style = self.style(w)
|
863 |
+
_weight = weight.view(1, self.ksize, self.ksize, self.in_c, self.out_c)
|
864 |
+
_weight = _weight * style.view(batch, 1, 1, self.in_c, 1)
|
865 |
+
|
866 |
+
# Style demodulation.
|
867 |
+
if self.demodulate:
|
868 |
+
_weight_norm = torch.sqrt(
|
869 |
+
torch.sum(_weight ** 2, dim=[1, 2, 3]) + self.eps)
|
870 |
+
_weight = _weight / _weight_norm.view(batch, 1, 1, 1, self.out_c)
|
871 |
+
|
872 |
+
if self.fused_modulate:
|
873 |
+
x = x.view(1, batch * self.in_c, x.shape[2], x.shape[3])
|
874 |
+
weight = _weight.permute(1, 2, 3, 0, 4).reshape(
|
875 |
+
self.ksize, self.ksize, self.in_c, batch * self.out_c)
|
876 |
+
else:
|
877 |
+
x = x * style.view(batch, self.in_c, 1, 1)
|
878 |
+
|
879 |
+
if self.use_conv2d_transpose:
|
880 |
+
weight = weight.flip(0, 1)
|
881 |
+
if self.fused_modulate:
|
882 |
+
weight = weight.view(
|
883 |
+
self.ksize, self.ksize, self.in_c, batch, self.out_c)
|
884 |
+
weight = weight.permute(0, 1, 4, 3, 2)
|
885 |
+
weight = weight.reshape(
|
886 |
+
self.ksize, self.ksize, self.out_c, batch * self.in_c)
|
887 |
+
weight = weight.permute(3, 2, 0, 1)
|
888 |
+
else:
|
889 |
+
weight = weight.permute(2, 3, 0, 1)
|
890 |
+
x = F.conv_transpose2d(x,
|
891 |
+
weight=weight,
|
892 |
+
bias=None,
|
893 |
+
stride=self.stride,
|
894 |
+
padding=self.padding,
|
895 |
+
groups=(batch if self.fused_modulate else 1))
|
896 |
+
x = self.filter(x)
|
897 |
+
else:
|
898 |
+
weight = weight.permute(3, 2, 0, 1)
|
899 |
+
x = F.conv2d(x,
|
900 |
+
weight=weight,
|
901 |
+
bias=None,
|
902 |
+
stride=self.stride,
|
903 |
+
padding=self.padding,
|
904 |
+
groups=(batch if self.fused_modulate else 1))
|
905 |
+
|
906 |
+
if self.fused_modulate:
|
907 |
+
x = x.view(batch, self.out_c, self.res, self.res)
|
908 |
+
elif self.demodulate:
|
909 |
+
x = x / _weight_norm.view(batch, self.out_c, 1, 1)
|
910 |
+
|
911 |
+
if self.add_noise:
|
912 |
+
if randomize_noise:
|
913 |
+
noise = torch.randn(x.shape[0], 1, self.res, self.res).to(x)
|
914 |
+
else:
|
915 |
+
noise = self.noise
|
916 |
+
x = x + noise * self.noise_strength.view(1, 1, 1, 1)
|
917 |
+
|
918 |
+
bias = self.bias * self.bscale if self.bias is not None else None
|
919 |
+
if bias is not None:
|
920 |
+
x = x + bias.view(1, -1, 1, 1)
|
921 |
+
x = self.activate(x) * self.activate_scale
|
922 |
+
return x, style
|
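The fused modulate/demodulate path above is compact but hard to read; below is an unfused sketch of the same idea applied directly to a weight tensor (illustrative, written independently of the class above):

import torch

batch, in_c, out_c, k = 2, 8, 16, 3
weight = torch.randn(out_c, in_c, k, k)
style = torch.randn(batch, in_c) + 1.0                 # per-sample scales for the input channels

# Modulation: scale the kernel's input channels by the style code.
w_mod = weight[None] * style[:, None, :, None, None]   # [batch, out_c, in_c, k, k]

# Demodulation: renormalize each output filter to unit L2 norm.
demod = torch.rsqrt(w_mod.pow(2).sum(dim=[2, 3, 4]) + 1e-8)
w_demod = w_mod * demod[:, :, None, None, None]

print(w_demod.pow(2).sum(dim=[2, 3, 4]).sqrt())        # close to 1.0 for every (sample, filter) pair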
923 |
+
|
924 |
+
|
925 |
+
class DenseBlock(nn.Module):
|
926 |
+
"""Implements the dense block.
|
927 |
+
|
928 |
+
Basically, this block executes fully-connected layer and activation layer.
|
929 |
+
|
930 |
+
NOTE: This layer supports adding an additional bias beyond the trainable
|
931 |
+
bias parameter. This is specially used for the mapping from the w code to
|
932 |
+
the style code.
|
933 |
+
"""
|
934 |
+
|
935 |
+
def __init__(self,
|
936 |
+
in_channels,
|
937 |
+
out_channels,
|
938 |
+
add_bias=True,
|
939 |
+
additional_bias=0,
|
940 |
+
use_wscale=True,
|
941 |
+
wscale_gain=_WSCALE_GAIN,
|
942 |
+
lr_mul=1.0,
|
943 |
+
activation_type='lrelu'):
|
944 |
+
"""Initializes with block settings.
|
945 |
+
|
946 |
+
Args:
|
947 |
+
in_channels: Number of channels of the input tensor.
|
948 |
+
out_channels: Number of channels of the output tensor.
|
949 |
+
add_bias: Whether to add bias onto the fully-connected result.
|
950 |
+
(default: True)
|
951 |
+
additional_bias: The additional bias, which is independent from the
|
952 |
+
bias parameter. (default: 0.0)
|
953 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
954 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
955 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
956 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
957 |
+
(default: `lrelu`)
|
958 |
+
|
959 |
+
Raises:
|
960 |
+
NotImplementedError: If the `activation_type` is not supported.
|
961 |
+
"""
|
962 |
+
super().__init__()
|
963 |
+
weight_shape = (out_channels, in_channels)
|
964 |
+
wscale = wscale_gain / np.sqrt(in_channels)
|
965 |
+
if use_wscale:
|
966 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
|
967 |
+
self.wscale = wscale * lr_mul
|
968 |
+
else:
|
969 |
+
self.weight = nn.Parameter(
|
970 |
+
torch.randn(*weight_shape) * wscale / lr_mul)
|
971 |
+
self.wscale = lr_mul
|
972 |
+
|
973 |
+
if add_bias:
|
974 |
+
self.bias = nn.Parameter(torch.zeros(out_channels))
|
975 |
+
else:
|
976 |
+
self.bias = None
|
977 |
+
self.bscale = lr_mul
|
978 |
+
self.additional_bias = additional_bias
|
979 |
+
|
980 |
+
if activation_type == 'linear':
|
981 |
+
self.activate = nn.Identity()
|
982 |
+
self.activate_scale = 1.0
|
983 |
+
elif activation_type == 'lrelu':
|
984 |
+
self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
|
985 |
+
self.activate_scale = np.sqrt(2.0)
|
986 |
+
else:
|
987 |
+
raise NotImplementedError(f'Not implemented activation function: '
|
988 |
+
f'`{activation_type}`!')
|
989 |
+
|
990 |
+
def forward(self, x):
|
991 |
+
if x.ndim != 2:
|
992 |
+
x = x.view(x.shape[0], -1)
|
993 |
+
bias = self.bias * self.bscale if self.bias is not None else None
|
994 |
+
x = F.linear(x, weight=self.weight * self.wscale, bias=bias)
|
995 |
+
x = self.activate(x + self.additional_bias) * self.activate_scale
|
996 |
+
return x
|
models/stylegan_discriminator.py
ADDED
@@ -0,0 +1,530 @@
1 |
+
# python3.7
|
2 |
+
"""Contains the implementation of discriminator described in StyleGAN.
|
3 |
+
|
4 |
+
Paper: https://arxiv.org/pdf/1812.04948.pdf
|
5 |
+
|
6 |
+
Official TensorFlow implementation: https://github.com/NVlabs/stylegan
|
7 |
+
"""
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
import torch
|
12 |
+
import torch.nn as nn
|
13 |
+
import torch.nn.functional as F
|
14 |
+
|
15 |
+
__all__ = ['StyleGANDiscriminator']
|
16 |
+
|
17 |
+
# Resolutions allowed.
|
18 |
+
_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024]
|
19 |
+
|
20 |
+
# Initial resolution.
|
21 |
+
_INIT_RES = 4
|
22 |
+
|
23 |
+
# Fused-scale options allowed.
|
24 |
+
_FUSED_SCALE_ALLOWED = [True, False, 'auto']
|
25 |
+
|
26 |
+
# Minimal resolution for `auto` fused-scale strategy.
|
27 |
+
_AUTO_FUSED_SCALE_MIN_RES = 128
|
28 |
+
|
29 |
+
# Default gain factor for weight scaling.
|
30 |
+
_WSCALE_GAIN = np.sqrt(2.0)
|
31 |
+
|
32 |
+
|
33 |
+
class StyleGANDiscriminator(nn.Module):
|
34 |
+
"""Defines the discriminator network in StyleGAN.
|
35 |
+
|
36 |
+
NOTE: The discriminator takes images with `RGB` channel order and pixel
|
37 |
+
range [-1, 1] as inputs.
|
38 |
+
|
39 |
+
Settings for the network:
|
40 |
+
|
41 |
+
(1) resolution: The resolution of the input image.
|
42 |
+
(2) image_channels: Number of channels of the input image. (default: 3)
|
43 |
+
(3) label_size: Size of the additional label for conditional generation.
|
44 |
+
(default: 0)
|
45 |
+
(4) fused_scale: Whether to fuse `conv2d` and `downsample` together,
|
46 |
+
resulting in `conv2d` with strides. (default: `auto`)
|
47 |
+
(5) use_wscale: Whether to use weight scaling. (default: True)
|
48 |
+
(6) minibatch_std_group_size: Group size for the minibatch standard
|
49 |
+
deviation layer. 0 means disable. (default: 4)
|
50 |
+
(7) minibatch_std_channels: Number of new channels after the minibatch
|
51 |
+
standard deviation layer. (default: 1)
|
52 |
+
(8) fmaps_base: Factor to control number of feature maps for each layer.
|
53 |
+
(default: 16 << 10)
|
54 |
+
(9) fmaps_max: Maximum number of feature maps in each layer. (default: 512)
|
55 |
+
"""
|
56 |
+
|
57 |
+
def __init__(self,
|
58 |
+
resolution,
|
59 |
+
image_channels=3,
|
60 |
+
label_size=0,
|
61 |
+
fused_scale='auto',
|
62 |
+
use_wscale=True,
|
63 |
+
minibatch_std_group_size=4,
|
64 |
+
minibatch_std_channels=1,
|
65 |
+
fmaps_base=16 << 10,
|
66 |
+
fmaps_max=512):
|
67 |
+
"""Initializes with basic settings.
|
68 |
+
|
69 |
+
Raises:
|
70 |
+
ValueError: If the `resolution` is not supported, or `fused_scale`
|
71 |
+
is not supported.
|
72 |
+
"""
|
73 |
+
super().__init__()
|
74 |
+
|
75 |
+
if resolution not in _RESOLUTIONS_ALLOWED:
|
76 |
+
raise ValueError(f'Invalid resolution: `{resolution}`!\n'
|
77 |
+
f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.')
|
78 |
+
if fused_scale not in _FUSED_SCALE_ALLOWED:
|
79 |
+
raise ValueError(f'Invalid fused-scale option: `{fused_scale}`!\n'
|
80 |
+
f'Options allowed: {_FUSED_SCALE_ALLOWED}.')
|
81 |
+
|
82 |
+
self.init_res = _INIT_RES
|
83 |
+
self.init_res_log2 = int(np.log2(self.init_res))
|
84 |
+
self.resolution = resolution
|
85 |
+
self.final_res_log2 = int(np.log2(self.resolution))
|
86 |
+
self.image_channels = image_channels
|
87 |
+
self.label_size = label_size
|
88 |
+
self.fused_scale = fused_scale
|
89 |
+
self.use_wscale = use_wscale
|
90 |
+
self.minibatch_std_group_size = minibatch_std_group_size
|
91 |
+
self.minibatch_std_channels = minibatch_std_channels
|
92 |
+
self.fmaps_base = fmaps_base
|
93 |
+
self.fmaps_max = fmaps_max
|
94 |
+
|
95 |
+
# Level of detail (used for progressive training).
|
96 |
+
self.register_buffer('lod', torch.zeros(()))
|
97 |
+
self.pth_to_tf_var_mapping = {'lod': 'lod'}
|
98 |
+
|
99 |
+
for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1):
|
100 |
+
res = 2 ** res_log2
|
101 |
+
block_idx = self.final_res_log2 - res_log2
|
102 |
+
|
103 |
+
# Input convolution layer for each resolution.
|
104 |
+
self.add_module(
|
105 |
+
f'input{block_idx}',
|
106 |
+
ConvBlock(in_channels=self.image_channels,
|
107 |
+
out_channels=self.get_nf(res),
|
108 |
+
kernel_size=1,
|
109 |
+
padding=0,
|
110 |
+
use_wscale=self.use_wscale))
|
111 |
+
self.pth_to_tf_var_mapping[f'input{block_idx}.weight'] = (
|
112 |
+
f'FromRGB_lod{block_idx}/weight')
|
113 |
+
self.pth_to_tf_var_mapping[f'input{block_idx}.bias'] = (
|
114 |
+
f'FromRGB_lod{block_idx}/bias')
|
115 |
+
|
116 |
+
# Convolution block for each resolution (except the last one).
|
117 |
+
if res != self.init_res:
|
118 |
+
if self.fused_scale == 'auto':
|
119 |
+
fused_scale = (res >= _AUTO_FUSED_SCALE_MIN_RES)
|
120 |
+
else:
|
121 |
+
fused_scale = self.fused_scale
|
122 |
+
self.add_module(
|
123 |
+
f'layer{2 * block_idx}',
|
124 |
+
ConvBlock(in_channels=self.get_nf(res),
|
125 |
+
out_channels=self.get_nf(res),
|
126 |
+
use_wscale=self.use_wscale))
|
127 |
+
tf_layer0_name = 'Conv0'
|
128 |
+
self.add_module(
|
129 |
+
f'layer{2 * block_idx + 1}',
|
130 |
+
ConvBlock(in_channels=self.get_nf(res),
|
131 |
+
out_channels=self.get_nf(res // 2),
|
132 |
+
downsample=True,
|
133 |
+
fused_scale=fused_scale,
|
134 |
+
use_wscale=self.use_wscale))
|
135 |
+
tf_layer1_name = 'Conv1_down'
|
136 |
+
|
137 |
+
# Convolution block for last resolution.
|
138 |
+
else:
|
139 |
+
self.add_module(
|
140 |
+
f'layer{2 * block_idx}',
|
141 |
+
ConvBlock(in_channels=self.get_nf(res),
|
142 |
+
out_channels=self.get_nf(res),
|
143 |
+
use_wscale=self.use_wscale,
|
144 |
+
minibatch_std_group_size=minibatch_std_group_size,
|
145 |
+
minibatch_std_channels=minibatch_std_channels))
|
146 |
+
tf_layer0_name = 'Conv'
|
147 |
+
self.add_module(
|
148 |
+
f'layer{2 * block_idx + 1}',
|
149 |
+
DenseBlock(in_channels=self.get_nf(res) * res * res,
|
150 |
+
out_channels=self.get_nf(res // 2),
|
151 |
+
use_wscale=self.use_wscale))
|
152 |
+
tf_layer1_name = 'Dense0'
|
153 |
+
|
154 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.weight'] = (
|
155 |
+
f'{res}x{res}/{tf_layer0_name}/weight')
|
156 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx}.bias'] = (
|
157 |
+
f'{res}x{res}/{tf_layer0_name}/bias')
|
158 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.weight'] = (
|
159 |
+
f'{res}x{res}/{tf_layer1_name}/weight')
|
160 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 1}.bias'] = (
|
161 |
+
f'{res}x{res}/{tf_layer1_name}/bias')
|
162 |
+
|
163 |
+
# Final dense block.
|
164 |
+
self.add_module(
|
165 |
+
f'layer{2 * block_idx + 2}',
|
166 |
+
DenseBlock(in_channels=self.get_nf(res // 2),
|
167 |
+
out_channels=max(self.label_size, 1),
|
168 |
+
use_wscale=self.use_wscale,
|
169 |
+
wscale_gain=1.0,
|
170 |
+
activation_type='linear'))
|
171 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.weight'] = (
|
172 |
+
f'{res}x{res}/Dense1/weight')
|
173 |
+
self.pth_to_tf_var_mapping[f'layer{2 * block_idx + 2}.bias'] = (
|
174 |
+
f'{res}x{res}/Dense1/bias')
|
175 |
+
|
176 |
+
self.downsample = DownsamplingLayer()
|
177 |
+
|
178 |
+
def get_nf(self, res):
|
179 |
+
"""Gets number of feature maps according to current resolution."""
|
180 |
+
return min(self.fmaps_base // res, self.fmaps_max)
|
181 |
+
|
182 |
+
def forward(self, image, label=None, lod=None, **_unused_kwargs):
|
183 |
+
expected_shape = (self.image_channels, self.resolution, self.resolution)
|
184 |
+
if image.ndim != 4 or image.shape[1:] != expected_shape:
|
185 |
+
raise ValueError(f'The input tensor should be with shape '
|
186 |
+
f'[batch_size, channel, height, width], where '
|
187 |
+
f'`channel` equals to {self.image_channels}, '
|
188 |
+
f'`height`, `width` equal to {self.resolution}!\n'
|
189 |
+
f'But `{image.shape}` is received!')
|
190 |
+
|
191 |
+
lod = self.lod.cpu().tolist() if lod is None else lod
|
192 |
+
if lod + self.init_res_log2 > self.final_res_log2:
|
193 |
+
raise ValueError(f'Maximum level-of-detail (lod) is '
|
194 |
+
f'{self.final_res_log2 - self.init_res_log2}, '
|
195 |
+
f'but `{lod}` is received!')
|
196 |
+
|
197 |
+
if self.label_size:
|
198 |
+
if label is None:
|
199 |
+
raise ValueError(f'Model requires an additional label '
|
200 |
+
f'(with size {self.label_size}) as input, '
|
201 |
+
f'but no label is received!')
|
202 |
+
batch_size = image.shape[0]
|
203 |
+
if label.ndim != 2 or label.shape != (batch_size, self.label_size):
|
204 |
+
raise ValueError(f'Input label should be with shape '
|
205 |
+
f'[batch_size, label_size], where '
|
206 |
+
f'`batch_size` equals to that of '
|
207 |
+
f'images ({image.shape[0]}) and '
|
208 |
+
f'`label_size` equals to {self.label_size}!\n'
|
209 |
+
f'But `{label.shape}` is received!')
|
210 |
+
|
211 |
+
for res_log2 in range(self.final_res_log2, self.init_res_log2 - 1, -1):
|
212 |
+
block_idx = current_lod = self.final_res_log2 - res_log2
|
213 |
+
if current_lod <= lod < current_lod + 1:
|
214 |
+
x = self.__getattr__(f'input{block_idx}')(image)
|
215 |
+
elif current_lod - 1 < lod < current_lod:
|
216 |
+
alpha = lod - np.floor(lod)
|
217 |
+
x = (self.__getattr__(f'input{block_idx}')(image) * alpha +
|
218 |
+
x * (1 - alpha))
|
219 |
+
if lod < current_lod + 1:
|
220 |
+
x = self.__getattr__(f'layer{2 * block_idx}')(x)
|
221 |
+
x = self.__getattr__(f'layer{2 * block_idx + 1}')(x)
|
222 |
+
if lod > current_lod:
|
223 |
+
image = self.downsample(image)
|
224 |
+
x = self.__getattr__(f'layer{2 * block_idx + 2}')(x)
|
225 |
+
|
226 |
+
if self.label_size:
|
227 |
+
x = torch.sum(x * label, dim=1, keepdim=True)
|
228 |
+
|
229 |
+
return x
|
230 |
+
|
231 |
+
|
232 |
+
class MiniBatchSTDLayer(nn.Module):
|
233 |
+
"""Implements the minibatch standard deviation layer."""
|
234 |
+
|
235 |
+
def __init__(self, group_size=4, new_channels=1, epsilon=1e-8):
|
236 |
+
super().__init__()
|
237 |
+
self.group_size = group_size
|
238 |
+
self.new_channels = new_channels
|
239 |
+
self.epsilon = epsilon
|
240 |
+
|
241 |
+
def forward(self, x):
|
242 |
+
if self.group_size <= 1:
|
243 |
+
return x
|
244 |
+
ng = min(self.group_size, x.shape[0])
|
245 |
+
nc = self.new_channels
|
246 |
+
temp_c = x.shape[1] // nc # [NCHW]
|
247 |
+
y = x.view(ng, -1, nc, temp_c, x.shape[2], x.shape[3]) # [GMncHW]
|
248 |
+
y = y - torch.mean(y, dim=0, keepdim=True) # [GMncHW]
|
249 |
+
y = torch.mean(y ** 2, dim=0) # [MncHW]
|
250 |
+
y = torch.sqrt(y + self.epsilon) # [MncHW]
|
251 |
+
y = torch.mean(y, dim=[2, 3, 4], keepdim=True) # [Mn111]
|
252 |
+
y = torch.mean(y, dim=2) # [Mn11]
|
253 |
+
y = y.repeat(ng, 1, x.shape[2], x.shape[3]) # [NnHW]
|
254 |
+
return torch.cat([x, y], dim=1)
|
255 |
+
|
256 |
+
|
257 |
+
class DownsamplingLayer(nn.Module):
|
258 |
+
"""Implements the downsampling layer.
|
259 |
+
|
260 |
+
Basically, this layer can be used to downsample feature maps with average
|
261 |
+
pooling.
|
262 |
+
"""
|
263 |
+
|
264 |
+
def __init__(self, scale_factor=2):
|
265 |
+
super().__init__()
|
266 |
+
self.scale_factor = scale_factor
|
267 |
+
|
268 |
+
def forward(self, x):
|
269 |
+
if self.scale_factor <= 1:
|
270 |
+
return x
|
271 |
+
return F.avg_pool2d(x,
|
272 |
+
kernel_size=self.scale_factor,
|
273 |
+
stride=self.scale_factor,
|
274 |
+
padding=0)
|
275 |
+
|
276 |
+
|
277 |
+
class Blur(torch.autograd.Function):
|
278 |
+
"""Defines blur operation with customized gradient computation."""
|
279 |
+
|
280 |
+
@staticmethod
|
281 |
+
def forward(ctx, x, kernel):
|
282 |
+
ctx.save_for_backward(kernel)
|
283 |
+
y = F.conv2d(input=x,
|
284 |
+
weight=kernel,
|
285 |
+
bias=None,
|
286 |
+
stride=1,
|
287 |
+
padding=1,
|
288 |
+
groups=x.shape[1])
|
289 |
+
return y
|
290 |
+
|
291 |
+
@staticmethod
|
292 |
+
def backward(ctx, dy):
|
293 |
+
kernel, = ctx.saved_tensors
|
294 |
+
dx = BlurBackPropagation.apply(dy, kernel)
|
295 |
+
return dx, None, None
|
296 |
+
|
297 |
+
|
298 |
+
class BlurBackPropagation(torch.autograd.Function):
|
299 |
+
"""Defines the back propagation of blur operation.
|
300 |
+
|
301 |
+
NOTE: This is used to speed up the backward of gradient penalty.
|
302 |
+
"""
|
303 |
+
|
304 |
+
@staticmethod
|
305 |
+
def forward(ctx, dy, kernel):
|
306 |
+
ctx.save_for_backward(kernel)
|
307 |
+
dx = F.conv2d(input=dy,
|
308 |
+
weight=kernel.flip((2, 3)),
|
309 |
+
bias=None,
|
310 |
+
stride=1,
|
311 |
+
padding=1,
|
312 |
+
groups=dy.shape[1])
|
313 |
+
return dx
|
314 |
+
|
315 |
+
@staticmethod
|
316 |
+
def backward(ctx, ddx):
|
317 |
+
kernel, = ctx.saved_tensors
|
318 |
+
ddy = F.conv2d(input=ddx,
|
319 |
+
weight=kernel,
|
320 |
+
bias=None,
|
321 |
+
stride=1,
|
322 |
+
padding=1,
|
323 |
+
groups=ddx.shape[1])
|
324 |
+
return ddy, None, None
|
325 |
+
|
326 |
+
|
327 |
+
class BlurLayer(nn.Module):
|
328 |
+
"""Implements the blur layer."""
|
329 |
+
|
330 |
+
def __init__(self,
|
331 |
+
channels,
|
332 |
+
kernel=(1, 2, 1),
|
333 |
+
normalize=True):
|
334 |
+
super().__init__()
|
335 |
+
kernel = np.array(kernel, dtype=np.float32).reshape(1, -1)
|
336 |
+
kernel = kernel.T.dot(kernel)
|
337 |
+
if normalize:
|
338 |
+
kernel = kernel / np.sum(kernel)
|
339 |
+
kernel = kernel[np.newaxis, np.newaxis]
|
340 |
+
kernel = np.tile(kernel, [channels, 1, 1, 1])
|
341 |
+
self.register_buffer('kernel', torch.from_numpy(kernel))
|
342 |
+
|
343 |
+
def forward(self, x):
|
344 |
+
return Blur.apply(x, self.kernel)
|
345 |
+
|
346 |
+
|
347 |
+
class ConvBlock(nn.Module):
|
348 |
+
"""Implements the convolutional block.
|
349 |
+
|
350 |
+
Basically, this block executes minibatch standard deviation layer (if
|
351 |
+
needed), convolutional layer, activation layer, and downsampling layer (
|
352 |
+
if needed) in sequence.
|
353 |
+
"""
|
354 |
+
|
355 |
+
def __init__(self,
|
356 |
+
in_channels,
|
357 |
+
out_channels,
|
358 |
+
kernel_size=3,
|
359 |
+
stride=1,
|
360 |
+
padding=1,
|
361 |
+
add_bias=True,
|
362 |
+
downsample=False,
|
363 |
+
fused_scale=False,
|
364 |
+
use_wscale=True,
|
365 |
+
wscale_gain=_WSCALE_GAIN,
|
366 |
+
lr_mul=1.0,
|
367 |
+
activation_type='lrelu',
|
368 |
+
minibatch_std_group_size=0,
|
369 |
+
minibatch_std_channels=1):
|
370 |
+
"""Initializes with block settings.
|
371 |
+
|
372 |
+
Args:
|
373 |
+
in_channels: Number of channels of the input tensor.
|
374 |
+
out_channels: Number of channels of the output tensor.
|
375 |
+
kernel_size: Size of the convolutional kernels. (default: 3)
|
376 |
+
stride: Stride parameter for convolution operation. (default: 1)
|
377 |
+
padding: Padding parameter for convolution operation. (default: 1)
|
378 |
+
add_bias: Whether to add bias onto the convolutional result.
|
379 |
+
(default: True)
|
380 |
+
downsample: Whether to downsample the result after convolution.
|
381 |
+
(default: False)
|
382 |
+
fused_scale: Whether to fuse `conv2d` and `downsample` together,
|
383 |
+
resulting in `conv2d` with strides. (default: False)
|
384 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
385 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
386 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
387 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
388 |
+
(default: `lrelu`)
|
389 |
+
minibatch_std_group_size: Group size for the minibatch standard
|
390 |
+
deviation layer. 0 means disable. (default: 0)
|
391 |
+
minibatch_std_channels: Number of new channels after the minibatch
|
392 |
+
standard deviation layer. (default: 1)
|
393 |
+
|
394 |
+
Raises:
|
395 |
+
NotImplementedError: If the `activation_type` is not supported.
|
396 |
+
"""
|
397 |
+
super().__init__()
|
398 |
+
|
399 |
+
if minibatch_std_group_size > 1:
|
400 |
+
in_channels = in_channels + minibatch_std_channels
|
401 |
+
self.mbstd = MiniBatchSTDLayer(group_size=minibatch_std_group_size,
|
402 |
+
new_channels=minibatch_std_channels)
|
403 |
+
else:
|
404 |
+
self.mbstd = nn.Identity()
|
405 |
+
|
406 |
+
if downsample:
|
407 |
+
self.blur = BlurLayer(channels=in_channels)
|
408 |
+
else:
|
409 |
+
self.blur = nn.Identity()
|
410 |
+
|
411 |
+
if downsample and not fused_scale:
|
412 |
+
self.downsample = DownsamplingLayer()
|
413 |
+
else:
|
414 |
+
self.downsample = nn.Identity()
|
415 |
+
|
416 |
+
if downsample and fused_scale:
|
417 |
+
self.use_stride = True
|
418 |
+
self.stride = 2
|
419 |
+
self.padding = 1
|
420 |
+
else:
|
421 |
+
self.use_stride = False
|
422 |
+
self.stride = stride
|
423 |
+
self.padding = padding
|
424 |
+
|
425 |
+
weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
|
426 |
+
fan_in = kernel_size * kernel_size * in_channels
|
427 |
+
wscale = wscale_gain / np.sqrt(fan_in)
|
428 |
+
if use_wscale:
|
429 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
|
430 |
+
self.wscale = wscale * lr_mul
|
431 |
+
else:
|
432 |
+
self.weight = nn.Parameter(
|
433 |
+
torch.randn(*weight_shape) * wscale / lr_mul)
|
434 |
+
self.wscale = lr_mul
|
435 |
+
|
436 |
+
if add_bias:
|
437 |
+
self.bias = nn.Parameter(torch.zeros(out_channels))
|
438 |
+
self.bscale = lr_mul
|
439 |
+
else:
|
440 |
+
self.bias = None
|
441 |
+
|
442 |
+
if activation_type == 'linear':
|
443 |
+
self.activate = nn.Identity()
|
444 |
+
elif activation_type == 'lrelu':
|
445 |
+
self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
|
446 |
+
else:
|
447 |
+
raise NotImplementedError(f'Not implemented activation function: '
|
448 |
+
f'`{activation_type}`!')
|
449 |
+
|
450 |
+
def forward(self, x):
|
451 |
+
x = self.mbstd(x)
|
452 |
+
x = self.blur(x)
|
453 |
+
weight = self.weight * self.wscale
|
454 |
+
bias = self.bias * self.bscale if self.bias is not None else None
|
455 |
+
if self.use_stride:
|
456 |
+
weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0.0)
|
457 |
+
weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] +
|
458 |
+
weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1]) * 0.25
|
459 |
+
x = F.conv2d(x,
|
460 |
+
weight=weight,
|
461 |
+
bias=bias,
|
462 |
+
stride=self.stride,
|
463 |
+
padding=self.padding)
|
464 |
+
x = self.downsample(x)
|
465 |
+
x = self.activate(x)
|
466 |
+
return x
|
467 |
+
|
468 |
+
|
469 |
+
class DenseBlock(nn.Module):
|
470 |
+
"""Implements the dense block.
|
471 |
+
|
472 |
+
Basically, this block executes fully-connected layer and activation layer.
|
473 |
+
"""
|
474 |
+
|
475 |
+
def __init__(self,
|
476 |
+
in_channels,
|
477 |
+
out_channels,
|
478 |
+
add_bias=True,
|
479 |
+
use_wscale=True,
|
480 |
+
wscale_gain=_WSCALE_GAIN,
|
481 |
+
lr_mul=1.0,
|
482 |
+
activation_type='lrelu'):
|
483 |
+
"""Initializes with block settings.
|
484 |
+
|
485 |
+
Args:
|
486 |
+
in_channels: Number of channels of the input tensor.
|
487 |
+
out_channels: Number of channels of the output tensor.
|
488 |
+
add_bias: Whether to add bias onto the fully-connected result.
|
489 |
+
(default: True)
|
490 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
491 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
492 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
493 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
494 |
+
(default: `lrelu`)
|
495 |
+
|
496 |
+
Raises:
|
497 |
+
NotImplementedError: If the `activation_type` is not supported.
|
498 |
+
"""
|
499 |
+
super().__init__()
|
500 |
+
weight_shape = (out_channels, in_channels)
|
501 |
+
wscale = wscale_gain / np.sqrt(in_channels)
|
502 |
+
if use_wscale:
|
503 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
|
504 |
+
self.wscale = wscale * lr_mul
|
505 |
+
else:
|
506 |
+
self.weight = nn.Parameter(
|
507 |
+
torch.randn(*weight_shape) * wscale / lr_mul)
|
508 |
+
self.wscale = lr_mul
|
509 |
+
|
510 |
+
if add_bias:
|
511 |
+
self.bias = nn.Parameter(torch.zeros(out_channels))
|
512 |
+
self.bscale = lr_mul
|
513 |
+
else:
|
514 |
+
self.bias = None
|
515 |
+
|
516 |
+
if activation_type == 'linear':
|
517 |
+
self.activate = nn.Identity()
|
518 |
+
elif activation_type == 'lrelu':
|
519 |
+
self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
|
520 |
+
else:
|
521 |
+
raise NotImplementedError(f'Not implemented activation function: '
|
522 |
+
f'`{activation_type}`!')
|
523 |
+
|
524 |
+
def forward(self, x):
|
525 |
+
if x.ndim != 2:
|
526 |
+
x = x.view(x.shape[0], -1)
|
527 |
+
bias = self.bias * self.bscale if self.bias is not None else None
|
528 |
+
x = F.linear(x, weight=self.weight * self.wscale, bias=bias)
|
529 |
+
x = self.activate(x)
|
530 |
+
return x
|
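A hypothetical usage sketch for the discriminator above (not part of the commit; it assumes the repository root is on PYTHONPATH so that `models.stylegan_discriminator` is importable):

import torch
from models.stylegan_discriminator import StyleGANDiscriminator

D = StyleGANDiscriminator(resolution=256)
D.eval()
images = torch.randn(2, 3, 256, 256)    # RGB images expected in [-1, 1]
with torch.no_grad():
    scores = D(images)                   # lod defaults to the stored buffer (0)
print(scores.shape)                      # torch.Size([2, 1]) when label_size == 0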
models/stylegan_generator.py
ADDED
@@ -0,0 +1,869 @@
1 |
+
# python3.7
|
2 |
+
"""Contains the implementation of generator described in StyleGAN.
|
3 |
+
|
4 |
+
Paper: https://arxiv.org/pdf/1812.04948.pdf
|
5 |
+
|
6 |
+
Official TensorFlow implementation: https://github.com/NVlabs/stylegan
|
7 |
+
"""
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
import torch
|
12 |
+
import torch.nn as nn
|
13 |
+
import torch.nn.functional as F
|
14 |
+
|
15 |
+
from .sync_op import all_gather
|
16 |
+
|
17 |
+
__all__ = ['StyleGANGenerator']
|
18 |
+
|
19 |
+
# Resolutions allowed.
|
20 |
+
_RESOLUTIONS_ALLOWED = [8, 16, 32, 64, 128, 256, 512, 1024]
|
21 |
+
|
22 |
+
# Initial resolution.
|
23 |
+
_INIT_RES = 4
|
24 |
+
|
25 |
+
# Fused-scale options allowed.
|
26 |
+
_FUSED_SCALE_ALLOWED = [True, False, 'auto']
|
27 |
+
|
28 |
+
# Minimal resolution for `auto` fused-scale strategy.
|
29 |
+
_AUTO_FUSED_SCALE_MIN_RES = 128
|
30 |
+
|
31 |
+
# Default gain factor for weight scaling.
|
32 |
+
_WSCALE_GAIN = np.sqrt(2.0)
|
33 |
+
_STYLEMOD_WSCALE_GAIN = 1.0
|
34 |
+
|
35 |
+
|
36 |
+
class StyleGANGenerator(nn.Module):
|
37 |
+
"""Defines the generator network in StyleGAN.
|
38 |
+
|
39 |
+
NOTE: The synthesized images are with `RGB` channel order and pixel range
|
40 |
+
[-1, 1].
|
41 |
+
|
42 |
+
Settings for the mapping network:
|
43 |
+
|
44 |
+
(1) z_space_dim: Dimension of the input latent space, Z. (default: 512)
|
45 |
+
(2) w_space_dim: Dimension of the output latent space, W. (default: 512)
|
46 |
+
(3) label_size: Size of the additional label for conditional generation.
|
47 |
+
(default: 0)
|
48 |
+
(4) mapping_layers: Number of layers of the mapping network. (default: 8)
|
49 |
+
(5) mapping_fmaps: Number of hidden channels of the mapping network.
|
50 |
+
(default: 512)
|
51 |
+
(6) mapping_lr_mul: Learning rate multiplier for the mapping network.
|
52 |
+
(default: 0.01)
|
53 |
+
(7) repeat_w: Repeat w-code for different layers.
|
54 |
+
|
55 |
+
Settings for the synthesis network:
|
56 |
+
|
57 |
+
(1) resolution: The resolution of the output image.
|
58 |
+
(2) image_channels: Number of channels of the output image. (default: 3)
|
59 |
+
(3) final_tanh: Whether to use `tanh` to control the final pixel range.
|
60 |
+
(default: False)
|
61 |
+
(4) const_input: Whether to use a constant in the first convolutional layer.
|
62 |
+
(default: True)
|
63 |
+
(5) fused_scale: Whether to fuse `upsample` and `conv2d` together,
|
64 |
+
resulting in `conv2d_transpose`. (default: `auto`)
|
65 |
+
(6) use_wscale: Whether to use weight scaling. (default: True)
|
66 |
+
(7) fmaps_base: Factor to control number of feature maps for each layer.
|
67 |
+
(default: 16 << 10)
|
68 |
+
(8) fmaps_max: Maximum number of feature maps in each layer. (default: 512)
|
69 |
+
"""
|
70 |
+
|
71 |
+
def __init__(self,
|
72 |
+
resolution,
|
73 |
+
z_space_dim=512,
|
74 |
+
w_space_dim=512,
|
75 |
+
label_size=0,
|
76 |
+
mapping_layers=8,
|
77 |
+
mapping_fmaps=512,
|
78 |
+
mapping_lr_mul=0.01,
|
79 |
+
repeat_w=True,
|
80 |
+
image_channels=3,
|
81 |
+
final_tanh=False,
|
82 |
+
const_input=True,
|
83 |
+
fused_scale='auto',
|
84 |
+
use_wscale=True,
|
85 |
+
fmaps_base=16 << 10,
|
86 |
+
fmaps_max=512):
|
87 |
+
"""Initializes with basic settings.
|
88 |
+
|
89 |
+
Raises:
|
90 |
+
ValueError: If the `resolution` is not supported, or `fused_scale`
|
91 |
+
is not supported.
|
92 |
+
"""
|
93 |
+
super().__init__()
|
94 |
+
|
95 |
+
if resolution not in _RESOLUTIONS_ALLOWED:
|
96 |
+
raise ValueError(f'Invalid resolution: `{resolution}`!\n'
|
97 |
+
f'Resolutions allowed: {_RESOLUTIONS_ALLOWED}.')
|
98 |
+
if fused_scale not in _FUSED_SCALE_ALLOWED:
|
99 |
+
raise ValueError(f'Invalid fused-scale option: `{fused_scale}`!\n'
|
100 |
+
f'Options allowed: {_FUSED_SCALE_ALLOWED}.')
|
101 |
+
|
102 |
+
self.init_res = _INIT_RES
|
103 |
+
self.resolution = resolution
|
104 |
+
self.z_space_dim = z_space_dim
|
105 |
+
self.w_space_dim = w_space_dim
|
106 |
+
self.label_size = label_size
|
107 |
+
self.mapping_layers = mapping_layers
|
108 |
+
self.mapping_fmaps = mapping_fmaps
|
109 |
+
self.mapping_lr_mul = mapping_lr_mul
|
110 |
+
self.repeat_w = repeat_w
|
111 |
+
self.image_channels = image_channels
|
112 |
+
self.final_tanh = final_tanh
|
113 |
+
self.const_input = const_input
|
114 |
+
self.fused_scale = fused_scale
|
115 |
+
self.use_wscale = use_wscale
|
116 |
+
self.fmaps_base = fmaps_base
|
117 |
+
self.fmaps_max = fmaps_max
|
118 |
+
|
119 |
+
self.num_layers = int(np.log2(self.resolution // self.init_res * 2)) * 2
|
120 |
+
|
121 |
+
if self.repeat_w:
|
122 |
+
self.mapping_space_dim = self.w_space_dim
|
123 |
+
else:
|
124 |
+
self.mapping_space_dim = self.w_space_dim * self.num_layers
|
125 |
+
self.mapping = MappingModule(input_space_dim=self.z_space_dim,
|
126 |
+
hidden_space_dim=self.mapping_fmaps,
|
127 |
+
final_space_dim=self.mapping_space_dim,
|
128 |
+
label_size=self.label_size,
|
129 |
+
num_layers=self.mapping_layers,
|
130 |
+
use_wscale=self.use_wscale,
|
131 |
+
lr_mul=self.mapping_lr_mul)
|
132 |
+
|
133 |
+
self.truncation = TruncationModule(w_space_dim=self.w_space_dim,
|
134 |
+
num_layers=self.num_layers,
|
135 |
+
repeat_w=self.repeat_w)
|
136 |
+
|
137 |
+
self.synthesis = SynthesisModule(resolution=self.resolution,
|
138 |
+
init_resolution=self.init_res,
|
139 |
+
w_space_dim=self.w_space_dim,
|
140 |
+
image_channels=self.image_channels,
|
141 |
+
final_tanh=self.final_tanh,
|
142 |
+
const_input=self.const_input,
|
143 |
+
fused_scale=self.fused_scale,
|
144 |
+
use_wscale=self.use_wscale,
|
145 |
+
fmaps_base=self.fmaps_base,
|
146 |
+
fmaps_max=self.fmaps_max)
|
147 |
+
|
148 |
+
self.pth_to_tf_var_mapping = {}
|
149 |
+
for key, val in self.mapping.pth_to_tf_var_mapping.items():
|
150 |
+
self.pth_to_tf_var_mapping[f'mapping.{key}'] = val
|
151 |
+
for key, val in self.truncation.pth_to_tf_var_mapping.items():
|
152 |
+
self.pth_to_tf_var_mapping[f'truncation.{key}'] = val
|
153 |
+
for key, val in self.synthesis.pth_to_tf_var_mapping.items():
|
154 |
+
self.pth_to_tf_var_mapping[f'synthesis.{key}'] = val
|
155 |
+
|
156 |
+
def forward(self,
|
157 |
+
z,
|
158 |
+
label=None,
|
159 |
+
lod=None,
|
160 |
+
w_moving_decay=0.995,
|
161 |
+
style_mixing_prob=0.9,
|
162 |
+
trunc_psi=None,
|
163 |
+
trunc_layers=None,
|
164 |
+
randomize_noise=False,
|
165 |
+
**_unused_kwargs):
|
166 |
+
mapping_results = self.mapping(z, label)
|
167 |
+
w = mapping_results['w']
|
168 |
+
|
169 |
+
if self.training and w_moving_decay < 1:
|
170 |
+
batch_w_avg = all_gather(w).mean(dim=0)
|
171 |
+
self.truncation.w_avg.copy_(
|
172 |
+
self.truncation.w_avg * w_moving_decay +
|
173 |
+
batch_w_avg * (1 - w_moving_decay))
|
174 |
+
|
175 |
+
if self.training and style_mixing_prob > 0:
|
176 |
+
new_z = torch.randn_like(z)
|
177 |
+
new_w = self.mapping(new_z, label)['w']
|
178 |
+
lod = self.synthesis.lod.cpu().tolist() if lod is None else lod
|
179 |
+
current_layers = self.num_layers - int(lod) * 2
|
180 |
+
if np.random.uniform() < style_mixing_prob:
|
181 |
+
mixing_cutoff = np.random.randint(1, current_layers)
|
182 |
+
w = self.truncation(w)
|
183 |
+
new_w = self.truncation(new_w)
|
184 |
+
w[:, mixing_cutoff:] = new_w[:, mixing_cutoff:]
|
185 |
+
|
186 |
+
wp = self.truncation(w, trunc_psi, trunc_layers)
|
187 |
+
synthesis_results = self.synthesis(wp, lod, randomize_noise)
|
188 |
+
|
189 |
+
return {**mapping_results, **synthesis_results}
|
190 |
+
|
191 |
+
|
192 |
+
class MappingModule(nn.Module):
|
193 |
+
"""Implements the latent space mapping module.
|
194 |
+
|
195 |
+
Basically, this module executes several dense layers in sequence.
|
196 |
+
"""
|
197 |
+
|
198 |
+
def __init__(self,
|
199 |
+
input_space_dim=512,
|
200 |
+
hidden_space_dim=512,
|
201 |
+
final_space_dim=512,
|
202 |
+
label_size=0,
|
203 |
+
num_layers=8,
|
204 |
+
normalize_input=True,
|
205 |
+
use_wscale=True,
|
206 |
+
lr_mul=0.01):
|
207 |
+
super().__init__()
|
208 |
+
|
209 |
+
self.input_space_dim = input_space_dim
|
210 |
+
self.hidden_space_dim = hidden_space_dim
|
211 |
+
self.final_space_dim = final_space_dim
|
212 |
+
self.label_size = label_size
|
213 |
+
self.num_layers = num_layers
|
214 |
+
self.normalize_input = normalize_input
|
215 |
+
self.use_wscale = use_wscale
|
216 |
+
self.lr_mul = lr_mul
|
217 |
+
|
218 |
+
self.norm = PixelNormLayer() if self.normalize_input else nn.Identity()
|
219 |
+
|
220 |
+
self.pth_to_tf_var_mapping = {}
|
221 |
+
for i in range(num_layers):
|
222 |
+
dim_mul = 2 if label_size else 1
|
223 |
+
in_channels = (input_space_dim * dim_mul if i == 0 else
|
224 |
+
hidden_space_dim)
|
225 |
+
out_channels = (final_space_dim if i == (num_layers - 1) else
|
226 |
+
hidden_space_dim)
|
227 |
+
self.add_module(f'dense{i}',
|
228 |
+
DenseBlock(in_channels=in_channels,
|
229 |
+
out_channels=out_channels,
|
230 |
+
use_wscale=self.use_wscale,
|
231 |
+
lr_mul=self.lr_mul))
|
232 |
+
self.pth_to_tf_var_mapping[f'dense{i}.weight'] = f'Dense{i}/weight'
|
233 |
+
self.pth_to_tf_var_mapping[f'dense{i}.bias'] = f'Dense{i}/bias'
|
234 |
+
if label_size:
|
235 |
+
self.label_weight = nn.Parameter(
|
236 |
+
torch.randn(label_size, input_space_dim))
|
237 |
+
self.pth_to_tf_var_mapping[f'label_weight'] = f'LabelConcat/weight'
|
238 |
+
|
239 |
+
def forward(self, z, label=None):
|
240 |
+
if z.ndim != 2 or z.shape[1] != self.input_space_dim:
|
241 |
+
raise ValueError(f'Input latent code should be with shape '
|
242 |
+
f'[batch_size, input_dim], where '
|
243 |
+
f'`input_dim` equals to {self.input_space_dim}!\n'
|
244 |
+
f'But `{z.shape}` is received!')
|
245 |
+
if self.label_size:
|
246 |
+
if label is None:
|
247 |
+
raise ValueError(f'Model requires an additional label '
|
248 |
+
f'(with size {self.label_size}) as input, '
|
249 |
+
f'but no label is received!')
|
250 |
+
if label.ndim != 2 or label.shape != (z.shape[0], self.label_size):
|
251 |
+
raise ValueError(f'Input label should be with shape '
|
252 |
+
f'[batch_size, label_size], where '
|
253 |
+
f'`batch_size` equals to that of '
|
254 |
+
f'latent codes ({z.shape[0]}) and '
|
255 |
+
f'`label_size` equals to {self.label_size}!\n'
|
256 |
+
f'But `{label.shape}` is received!')
|
257 |
+
embedding = torch.matmul(label, self.label_weight)
|
258 |
+
z = torch.cat((z, embedding), dim=1)
|
259 |
+
|
260 |
+
z = self.norm(z)
|
261 |
+
w = z
|
262 |
+
for i in range(self.num_layers):
|
263 |
+
w = self.__getattr__(f'dense{i}')(w)
|
264 |
+
results = {
|
265 |
+
'z': z,
|
266 |
+
'label': label,
|
267 |
+
'w': w,
|
268 |
+
}
|
269 |
+
if self.label_size:
|
270 |
+
results['embedding'] = embedding
|
271 |
+
return results
|
272 |
+
|
273 |
+
|
274 |
+
class TruncationModule(nn.Module):
|
275 |
+
"""Implements the truncation module.
|
276 |
+
|
277 |
+
Truncation is executed as follows:
|
278 |
+
|
279 |
+
For layers in range [0, truncation_layers), the truncated w-code is computed
|
280 |
+
as
|
281 |
+
|
282 |
+
w_new = w_avg + (w - w_avg) * truncation_psi
|
283 |
+
|
284 |
+
To disable truncation, please set
|
285 |
+
(1) truncation_psi = 1.0 (None) OR
|
286 |
+
(2) truncation_layers = 0 (None)
|
287 |
+
|
288 |
+
NOTE: The returned tensor is layer-wise style codes.
|
289 |
+
"""
|
290 |
+
|
291 |
+
def __init__(self, w_space_dim, num_layers, repeat_w=True):
|
292 |
+
super().__init__()
|
293 |
+
|
294 |
+
self.num_layers = num_layers
|
295 |
+
self.w_space_dim = w_space_dim
|
296 |
+
self.repeat_w = repeat_w
|
297 |
+
|
298 |
+
if self.repeat_w:
|
299 |
+
self.register_buffer('w_avg', torch.zeros(w_space_dim))
|
300 |
+
else:
|
301 |
+
self.register_buffer('w_avg', torch.zeros(num_layers * w_space_dim))
|
302 |
+
self.pth_to_tf_var_mapping = {'w_avg': 'dlatent_avg'}
|
303 |
+
|
304 |
+
def forward(self, w, trunc_psi=None, trunc_layers=None):
|
305 |
+
if w.ndim == 2:
|
306 |
+
if self.repeat_w and w.shape[1] == self.w_space_dim:
|
307 |
+
w = w.view(-1, 1, self.w_space_dim)
|
308 |
+
wp = w.repeat(1, self.num_layers, 1)
|
309 |
+
else:
|
310 |
+
assert w.shape[1] == self.w_space_dim * self.num_layers
|
311 |
+
wp = w.view(-1, self.num_layers, self.w_space_dim)
|
312 |
+
else:
|
313 |
+
wp = w
|
314 |
+
assert wp.ndim == 3
|
315 |
+
assert wp.shape[1:] == (self.num_layers, self.w_space_dim)
|
316 |
+
|
317 |
+
trunc_psi = 1.0 if trunc_psi is None else trunc_psi
|
318 |
+
trunc_layers = 0 if trunc_layers is None else trunc_layers
|
319 |
+
if trunc_psi < 1.0 and trunc_layers > 0:
|
320 |
+
layer_idx = np.arange(self.num_layers).reshape(1, -1, 1)
|
321 |
+
coefs = np.ones_like(layer_idx, dtype=np.float32)
|
322 |
+
coefs[layer_idx < trunc_layers] *= trunc_psi
|
323 |
+
coefs = torch.from_numpy(coefs).to(wp)
|
324 |
+
w_avg = self.w_avg.view(1, -1, self.w_space_dim)
|
325 |
+
wp = w_avg + (wp - w_avg) * coefs
|
326 |
+
return wp
|
327 |
+
|
328 |
+
|
329 |
+
class SynthesisModule(nn.Module):
|
330 |
+
"""Implements the image synthesis module.
|
331 |
+
|
332 |
+
Basically, this module executes several convolutional layers in sequence.
|
333 |
+
"""
|
334 |
+
|
335 |
+
def __init__(self,
|
336 |
+
resolution=1024,
|
337 |
+
init_resolution=4,
|
338 |
+
w_space_dim=512,
|
339 |
+
image_channels=3,
|
340 |
+
final_tanh=False,
|
341 |
+
const_input=True,
|
342 |
+
fused_scale='auto',
|
343 |
+
use_wscale=True,
|
344 |
+
fmaps_base=16 << 10,
|
345 |
+
fmaps_max=512):
|
346 |
+
super().__init__()
|
347 |
+
|
348 |
+
self.init_res = init_resolution
|
349 |
+
self.init_res_log2 = int(np.log2(self.init_res))
|
350 |
+
self.resolution = resolution
|
351 |
+
self.final_res_log2 = int(np.log2(self.resolution))
|
352 |
+
self.w_space_dim = w_space_dim
|
353 |
+
self.image_channels = image_channels
|
354 |
+
self.final_tanh = final_tanh
|
355 |
+
self.const_input = const_input
|
356 |
+
self.fused_scale = fused_scale
|
357 |
+
self.use_wscale = use_wscale
|
358 |
+
self.fmaps_base = fmaps_base
|
359 |
+
self.fmaps_max = fmaps_max
|
360 |
+
|
361 |
+
self.num_layers = (self.final_res_log2 - self.init_res_log2 + 1) * 2
|
362 |
+
|
363 |
+
# Level of detail (used for progressive training).
|
364 |
+
self.register_buffer('lod', torch.zeros(()))
|
365 |
+
self.pth_to_tf_var_mapping = {'lod': 'lod'}
|
366 |
+
|
367 |
+
for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1):
|
368 |
+
res = 2 ** res_log2
|
369 |
+
block_idx = res_log2 - self.init_res_log2
|
370 |
+
|
371 |
+
# First convolution layer for each resolution.
|
372 |
+
layer_name = f'layer{2 * block_idx}'
|
373 |
+
if res == self.init_res:
|
374 |
+
if self.const_input:
|
375 |
+
self.add_module(layer_name,
|
376 |
+
ConvBlock(in_channels=self.get_nf(res),
|
377 |
+
out_channels=self.get_nf(res),
|
378 |
+
resolution=self.init_res,
|
379 |
+
w_space_dim=self.w_space_dim,
|
380 |
+
position='const_init',
|
381 |
+
use_wscale=self.use_wscale))
|
382 |
+
tf_layer_name = 'Const'
|
383 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.const'] = (
|
384 |
+
f'{res}x{res}/{tf_layer_name}/const')
|
385 |
+
else:
|
386 |
+
self.add_module(layer_name,
|
387 |
+
ConvBlock(in_channels=self.w_space_dim,
|
388 |
+
out_channels=self.get_nf(res),
|
389 |
+
resolution=self.init_res,
|
390 |
+
w_space_dim=self.w_space_dim,
|
391 |
+
kernel_size=self.init_res,
|
392 |
+
padding=self.init_res - 1,
|
393 |
+
use_wscale=self.use_wscale))
|
394 |
+
tf_layer_name = 'Dense'
|
395 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
396 |
+
f'{res}x{res}/{tf_layer_name}/weight')
|
397 |
+
else:
|
398 |
+
if self.fused_scale == 'auto':
|
399 |
+
fused_scale = (res >= _AUTO_FUSED_SCALE_MIN_RES)
|
400 |
+
else:
|
401 |
+
fused_scale = self.fused_scale
|
402 |
+
self.add_module(layer_name,
|
403 |
+
ConvBlock(in_channels=self.get_nf(res // 2),
|
404 |
+
out_channels=self.get_nf(res),
|
405 |
+
resolution=res,
|
406 |
+
w_space_dim=self.w_space_dim,
|
407 |
+
upsample=True,
|
408 |
+
fused_scale=fused_scale,
|
409 |
+
use_wscale=self.use_wscale))
|
410 |
+
tf_layer_name = 'Conv0_up'
|
411 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
412 |
+
f'{res}x{res}/{tf_layer_name}/weight')
|
413 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = (
|
414 |
+
f'{res}x{res}/{tf_layer_name}/bias')
|
415 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = (
|
416 |
+
f'{res}x{res}/{tf_layer_name}/StyleMod/weight')
|
417 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = (
|
418 |
+
f'{res}x{res}/{tf_layer_name}/StyleMod/bias')
|
419 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.weight'] = (
|
420 |
+
f'{res}x{res}/{tf_layer_name}/Noise/weight')
|
421 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.noise'] = (
|
422 |
+
f'noise{2 * block_idx}')
|
423 |
+
|
424 |
+
# Second convolution layer for each resolution.
|
425 |
+
layer_name = f'layer{2 * block_idx + 1}'
|
426 |
+
self.add_module(layer_name,
|
427 |
+
ConvBlock(in_channels=self.get_nf(res),
|
428 |
+
out_channels=self.get_nf(res),
|
429 |
+
resolution=res,
|
430 |
+
w_space_dim=self.w_space_dim,
|
431 |
+
use_wscale=self.use_wscale))
|
432 |
+
tf_layer_name = 'Conv' if res == self.init_res else 'Conv1'
|
433 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.weight'] = (
|
434 |
+
f'{res}x{res}/{tf_layer_name}/weight')
|
435 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.bias'] = (
|
436 |
+
f'{res}x{res}/{tf_layer_name}/bias')
|
437 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.weight'] = (
|
438 |
+
f'{res}x{res}/{tf_layer_name}/StyleMod/weight')
|
439 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.style.bias'] = (
|
440 |
+
f'{res}x{res}/{tf_layer_name}/StyleMod/bias')
|
441 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.weight'] = (
|
442 |
+
f'{res}x{res}/{tf_layer_name}/Noise/weight')
|
443 |
+
self.pth_to_tf_var_mapping[f'{layer_name}.apply_noise.noise'] = (
|
444 |
+
f'noise{2 * block_idx + 1}')
|
445 |
+
|
446 |
+
# Output convolution layer for each resolution.
|
447 |
+
self.add_module(f'output{block_idx}',
|
448 |
+
ConvBlock(in_channels=self.get_nf(res),
|
449 |
+
out_channels=self.image_channels,
|
450 |
+
resolution=res,
|
451 |
+
w_space_dim=self.w_space_dim,
|
452 |
+
position='last',
|
453 |
+
kernel_size=1,
|
454 |
+
padding=0,
|
455 |
+
use_wscale=self.use_wscale,
|
456 |
+
wscale_gain=1.0,
|
457 |
+
activation_type='linear'))
|
458 |
+
self.pth_to_tf_var_mapping[f'output{block_idx}.weight'] = (
|
459 |
+
f'ToRGB_lod{self.final_res_log2 - res_log2}/weight')
|
460 |
+
self.pth_to_tf_var_mapping[f'output{block_idx}.bias'] = (
|
461 |
+
f'ToRGB_lod{self.final_res_log2 - res_log2}/bias')
|
462 |
+
|
463 |
+
self.upsample = UpsamplingLayer()
|
464 |
+
self.final_activate = nn.Tanh() if final_tanh else nn.Identity()
|
465 |
+
|
466 |
+
def get_nf(self, res):
|
467 |
+
"""Gets number of feature maps according to current resolution."""
|
468 |
+
return min(self.fmaps_base // res, self.fmaps_max)
|
469 |
+
|
470 |
+
def forward(self, wp, lod=None, randomize_noise=False):
|
471 |
+
if wp.ndim != 3 or wp.shape[1:] != (self.num_layers, self.w_space_dim):
|
472 |
+
raise ValueError(f'Input tensor should be with shape '
|
473 |
+
f'[batch_size, num_layers, w_space_dim], where '
|
474 |
+
f'`num_layers` equals to {self.num_layers}, and '
|
475 |
+
f'`w_space_dim` equals to {self.w_space_dim}!\n'
|
476 |
+
f'But `{wp.shape}` is received!')
|
477 |
+
|
478 |
+
lod = self.lod.cpu().tolist() if lod is None else lod
|
479 |
+
if lod + self.init_res_log2 > self.final_res_log2:
|
480 |
+
raise ValueError(f'Maximum level-of-detail (lod) is '
|
481 |
+
f'{self.final_res_log2 - self.init_res_log2}, '
|
482 |
+
f'but `{lod}` is received!')
|
483 |
+
|
484 |
+
results = {'wp': wp}
|
485 |
+
for res_log2 in range(self.init_res_log2, self.final_res_log2 + 1):
|
486 |
+
current_lod = self.final_res_log2 - res_log2
|
487 |
+
if lod < current_lod + 1:
|
488 |
+
block_idx = res_log2 - self.init_res_log2
|
489 |
+
if block_idx == 0:
|
490 |
+
if self.const_input:
|
491 |
+
x, style = self.layer0(None, wp[:, 0], randomize_noise)
|
492 |
+
else:
|
493 |
+
x = wp[:, 0].view(-1, self.w_space_dim, 1, 1)
|
494 |
+
x, style = self.layer0(x, wp[:, 0], randomize_noise)
|
495 |
+
else:
|
496 |
+
x, style = self.__getattr__(f'layer{2 * block_idx}')(
|
497 |
+
x, wp[:, 2 * block_idx])
|
498 |
+
results[f'style{2 * block_idx:02d}'] = style
|
499 |
+
x, style = self.__getattr__(f'layer{2 * block_idx + 1}')(
|
500 |
+
x, wp[:, 2 * block_idx + 1])
|
501 |
+
results[f'style{2 * block_idx + 1:02d}'] = style
|
502 |
+
if current_lod - 1 < lod <= current_lod:
|
503 |
+
image = self.__getattr__(f'output{block_idx}')(x, None)
|
504 |
+
elif current_lod < lod < current_lod + 1:
|
505 |
+
alpha = np.ceil(lod) - lod
|
506 |
+
image = (self.__getattr__(f'output{block_idx}')(x, None) * alpha
|
507 |
+
+ self.upsample(image) * (1 - alpha))
|
508 |
+
elif lod >= current_lod + 1:
|
509 |
+
image = self.upsample(image)
|
510 |
+
results['image'] = self.final_activate(image)
|
511 |
+
return results
|
512 |
+
|
513 |
+
|
514 |
+
class PixelNormLayer(nn.Module):
|
515 |
+
"""Implements pixel-wise feature vector normalization layer."""
|
516 |
+
|
517 |
+
def __init__(self, epsilon=1e-8):
|
518 |
+
super().__init__()
|
519 |
+
self.eps = epsilon
|
520 |
+
|
521 |
+
def forward(self, x):
|
522 |
+
norm = torch.sqrt(torch.mean(x ** 2, dim=1, keepdim=True) + self.eps)
|
523 |
+
return x / norm
|
524 |
+
|
525 |
+
|
526 |
+
class InstanceNormLayer(nn.Module):
|
527 |
+
"""Implements instance normalization layer."""
|
528 |
+
|
529 |
+
def __init__(self, epsilon=1e-8):
|
530 |
+
super().__init__()
|
531 |
+
self.eps = epsilon
|
532 |
+
|
533 |
+
def forward(self, x):
|
534 |
+
if x.ndim != 4:
|
535 |
+
raise ValueError(f'The input tensor should be with shape '
|
536 |
+
f'[batch_size, channel, height, width], '
|
537 |
+
f'but `{x.shape}` is received!')
|
538 |
+
x = x - torch.mean(x, dim=[2, 3], keepdim=True)
|
539 |
+
norm = torch.sqrt(
|
540 |
+
torch.mean(x ** 2, dim=[2, 3], keepdim=True) + self.eps)
|
541 |
+
return x / norm
|
542 |
+
|
543 |
+
|
544 |
+
class UpsamplingLayer(nn.Module):
|
545 |
+
"""Implements the upsampling layer.
|
546 |
+
|
547 |
+
Basically, this layer can be used to upsample feature maps with nearest
|
548 |
+
neighbor interpolation.
|
549 |
+
"""
|
550 |
+
|
551 |
+
def __init__(self, scale_factor=2):
|
552 |
+
super().__init__()
|
553 |
+
self.scale_factor = scale_factor
|
554 |
+
|
555 |
+
def forward(self, x):
|
556 |
+
if self.scale_factor <= 1:
|
557 |
+
return x
|
558 |
+
return F.interpolate(x, scale_factor=self.scale_factor, mode='nearest')
|
559 |
+
|
560 |
+
|
561 |
+
class Blur(torch.autograd.Function):
|
562 |
+
"""Defines blur operation with customized gradient computation."""
|
563 |
+
|
564 |
+
@staticmethod
|
565 |
+
def forward(ctx, x, kernel):
|
566 |
+
ctx.save_for_backward(kernel)
|
567 |
+
y = F.conv2d(input=x,
|
568 |
+
weight=kernel,
|
569 |
+
bias=None,
|
570 |
+
stride=1,
|
571 |
+
padding=1,
|
572 |
+
groups=x.shape[1])
|
573 |
+
return y
|
574 |
+
|
575 |
+
@staticmethod
|
576 |
+
def backward(ctx, dy):
|
577 |
+
kernel, = ctx.saved_tensors
|
578 |
+
dx = F.conv2d(input=dy,
|
579 |
+
weight=kernel.flip((2, 3)),
|
580 |
+
bias=None,
|
581 |
+
stride=1,
|
582 |
+
padding=1,
|
583 |
+
groups=dy.shape[1])
|
584 |
+
return dx, None, None
|
585 |
+
|
586 |
+
|
587 |
+
class BlurLayer(nn.Module):
|
588 |
+
"""Implements the blur layer."""
|
589 |
+
|
590 |
+
def __init__(self,
|
591 |
+
channels,
|
592 |
+
kernel=(1, 2, 1),
|
593 |
+
normalize=True):
|
594 |
+
super().__init__()
|
595 |
+
kernel = np.array(kernel, dtype=np.float32).reshape(1, -1)
|
596 |
+
kernel = kernel.T.dot(kernel)
|
597 |
+
if normalize:
|
598 |
+
kernel /= np.sum(kernel)
|
599 |
+
kernel = kernel[np.newaxis, np.newaxis]
|
600 |
+
kernel = np.tile(kernel, [channels, 1, 1, 1])
|
601 |
+
self.register_buffer('kernel', torch.from_numpy(kernel))
|
602 |
+
|
603 |
+
def forward(self, x):
|
604 |
+
return Blur.apply(x, self.kernel)
|
605 |
+
|
606 |
+
|
607 |
+
class NoiseApplyingLayer(nn.Module):
|
608 |
+
"""Implements the noise applying layer."""
|
609 |
+
|
610 |
+
def __init__(self, resolution, channels):
|
611 |
+
super().__init__()
|
612 |
+
self.res = resolution
|
613 |
+
self.register_buffer('noise', torch.randn(1, 1, self.res, self.res))
|
614 |
+
self.weight = nn.Parameter(torch.zeros(channels))
|
615 |
+
|
616 |
+
def forward(self, x, randomize_noise=False):
|
617 |
+
if x.ndim != 4:
|
618 |
+
raise ValueError(f'The input tensor should be with shape '
|
619 |
+
f'[batch_size, channel, height, width], '
|
620 |
+
f'but `{x.shape}` is received!')
|
621 |
+
if randomize_noise:
|
622 |
+
noise = torch.randn(x.shape[0], 1, self.res, self.res).to(x)
|
623 |
+
else:
|
624 |
+
noise = self.noise
|
625 |
+
return x + noise * self.weight.view(1, -1, 1, 1)
|
626 |
+
|
627 |
+
|
628 |
+
class StyleModLayer(nn.Module):
|
629 |
+
"""Implements the style modulation layer."""
|
630 |
+
|
631 |
+
def __init__(self,
|
632 |
+
w_space_dim,
|
633 |
+
out_channels,
|
634 |
+
use_wscale=True):
|
635 |
+
super().__init__()
|
636 |
+
self.w_space_dim = w_space_dim
|
637 |
+
self.out_channels = out_channels
|
638 |
+
|
639 |
+
weight_shape = (self.out_channels * 2, self.w_space_dim)
|
640 |
+
wscale = _STYLEMOD_WSCALE_GAIN / np.sqrt(self.w_space_dim)
|
641 |
+
if use_wscale:
|
642 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape))
|
643 |
+
self.wscale = wscale
|
644 |
+
else:
|
645 |
+
self.weight = nn.Parameter(torch.randn(*weight_shape) * wscale)
|
646 |
+
self.wscale = 1.0
|
647 |
+
|
648 |
+
self.bias = nn.Parameter(torch.zeros(self.out_channels * 2))
|
649 |
+
|
650 |
+
def forward(self, x, w):
|
651 |
+
if w.ndim != 2 or w.shape[1] != self.w_space_dim:
|
652 |
+
raise ValueError(f'The input tensor should be with shape '
|
653 |
+
f'[batch_size, w_space_dim], where '
|
654 |
+
f'`w_space_dim` equals to {self.w_space_dim}!\n'
|
655 |
+
f'But `{w.shape}` is received!')
|
656 |
+
style = F.linear(w, weight=self.weight * self.wscale, bias=self.bias)
|
657 |
+
style_split = style.view(-1, 2, self.out_channels, 1, 1)
|
658 |
+
x = x * (style_split[:, 0] + 1) + style_split[:, 1]
|
659 |
+
return x, style
|
660 |
+
|
661 |
+
|
662 |
+
class ConvBlock(nn.Module):
|
663 |
+
"""Implements the normal convolutional block.
|
664 |
+
|
665 |
+
Basically, this block executes upsampling layer (if needed), convolutional
|
666 |
+
layer, blurring layer, noise applying layer, activation layer, instance
|
667 |
+
normalization layer, and style modulation layer in sequence.
|
668 |
+
"""
|
669 |
+
|
670 |
+
def __init__(self,
|
671 |
+
in_channels,
|
672 |
+
out_channels,
|
673 |
+
resolution,
|
674 |
+
w_space_dim,
|
675 |
+
position=None,
|
676 |
+
kernel_size=3,
|
677 |
+
stride=1,
|
678 |
+
padding=1,
|
679 |
+
add_bias=True,
|
680 |
+
upsample=False,
|
681 |
+
fused_scale=False,
|
682 |
+
use_wscale=True,
|
683 |
+
wscale_gain=_WSCALE_GAIN,
|
684 |
+
lr_mul=1.0,
|
685 |
+
activation_type='lrelu'):
|
686 |
+
"""Initializes with block settings.
|
687 |
+
|
688 |
+
Args:
|
689 |
+
in_channels: Number of channels of the input tensor.
|
690 |
+
out_channels: Number of channels of the output tensor.
|
691 |
+
resolution: Resolution of the output tensor.
|
692 |
+
w_space_dim: Dimension of W space for style modulation.
|
693 |
+
position: Position of the layer. `const_init`, `last` would lead to
|
694 |
+
different behavior. (default: None)
|
695 |
+
kernel_size: Size of the convolutional kernels. (default: 3)
|
696 |
+
stride: Stride parameter for convolution operation. (default: 1)
|
697 |
+
padding: Padding parameter for convolution operation. (default: 1)
|
698 |
+
add_bias: Whether to add bias onto the convolutional result.
|
699 |
+
(default: True)
|
700 |
+
upsample: Whether to upsample the input tensor before convolution.
|
701 |
+
(default: False)
|
702 |
+
fused_scale: Whether to fuse `upsample` and `conv2d` together,
|
703 |
+
resulting in `conv2d_transpose`. (default: False)
|
704 |
+
use_wscale: Whether to use weight scaling. (default: True)
|
705 |
+
wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
|
706 |
+
lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
|
707 |
+
activation_type: Type of activation. Support `linear` and `lrelu`.
|
708 |
+
(default: `lrelu`)
|
709 |
+
|
710 |
+
Raises:
|
711 |
+
NotImplementedError: If the `activation_type` is not supported.
|
712 |
+
"""
|
713 |
+
super().__init__()
|
714 |
+
|
715 |
+
self.position = position
|
716 |
+
|
717 |
+
if add_bias:
|
718 |
+
            self.bias = nn.Parameter(torch.zeros(out_channels))
            self.bscale = lr_mul
        else:
            self.bias = None

        if activation_type == 'linear':
            self.activate = nn.Identity()
        elif activation_type == 'lrelu':
            self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        else:
            raise NotImplementedError(f'Not implemented activation function: '
                                      f'`{activation_type}`!')

        if self.position != 'last':
            self.apply_noise = NoiseApplyingLayer(resolution, out_channels)
            self.normalize = InstanceNormLayer()
            self.style = StyleModLayer(w_space_dim, out_channels, use_wscale)

        if self.position == 'const_init':
            self.const = nn.Parameter(
                torch.ones(1, in_channels, resolution, resolution))
            return

        self.blur = BlurLayer(out_channels) if upsample else nn.Identity()

        if upsample and not fused_scale:
            self.upsample = UpsamplingLayer()
        else:
            self.upsample = nn.Identity()

        if upsample and fused_scale:
            self.use_conv2d_transpose = True
            self.stride = 2
            self.padding = 1
        else:
            self.use_conv2d_transpose = False
            self.stride = stride
            self.padding = padding

        weight_shape = (out_channels, in_channels, kernel_size, kernel_size)
        fan_in = kernel_size * kernel_size * in_channels
        wscale = wscale_gain / np.sqrt(fan_in)
        if use_wscale:
            self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
            self.wscale = wscale * lr_mul
        else:
            self.weight = nn.Parameter(
                torch.randn(*weight_shape) * wscale / lr_mul)
            self.wscale = lr_mul

    def forward(self, x, w, randomize_noise=False):
        if self.position != 'const_init':
            x = self.upsample(x)
            weight = self.weight * self.wscale
            if self.use_conv2d_transpose:
                weight = F.pad(weight, (1, 1, 1, 1, 0, 0, 0, 0), 'constant', 0)
                weight = (weight[:, :, 1:, 1:] + weight[:, :, :-1, 1:] +
                          weight[:, :, 1:, :-1] + weight[:, :, :-1, :-1])
                weight = weight.permute(1, 0, 2, 3)
                x = F.conv_transpose2d(x,
                                       weight=weight,
                                       bias=None,
                                       stride=self.stride,
                                       padding=self.padding)
            else:
                x = F.conv2d(x,
                             weight=weight,
                             bias=None,
                             stride=self.stride,
                             padding=self.padding)
            x = self.blur(x)
        else:
            x = self.const.repeat(w.shape[0], 1, 1, 1)

        bias = self.bias * self.bscale if self.bias is not None else None

        if self.position == 'last':
            if bias is not None:
                x = x + bias.view(1, -1, 1, 1)
            return x

        x = self.apply_noise(x, randomize_noise)
        if bias is not None:
            x = x + bias.view(1, -1, 1, 1)
        x = self.activate(x)
        x = self.normalize(x)
        x, style = self.style(x, w)
        return x, style


class DenseBlock(nn.Module):
    """Implements the dense block.

    Basically, this block executes fully-connected layer and activation layer.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 add_bias=True,
                 use_wscale=True,
                 wscale_gain=_WSCALE_GAIN,
                 lr_mul=1.0,
                 activation_type='lrelu'):
        """Initializes with block settings.

        Args:
            in_channels: Number of channels of the input tensor.
            out_channels: Number of channels of the output tensor.
            add_bias: Whether to add bias onto the fully-connected result.
                (default: True)
            use_wscale: Whether to use weight scaling. (default: True)
            wscale_gain: Gain factor for weight scaling. (default: _WSCALE_GAIN)
            lr_mul: Learning multiplier for both weight and bias. (default: 1.0)
            activation_type: Type of activation. Support `linear` and `lrelu`.
                (default: `lrelu`)

        Raises:
            NotImplementedError: If the `activation_type` is not supported.
        """
        super().__init__()
        weight_shape = (out_channels, in_channels)
        wscale = wscale_gain / np.sqrt(in_channels)
        if use_wscale:
            self.weight = nn.Parameter(torch.randn(*weight_shape) / lr_mul)
            self.wscale = wscale * lr_mul
        else:
            self.weight = nn.Parameter(
                torch.randn(*weight_shape) * wscale / lr_mul)
            self.wscale = lr_mul

        if add_bias:
            self.bias = nn.Parameter(torch.zeros(out_channels))
            self.bscale = lr_mul
        else:
            self.bias = None

        if activation_type == 'linear':
            self.activate = nn.Identity()
        elif activation_type == 'lrelu':
            self.activate = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        else:
            raise NotImplementedError(f'Not implemented activation function: '
                                      f'`{activation_type}`!')

    def forward(self, x):
        if x.ndim != 2:
            x = x.view(x.shape[0], -1)
        bias = self.bias * self.bscale if self.bias is not None else None
        x = F.linear(x, weight=self.weight * self.wscale, bias=bias)
        x = self.activate(x)
        return x
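For a quick sanity check, a minimal sketch of how DenseBlock can be exercised on its own; the 512-unit sizes below are illustrative assumptions, not values read from any checkpoint config:

import torch

# Hypothetical smoke test: channel sizes are illustrative only.
dense = DenseBlock(in_channels=512, out_channels=512)
z = torch.randn(4, 512)   # a batch of 4 latent codes
h = dense(z)              # scaled fully-connected layer + leaky ReLU
print(h.shape)            # torch.Size([4, 512])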
models/sync_op.py
ADDED
@@ -0,0 +1,18 @@
# python3.7
"""Contains the synchronizing operator."""

import torch
import torch.distributed as dist

__all__ = ['all_gather']


def all_gather(tensor):
    """Gathers tensor from all devices and does averaging."""
    if not dist.is_initialized():
        return tensor

    world_size = dist.get_world_size()
    tensor_list = [torch.ones_like(tensor) for _ in range(world_size)]
    dist.all_gather(tensor_list, tensor, async_op=False)
    return torch.mean(torch.stack(tensor_list, dim=0), dim=0)
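A minimal sketch of how `all_gather` behaves, assuming the module is imported as `models.sync_op`: outside an initialized `torch.distributed` process group it simply returns its input, while under a multi-process launch it returns the element-wise mean across ranks.

import torch
from models.sync_op import all_gather

local_stat = torch.tensor([0.5, 1.0])
# No process group is initialized here, so this is a pass-through.
print(all_gather(local_stat))   # tensor([0.5000, 1.0000])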
sefa.py
ADDED
@@ -0,0 +1,145 @@
"""SeFa."""

import os
import argparse
from tqdm import tqdm
import numpy as np

import torch

from models import parse_gan_type
from utils import to_tensor
from utils import postprocess
from utils import load_generator
from utils import factorize_weight
from utils import HtmlPageVisualizer


def parse_args():
    """Parses arguments."""
    parser = argparse.ArgumentParser(
        description='Discover semantics from the pre-trained weight.')
    parser.add_argument('model_name', type=str,
                        help='Name to the pre-trained model.')
    parser.add_argument('--save_dir', type=str, default='results',
                        help='Directory to save the visualization pages. '
                             '(default: %(default)s)')
    parser.add_argument('-L', '--layer_idx', type=str, default='all',
                        help='Indices of layers to interpret. '
                             '(default: %(default)s)')
    parser.add_argument('-N', '--num_samples', type=int, default=5,
                        help='Number of samples used for visualization. '
                             '(default: %(default)s)')
    parser.add_argument('-K', '--num_semantics', type=int, default=5,
                        help='Number of semantic boundaries corresponding to '
                             'the top-k eigen values. (default: %(default)s)')
    parser.add_argument('--start_distance', type=float, default=-3.0,
                        help='Start point for manipulation on each semantic. '
                             '(default: %(default)s)')
    parser.add_argument('--end_distance', type=float, default=3.0,
                        help='Ending point for manipulation on each semantic. '
                             '(default: %(default)s)')
    parser.add_argument('--step', type=int, default=11,
                        help='Manipulation step on each semantic. '
                             '(default: %(default)s)')
    parser.add_argument('--viz_size', type=int, default=256,
                        help='Size of images to visualize on the HTML page. '
                             '(default: %(default)s)')
    parser.add_argument('--trunc_psi', type=float, default=0.7,
                        help='Psi factor used for truncation. This is '
                             'particularly applicable to StyleGAN (v1/v2). '
                             '(default: %(default)s)')
    parser.add_argument('--trunc_layers', type=int, default=8,
                        help='Number of layers to perform truncation. This is '
                             'particularly applicable to StyleGAN (v1/v2). '
                             '(default: %(default)s)')
    parser.add_argument('--seed', type=int, default=0,
                        help='Seed for sampling. (default: %(default)s)')
    parser.add_argument('--gpu_id', type=str, default='0',
                        help='GPU(s) to use. (default: %(default)s)')
    return parser.parse_args()


def main():
    """Main function."""
    args = parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    os.makedirs(args.save_dir, exist_ok=True)

    # Factorize weights.
    generator = load_generator(args.model_name)
    gan_type = parse_gan_type(generator)
    layers, boundaries, values = factorize_weight(generator, args.layer_idx)

    # Set random seed.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Prepare codes.
    codes = torch.randn(args.num_samples, generator.z_space_dim).cuda()
    if gan_type == 'pggan':
        codes = generator.layer0.pixel_norm(codes)
    elif gan_type in ['stylegan', 'stylegan2']:
        codes = generator.mapping(codes)['w']
        codes = generator.truncation(codes,
                                     trunc_psi=args.trunc_psi,
                                     trunc_layers=args.trunc_layers)
    codes = codes.detach().cpu().numpy()

    # Generate visualization pages.
    distances = np.linspace(args.start_distance, args.end_distance, args.step)
    num_sam = args.num_samples
    num_sem = args.num_semantics
    vizer_1 = HtmlPageVisualizer(num_rows=num_sem * (num_sam + 1),
                                 num_cols=args.step + 1,
                                 viz_size=args.viz_size)
    vizer_2 = HtmlPageVisualizer(num_rows=num_sam * (num_sem + 1),
                                 num_cols=args.step + 1,
                                 viz_size=args.viz_size)

    headers = [''] + [f'Distance {d:.2f}' for d in distances]
    vizer_1.set_headers(headers)
    vizer_2.set_headers(headers)
    for sem_id in range(num_sem):
        value = values[sem_id]
        vizer_1.set_cell(sem_id * (num_sam + 1), 0,
                         text=f'Semantic {sem_id:03d}<br>({value:.3f})',
                         highlight=True)
        for sam_id in range(num_sam):
            vizer_1.set_cell(sem_id * (num_sam + 1) + sam_id + 1, 0,
                             text=f'Sample {sam_id:03d}')
    for sam_id in range(num_sam):
        vizer_2.set_cell(sam_id * (num_sem + 1), 0,
                         text=f'Sample {sam_id:03d}',
                         highlight=True)
        for sem_id in range(num_sem):
            value = values[sem_id]
            vizer_2.set_cell(sam_id * (num_sem + 1) + sem_id + 1, 0,
                             text=f'Semantic {sem_id:03d}<br>({value:.3f})')

    for sam_id in tqdm(range(num_sam), desc='Sample ', leave=False):
        code = codes[sam_id:sam_id + 1]
        for sem_id in tqdm(range(num_sem), desc='Semantic ', leave=False):
            boundary = boundaries[sem_id:sem_id + 1]
            for col_id, d in enumerate(distances, start=1):
                temp_code = code.copy()
                if gan_type == 'pggan':
                    temp_code += boundary * d
                    image = generator(to_tensor(temp_code))['image']
                elif gan_type in ['stylegan', 'stylegan2']:
                    temp_code[:, layers, :] += boundary * d
                    image = generator.synthesis(to_tensor(temp_code))['image']
                image = postprocess(image)[0]
                vizer_1.set_cell(sem_id * (num_sam + 1) + sam_id + 1, col_id,
                                 image=image)
                vizer_2.set_cell(sam_id * (num_sem + 1) + sem_id + 1, col_id,
                                 image=image)

    prefix = (f'{args.model_name}_'
              f'N{num_sam}_K{num_sem}_L{args.layer_idx}_seed{args.seed}')
    vizer_1.save(os.path.join(args.save_dir, f'{prefix}_sample_first.html'))
    vizer_2.save(os.path.join(args.save_dir, f'{prefix}_semantic_first.html'))


if __name__ == '__main__':
    main()
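The same pipeline can also be driven interactively. A minimal sketch follows, assuming `stylegan_animeface512` is one of the keys in `models.MODEL_ZOO` and that a CUDA device is available; the distance value 2.0 is an arbitrary illustration:

import torch
from utils import load_generator, factorize_weight, to_tensor, postprocess

generator = load_generator('stylegan_animeface512')   # assumed MODEL_ZOO key
layers, boundaries, values = factorize_weight(generator, layer_idx='all')

# Sample one latent code, map it to W space, and apply truncation.
w = generator.mapping(torch.randn(1, generator.z_space_dim).cuda())['w']
w = generator.truncation(w, trunc_psi=0.7, trunc_layers=8)
w = w.detach().cpu().numpy()

# Push the code along the top-ranked semantic and synthesize.
w[:, layers, :] += boundaries[0:1] * 2.0
image = postprocess(generator.synthesis(to_tensor(w))['image'])[0]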
utils.py
ADDED
@@ -0,0 +1,509 @@
"""Utility functions."""

import base64
import os
import subprocess
import cv2
import numpy as np

import torch

from models import MODEL_ZOO
from models import build_generator
from models import parse_gan_type

__all__ = ['postprocess', 'load_generator', 'factorize_weight',
           'HtmlPageVisualizer']

CHECKPOINT_DIR = 'checkpoints'


def to_tensor(array):
    """Converts a `numpy.ndarray` to `torch.Tensor`.

    Args:
        array: The input array to convert.

    Returns:
        A `torch.Tensor` with dtype `torch.FloatTensor` on cuda device.
    """
    assert isinstance(array, np.ndarray)
    return torch.from_numpy(array).type(torch.FloatTensor).cuda()


def postprocess(images, min_val=-1.0, max_val=1.0):
    """Post-processes images from `torch.Tensor` to `numpy.ndarray`.

    Args:
        images: A `torch.Tensor` with shape `NCHW` to process.
        min_val: The minimum value of the input tensor. (default: -1.0)
        max_val: The maximum value of the input tensor. (default: 1.0)

    Returns:
        A `numpy.ndarray` with shape `NHWC` and pixel range [0, 255].
    """
    assert isinstance(images, torch.Tensor)
    images = images.detach().cpu().numpy()
    images = (images - min_val) * 255 / (max_val - min_val)
    images = np.clip(images + 0.5, 0, 255).astype(np.uint8)
    images = images.transpose(0, 2, 3, 1)
    return images


def load_generator(model_name):
    """Loads pre-trained generator.

    Args:
        model_name: Name of the model. Should be a key in `models.MODEL_ZOO`.

    Returns:
        A generator, which is a `torch.nn.Module`, with pre-trained weights
        loaded.

    Raises:
        KeyError: If the input `model_name` is not in `models.MODEL_ZOO`.
    """
    if model_name not in MODEL_ZOO:
        raise KeyError(f'Unknown model name `{model_name}`!')

    model_config = MODEL_ZOO[model_name].copy()
    url = model_config.pop('url')  # URL to download model if needed.

    # Build generator.
    print(f'Building generator for model `{model_name}` ...')
    generator = build_generator(**model_config)
    print(f'Finish building generator.')

    # Load pre-trained weights.
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)
    checkpoint_path = os.path.join(CHECKPOINT_DIR, model_name + '.pth')
    print(f'Loading checkpoint from `{checkpoint_path}` ...')
    if not os.path.exists(checkpoint_path):
        print(f'  Downloading checkpoint from `{url}` ...')
        subprocess.call(['wget', '--quiet', '-O', checkpoint_path, url])
        print(f'  Finish downloading checkpoint.')
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    if 'generator_smooth' in checkpoint:
        generator.load_state_dict(checkpoint['generator_smooth'])
    else:
        generator.load_state_dict(checkpoint['generator'])
    generator = generator.cuda()
    generator.eval()
    print(f'Finish loading checkpoint.')
    return generator


def parse_indices(obj, min_val=None, max_val=None):
    """Parses indices.

    The input can be a list or a tuple or a string, which is either a comma
    separated list of numbers 'a, b, c', or a dash separated range 'a - c'.
    Space in the string will be ignored.

    Args:
        obj: The input object to parse indices from.
        min_val: If not `None`, this function will check that all indices are
            equal to or larger than this value. (default: None)
        max_val: If not `None`, this function will check that all indices are
            equal to or smaller than this value. (default: None)

    Returns:
        A list of integers.

    Raises:
        If the input is invalid, i.e., neither a list or tuple, nor a string.
    """
    if obj is None or obj == '':
        indices = []
    elif isinstance(obj, int):
        indices = [obj]
    elif isinstance(obj, (list, tuple, np.ndarray)):
        indices = list(obj)
    elif isinstance(obj, str):
        indices = []
        splits = obj.replace(' ', '').split(',')
        for split in splits:
            numbers = list(map(int, split.split('-')))
            if len(numbers) == 1:
                indices.append(numbers[0])
            elif len(numbers) == 2:
                indices.extend(list(range(numbers[0], numbers[1] + 1)))
            else:
                raise ValueError(f'Unable to parse the input!')
    else:
        raise ValueError(f'Invalid type of input: `{type(obj)}`!')

    assert isinstance(indices, list)
    indices = sorted(list(set(indices)))
    for idx in indices:
        assert isinstance(idx, int)
        if min_val is not None:
            assert idx >= min_val, f'{idx} is smaller than min val `{min_val}`!'
        if max_val is not None:
            assert idx <= max_val, f'{idx} is larger than max val `{max_val}`!'

    return indices


def factorize_weight(generator, layer_idx='all'):
    """Factorizes the generator weight to get semantics boundaries.

    Args:
        generator: Generator to factorize.
        layer_idx: Indices of layers to interpret, especially for StyleGAN and
            StyleGAN2. (default: `all`)

    Returns:
        A tuple of (layers_to_interpret, semantic_boundaries, eigen_values).

    Raises:
        ValueError: If the generator type is not supported.
    """
    # Get GAN type.
    gan_type = parse_gan_type(generator)

    # Get layers.
    if gan_type == 'pggan':
        layers = [0]
    elif gan_type in ['stylegan', 'stylegan2']:
        if layer_idx == 'all':
            layers = list(range(generator.num_layers))
        else:
            layers = parse_indices(layer_idx,
                                   min_val=0,
                                   max_val=generator.num_layers - 1)

    # Factorize semantics from weight.
    weights = []
    for idx in layers:
        layer_name = f'layer{idx}'
        if gan_type == 'stylegan2' and idx == generator.num_layers - 1:
            layer_name = f'output{idx // 2}'
        if gan_type == 'pggan':
            weight = generator.__getattr__(layer_name).weight
            weight = weight.flip(2, 3).permute(1, 0, 2, 3).flatten(1)
        elif gan_type in ['stylegan', 'stylegan2']:
            weight = generator.synthesis.__getattr__(layer_name).style.weight.T
        weights.append(weight.cpu().detach().numpy())
    weight = np.concatenate(weights, axis=1).astype(np.float32)
    weight = weight / np.linalg.norm(weight, axis=0, keepdims=True)
    eigen_values, eigen_vectors = np.linalg.eig(weight.dot(weight.T))

    return layers, eigen_vectors.T, eigen_values


def get_sortable_html_header(column_name_list, sort_by_ascending=False):
    """Gets header for sortable html page.

    Basically, the html page contains a sortable table, where user can sort the
    rows by a particular column by clicking the column head.

    Example:

    column_name_list = [name_1, name_2, name_3]
    header = get_sortable_html_header(column_name_list)
    footer = get_sortable_html_footer()
    sortable_table = ...
    html_page = header + sortable_table + footer

    Args:
        column_name_list: List of column header names.
        sort_by_ascending: Default sorting order. If set as `True`, the html
            page will be sorted by ascending order when the header is clicked
            for the first time.

    Returns:
        A string, which represents for the header for a sortable html page.
    """
    header = '\n'.join([
        '<script type="text/javascript">',
        'var column_idx;',
        'var sort_by_ascending = ' + str(sort_by_ascending).lower() + ';',
        '',
        'function sorting(tbody, column_idx){',
        '  this.column_idx = column_idx;',
        '  Array.from(tbody.rows)',
        '       .sort(compareCells)',
        '       .forEach(function(row) { tbody.appendChild(row); })',
        '  sort_by_ascending = !sort_by_ascending;',
        '}',
        '',
        'function compareCells(row_a, row_b) {',
        '  var val_a = row_a.cells[column_idx].innerText;',
        '  var val_b = row_b.cells[column_idx].innerText;',
        '  var flag = sort_by_ascending ? 1 : -1;',
        '  return flag * (val_a > val_b ? 1 : -1);',
        '}',
        '</script>',
        '',
        '<html>',
        '',
        '<head>',
        '<style>',
        '  table {',
        '    border-spacing: 0;',
        '    border: 1px solid black;',
        '  }',
        '  th {',
        '    cursor: pointer;',
        '  }',
        '  th, td {',
        '    text-align: left;',
        '    vertical-align: middle;',
        '    border-collapse: collapse;',
        '    border: 0.5px solid black;',
        '    padding: 8px;',
        '  }',
        '  tr:nth-child(even) {',
        '    background-color: #d2d2d2;',
        '  }',
        '</style>',
        '</head>',
        '',
        '<body>',
        '',
        '<table>',
        '<thead>',
        '<tr>',
        ''])
    for idx, name in enumerate(column_name_list):
        header += f'  <th onclick="sorting(tbody, {idx})">{name}</th>\n'
    header += '</tr>\n'
    header += '</thead>\n'
    header += '<tbody id="tbody">\n'

    return header


def get_sortable_html_footer():
    """Gets footer for sortable html page.

    Check function `get_sortable_html_header()` for more details.
    """
    return '</tbody>\n</table>\n\n</body>\n</html>\n'


def parse_image_size(obj):
    """Parses object to a pair of image size, i.e., (width, height).

    Args:
        obj: The input object to parse image size from.

    Returns:
        A two-element tuple, indicating image width and height respectively.

    Raises:
        If the input is invalid, i.e., neither a list or tuple, nor a string.
    """
    if obj is None or obj == '':
        width = height = 0
    elif isinstance(obj, int):
        width = height = obj
    elif isinstance(obj, (list, tuple, np.ndarray)):
        numbers = tuple(obj)
        if len(numbers) == 0:
            width = height = 0
        elif len(numbers) == 1:
            width = height = numbers[0]
        elif len(numbers) == 2:
            width = numbers[0]
            height = numbers[1]
        else:
            raise ValueError(f'At most two elements for image size.')
    elif isinstance(obj, str):
        splits = obj.replace(' ', '').split(',')
        numbers = tuple(map(int, splits))
        if len(numbers) == 0:
            width = height = 0
        elif len(numbers) == 1:
            width = height = numbers[0]
        elif len(numbers) == 2:
            width = numbers[0]
            height = numbers[1]
        else:
            raise ValueError(f'At most two elements for image size.')
    else:
        raise ValueError(f'Invalid type of input: {type(obj)}!')

    return (max(0, width), max(0, height))


def encode_image_to_html_str(image, image_size=None):
    """Encodes an image to html language.

    NOTE: Input image is always assumed to be with `RGB` channel order.

    Args:
        image: The input image to encode. Should be with `RGB` channel order.
        image_size: This field is used to resize the image before encoding. `0`
            disables resizing. (default: None)

    Returns:
        A string which represents the encoded image.
    """
    if image is None:
        return ''

    assert image.ndim == 3 and image.shape[2] in [1, 3]

    # Change channel order to `BGR`, which is opencv-friendly.
    image = image[:, :, ::-1]

    # Resize the image if needed.
    width, height = parse_image_size(image_size)
    if height or width:
        height = height or image.shape[0]
        width = width or image.shape[1]
        image = cv2.resize(image, (width, height))

    # Encode the image to html-format string.
    encoded_image = cv2.imencode('.jpg', image)[1].tostring()
    encoded_image_base64 = base64.b64encode(encoded_image).decode('utf-8')
    html_str = f'<img src="data:image/jpeg;base64, {encoded_image_base64}"/>'

    return html_str


def get_grid_shape(size, row=0, col=0, is_portrait=False):
    """Gets the shape of a grid based on the size.

    This function makes greatest effort on making the output grid square if
    neither `row` nor `col` is set. If `is_portrait` is set as `False`, the
    height will always be equal to or smaller than the width. For example, if
    input `size = 16`, output shape will be `(4, 4)`; if input `size = 15`,
    output shape will be (3, 5). Otherwise, the height will always be equal to
    or larger than the width.

    Args:
        size: Size (height * width) of the target grid.
        is_portrait: Whether to return a portrait size of a landscape size.
            (default: False)

    Returns:
        A two-element tuple, representing height and width respectively.
    """
    assert isinstance(size, int)
    assert isinstance(row, int)
    assert isinstance(col, int)
    if size == 0:
        return (0, 0)

    if row > 0 and col > 0 and row * col != size:
        row = 0
        col = 0

    if row > 0 and size % row == 0:
        return (row, size // row)
    if col > 0 and size % col == 0:
        return (size // col, col)

    row = int(np.sqrt(size))
    while row > 0:
        if size % row == 0:
            col = size // row
            break
        row = row - 1

    return (col, row) if is_portrait else (row, col)


class HtmlPageVisualizer(object):
    """Defines the html page visualizer.

    This class can be used to visualize image results as html page. Basically,
    it is based on an html-format sorted table with helper functions
    `get_sortable_html_header()`, `get_sortable_html_footer()`, and
    `encode_image_to_html_str()`. To simplify the usage, specifying the
    following fields are enough to create a visualization page:

    (1) num_rows: Number of rows of the table (header-row exclusive).
    (2) num_cols: Number of columns of the table.
    (3) header contents (optional): Title of each column.

    NOTE: `grid_size` can be used to assign `num_rows` and `num_cols`
    automatically.

    Example:

    html = HtmlPageVisualizer(num_rows, num_cols)
    html.set_headers([...])
    for i in range(num_rows):
        for j in range(num_cols):
            html.set_cell(i, j, text=..., image=..., highlight=False)
    html.save('visualize.html')
    """

    def __init__(self,
                 num_rows=0,
                 num_cols=0,
                 grid_size=0,
                 is_portrait=True,
                 viz_size=None):
        if grid_size > 0:
            num_rows, num_cols = get_grid_shape(
                grid_size, row=num_rows, col=num_cols, is_portrait=is_portrait)
        assert num_rows > 0 and num_cols > 0

        self.num_rows = num_rows
        self.num_cols = num_cols
        self.viz_size = parse_image_size(viz_size)
        self.headers = ['' for _ in range(self.num_cols)]
        self.cells = [[{
            'text': '',
            'image': '',
            'highlight': False,
        } for _ in range(self.num_cols)] for _ in range(self.num_rows)]

    def set_header(self, col_idx, content):
        """Sets the content of a particular header by column index."""
        self.headers[col_idx] = content

    def set_headers(self, contents):
        """Sets the contents of all headers."""
        if isinstance(contents, str):
            contents = [contents]
        assert isinstance(contents, (list, tuple))
        assert len(contents) == self.num_cols
        for col_idx, content in enumerate(contents):
            self.set_header(col_idx, content)

    def set_cell(self, row_idx, col_idx, text='', image=None, highlight=False):
        """Sets the content of a particular cell.

        Basically, a cell contains some text as well as an image. Both text and
        image can be empty.

        Args:
            row_idx: Row index of the cell to edit.
            col_idx: Column index of the cell to edit.
            text: Text to add into the target cell. (default: None)
            image: Image to show in the target cell. Should be with `RGB`
                channel order. (default: None)
            highlight: Whether to highlight this cell. (default: False)
        """
        self.cells[row_idx][col_idx]['text'] = text
        self.cells[row_idx][col_idx]['image'] = encode_image_to_html_str(
            image, self.viz_size)
        self.cells[row_idx][col_idx]['highlight'] = bool(highlight)

    def save(self, save_path):
        """Saves the html page."""
        html = ''
        for i in range(self.num_rows):
            html += f'<tr>\n'
            for j in range(self.num_cols):
                text = self.cells[i][j]['text']
                image = self.cells[i][j]['image']
                if self.cells[i][j]['highlight']:
                    color = ' bgcolor="#FF8888"'
                else:
                    color = ''
                if text:
                    html += f'  <td{color}>{text}<br><br>{image}</td>\n'
                else:
                    html += f'  <td{color}>{image}</td>\n'
            html += f'</tr>\n'

        header = get_sortable_html_header(self.headers)
        footer = get_sortable_html_footer()

        with open(save_path, 'w') as f:
            f.write(header + html + footer)
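A few quick checks of the helpers above; this is a sketch assuming `utils` is importable from the working directory, and the printed values follow directly from the code:

import torch
from utils import parse_indices, parse_image_size, get_grid_shape, postprocess

print(parse_indices('0-3, 6'))       # [0, 1, 2, 3, 6]
print(parse_image_size('256, 128'))  # (256, 128)
print(get_grid_shape(15))            # (3, 5)

fake = torch.zeros(1, 3, 4, 4)       # NCHW tensor assumed to lie in [-1, 1]
print(postprocess(fake).shape)       # (1, 4, 4, 3), dtype uint8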