# Dataset parameters
# Each dataset should contain two folders: train and test.
# Each video can be represented as:
#   - an image of concatenated frames
#   - a '.mp4' or '.gif' file
#   - a folder with all frames from a specific video
# In the case of TaiChi, the same (YouTube) video can be split into many parts (chunks). Each part has the
# following name format: (id)#other#info.mp4. For example, '12335#adsbf.mp4' has the id 12335. For TaiChi
# the id is the YouTube video id.
dataset_params:
  # Path to the data; it can be stored in several formats: .mp4 or .gif videos, stacked .png images, or folders with frames.
  root_dir: ../taichi
  # Image shape, needed for the stacked .png format.
  frame_shape: null
  # For TaiChi a single video can be split into many chunks, or there may be several videos for a single person.
  # In this case an epoch can be a pass over different videos (if id_sampling=True) or over different chunks (if id_sampling=False).
  # If the video is named '12335#adsbf.mp4', the id is assumed to be 12335.
  id_sampling: True
  # Augmentation parameters; see augmentation.py for all possible augmentations.
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1

# Defines the model architecture
model_params:
  common_params:
    # Number of TPS transformations
    num_tps: 10
    # Number of channels per image
    num_channels: 3
    # Whether to estimate the affine background transformation
    bg: True
    # Whether to estimate the multi-resolution occlusion masks
    multi_mask: True
  generator_params:
    # Multiplier for the number of features
    block_expansion: 64
    # Maximum allowed number of features
    max_features: 512
    # Number of downsampling and upsampling blocks
    num_down_blocks: 3
  dense_motion_params:
    # Multiplier for the number of features
    block_expansion: 64
    # Maximum allowed number of features
    max_features: 1024
    # Number of blocks in the U-Net
    num_blocks: 5
    # Optical flow is predicted on smaller images for better performance;
    # scale_factor=0.25 means that a 256x256 image is resized to 64x64.
    scale_factor: 0.25
  avd_network_params:
    # Bottleneck size for the identity branch
    id_bottle_size: 128
    # Bottleneck size for the pose branch
    pose_bottle_size: 128

# Parameters of training
train_params:
  # Number of training epochs
  num_epochs: 100
  # For better I/O performance when the number of videos is small, the number of epochs can be multiplied by this number.
  # Thus with num_repeats=150 each epoch is effectively 150 times larger.
  num_repeats: 150
  # Drop the learning rate by a factor of 10 after these epochs
  epoch_milestones: [70, 90]
  # Initial learning rate for all modules
  lr_generator: 2.0e-4
  batch_size: 28
  # Scales for the perceptual pyramid loss. With scales = [1, 0.5, 0.25, 0.125] and a 256x256 image,
  # the loss is computed at resolutions 256x256, 128x128, 64x64 and 32x32.
  scales: [1, 0.5, 0.25, 0.125]
  # Number of CPU workers for dataset preprocessing
  dataloader_workers: 12
  # Save checkpoints this often; with checkpoint_freq=50 a checkpoint is saved every 50 epochs.
  checkpoint_freq: 50
  # Parameters of dropout
  # Dropout is applied only during the first dropout_epoch epochs of training.
  dropout_epoch: 35
  # The dropout probability increases linearly from dropout_startp to dropout_maxp over dropout_inc_epoch epochs.
  dropout_maxp: 0.7
  dropout_startp: 0.0
  dropout_inc_epoch: 10
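  # Worked example (assumed interpretation of the schedule described above): with dropout_startp=0.0,
  # dropout_maxp=0.7 and dropout_inc_epoch=10, the dropout probability at epoch e is roughly
  #   p(e) = min(dropout_maxp, dropout_startp + e * (dropout_maxp - dropout_startp) / dropout_inc_epoch),
  # i.e. about 0.07 at epoch 1, 0.35 at epoch 5 and 0.7 from epoch 10 onward; dropout is switched off
  # entirely after epoch dropout_epoch=35.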
  # Estimate the affine background transformation starting from the bg_start epoch.
  bg_start: 0
  # Parameters of the random TPS transformation for the equivariance loss
  transform_params:
    # Sigma for the affine part
    sigma_affine: 0.05
    # Sigma for the deformation part
    sigma_tps: 0.005
    # Number of points in the deformation grid
    points_tps: 5
  loss_weights:
    # Weights for the perceptual loss
    perceptual: [10, 10, 10, 10, 10]
    # Weight for the value equivariance loss
    equivariance_value: 10
    # Weight for the warp loss
    warp_loss: 10
    # Weight for the background loss
    bg: 10

# Parameters of training (animation-via-disentanglement)
train_avd_params:
  # Number of training epochs; a visualization is produced after each epoch.
  num_epochs: 100
  # For better I/O performance when the number of videos is small, the number of epochs can be multiplied by this number.
  # Thus with num_repeats=150 each epoch is effectively 150 times larger.
  num_repeats: 150
  # Batch size
  batch_size: 256
  # Save checkpoints this often; with checkpoint_freq=10 a checkpoint is saved every 10 epochs.
  checkpoint_freq: 10
  # Number of CPU workers for dataset preprocessing
  dataloader_workers: 24
  # Drop the learning rate by a factor of 10 after these epochs
  epoch_milestones: [70, 90]
  # Initial learning rate
  lr: 1.0e-3
  # Weight for the equivariance loss
  lambda_shift: 1
  random_scale: 0.25

visualizer_params:
  kp_size: 5
  draw_border: True
  colormap: 'gist_rainbow'
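# Usage sketch (illustrative only, kept as comments so the file remains valid YAML). The training code is
# assumed to read this file with PyYAML; the file name and key accesses below are hypothetical examples:
#   import yaml
#   with open('taichi-256.yaml') as f:          # hypothetical config file name
#       config = yaml.safe_load(f)
#   print(config['train_params']['batch_size'])                 # -> 28
#   print(config['model_params']['common_params']['num_tps'])   # -> 10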