# StructDiffusionDemo/configs/conditional_pose_diffusion_language.yaml
# Author: Weiyu Liu
# Commit f392320: add natural language model and app
---
random_seed: 1
WANDB:
project: StructDiffusion
save_dir: ${base_dirs.wandb_dir}
name: conditional_pose_diffusion_language_shuffle
# Dataset configuration.
DATASET:
  data_root: ${base_dirs.data}
  vocab_dir: ${base_dirs.data}/type_vocabs_coarse.json
  # important
  use_virtual_structure_frame: true
  ignore_distractor_objects: true
  ignore_rgb: true
  # the following are determined by the dataset
  max_num_target_objects: 7
  max_num_distractor_objects: 5
  # set to 1 because we use sentence embedding, which only takes one spot in the input seq to transformer diffusion
  max_num_shape_parameters: 1
  # set to zeros because they are not used for now
  max_num_rearrange_features: 0
  max_num_anchor_features: 0
  # language
  sentence_embedding_file: ${base_dirs.data}/template_sentence_data.pkl
  use_incomplete_sentence: true
  # shuffle
  shuffle_object_index: true
  # points sampled per object point cloud
  num_pts: 1024
  # explicit null: no filtering by the number of moved objects
  # (original left the value empty, which parses to null but reads as an accident)
  filter_num_moved_objects_range: null
  data_augmentation: false
# PyTorch DataLoader settings.
DATALOADER:
  batch_size: 64
  num_workers: 8
  pin_memory: true
# Model architecture hyperparameters.
MODEL:
  # transformer encoder
  encoder_input_dim: 256
  num_attention_heads: 8
  encoder_hidden_dim: 512
  encoder_dropout: 0.0
  encoder_activation: relu
  encoder_num_layers: 8
  # output head
  structure_dropout: 0
  object_dropout: 0
  # pc encoder
  ignore_rgb: ${DATASET.ignore_rgb}
  pc_emb_dim: 256
  posed_pc_emb_dim: 80
  # pose encoder
  pose_emb_dim: 80
  # language
  word_emb_dim: 160
  # diffusion step
  time_emb_dim: 80
  # sequence embeddings
  # max_num_target_objects (+ max_num_distractor_objects if not ignore_distractor_objects)
  max_seq_size: 7
  max_token_type_size: 4
  seq_pos_emb_dim: 8
  seq_type_emb_dim: 8
  # virtual frame
  use_virtual_structure_frame: ${DATASET.use_virtual_structure_frame}
  # language
  use_sentence_embedding: true
  # NOTE(review): 384 matches common MiniLM-class sentence encoders — confirm against
  # the model that produced DATASET.sentence_embedding_file
  sentence_embedding_dim: 384
# Diffusion noise schedule: number of denoising timesteps.
NOISE_SCHEDULE:
  timesteps: 200
# Training loss on predicted noise.
LOSS:
  type: huber
OPTIMIZER:
  lr: 0.0001
  weight_decay: 0  # 0.0001 — disabled for now
  # lr_restart: 3000
  # warmup: 10
# PyTorch Lightning Trainer arguments.
TRAINER:
  max_epochs: 200
  gradient_clip_val: 1.0
  # NOTE(review): `gpus` is the pre-Lightning-2.x device argument — kept as-is for
  # compatibility with the pinned Lightning version; confirm before upgrading.
  gpus: 1
  deterministic: false
  # enable_progress_bar: False