# Hugging Face Hub page header captured with the file (not part of the config):
# clement-bonnet's picture
# Upload 2 files
# 3c6f1d7 verified
# Evaluation settings: no eval/json datasets are configured; evaluation uses
# two generated test sets that share the same PATTERN generator parameters.
eval:
  eval_datasets: null
  json_datasets: null
  test_datasets:
    # No inference_mode is set on this entry.
    # NOTE(review): presumably it falls back to `mean` (as the name and
    # training.inference_mode suggest) -- confirm against the loader.
    - name: generator_mean
      length: 96
      generator: PATTERN
      num_pairs: 4
      batch_size: 96
      num_tasks_to_show: 32
      task_generator_kwargs:
        num_cols: 4
        num_rows: 4
        pattern_size: 2
    # Same data settings as above, evaluated with gradient-ascent inference.
    # NOTE(review): the name ends in `_5` but num_steps is 10 -- confirm which
    # one is intended before comparing runs by name.
    - name: generator_gradient_ascent_5
      length: 96
      generator: PATTERN
      num_pairs: 4
      batch_size: 96
      inference_mode: gradient_ascent
      inference_kwargs:
        lr: 0.1
        num_steps: 10
      num_tasks_to_show: 32
      task_generator_kwargs:
        num_cols: 4
        num_rows: 4
        pattern_size: 2
# Training settings: data comes from the online PATTERN task generator
# (train_datasets is null), with `mean` inference and no extra inference kwargs.
training:
  seed: 0
  use_hf: true
  kl_coeff: 0.001
  batch_size: 128
  learning_rate: 0.0004
  inference_mode: mean
  # Grid/pattern sizes match the eval test sets' task_generator_kwargs.
  task_generator:
    class: PATTERN
    num_cols: 4
    num_rows: 4
    num_pairs: 4
    num_workers: 16
    pattern_size: 2
  train_datasets: null
  mixed_precision: false
  total_num_steps: 200000
  inference_kwargs: null
  # NOTE(review): if "logs" counts logging events, evaluation runs every
  # 20 * 1000 = 20,000 steps and a checkpoint is saved every
  # 200 * 1000 = 200,000 steps (i.e. once, at total_num_steps) -- confirm.
  eval_every_n_logs: 20
  log_every_n_steps: 1000
  resume_from_checkpoint: null
  online_data_augmentation: false
  gradient_accumulation_steps: 1
  save_checkpoint_every_n_logs: 200
# Decoder config; `_target_` names the config class to build
# (Hydra-style instantiation key).
decoder_transformer:
  _target_: src_v2.models.utils.DecoderTransformerConfig
  max_cols: 4
  max_rows: 4
  num_layers: 2
  # Per-layer settings; same values as the encoder's transformer_layer.
  transformer_layer:
    _target_: src_v2.models.utils.TransformerLayerConfig
    num_heads: 6
    dropout_rate: 0.0
    mlp_dim_factor: 4.0
    # NOTE(review): presumably embedding dim = num_heads * emb_dim_per_head
    # = 72 -- confirm in TransformerLayerConfig.
    emb_dim_per_head: 12
    attention_dropout_rate: 0.0
# Encoder config; `_target_` names the config class to build
# (Hydra-style instantiation key). Variational, with a 2-dimensional latent.
encoder_transformer:
  _target_: src_v2.models.utils.EncoderTransformerConfig
  max_cols: 4
  max_rows: 4
  latent_dim: 2
  num_layers: 2
  variational: true
  # Per-layer settings; same values as the decoder's transformer_layer.
  transformer_layer:
    _target_: src_v2.models.utils.TransformerLayerConfig
    num_heads: 6
    dropout_rate: 0.0
    mlp_dim_factor: 4.0
    emb_dim_per_head: 12
    attention_dropout_rate: 0.0
  # NOTE(review): placed at encoder level because the decoder's
  # TransformerLayerConfig has no such key -- confirm against
  # EncoderTransformerConfig's fields.
  latent_projection_bias: false