# Evaluation settings.
# NOTE(review): the nesting below was reconstructed from a flattened dump in
# which every line carried a trailing "|" and indentation was lost — confirm
# the structure against the config schema before relying on it.
eval:
  eval_datasets: null
  json_datasets: null
  test_datasets:
    # Generated PATTERN dataset with no inference_mode override.
    - name: generator_mean
      length: 96
      generator: PATTERN
      num_pairs: 4
      batch_size: 96
      num_tasks_to_show: 32
      task_generator_kwargs:
        num_cols: 4
        num_rows: 4
        pattern_size: 2
    # Same generator settings, evaluated with gradient_ascent inference.
    # NOTE(review): the name ends in "_5" but inference_kwargs.num_steps is 10 —
    # confirm which one is intended; values are left unchanged here.
    - name: generator_gradient_ascent_5
      length: 96
      generator: PATTERN
      num_pairs: 4
      batch_size: 96
      inference_mode: gradient_ascent
      inference_kwargs:
        lr: 0.1
        num_steps: 10
      num_tasks_to_show: 32
      task_generator_kwargs:
        num_cols: 4
        num_rows: 4
        pattern_size: 2
# Training settings.
# NOTE(review): structure reconstructed from a flattened dump (trailing "|" on
# every line, indentation lost). The end of the task_generator mapping is
# inferred: train_datasets and everything after it are placed at the training
# level — confirm against the config schema.
training:
  seed: 0
  use_hf: true
  kl_coeff: 0.001
  batch_size: 128
  learning_rate: 0.0004
  inference_mode: mean
  # Online task generator; grid and pattern sizes match the eval test_datasets.
  task_generator:
    class: PATTERN
    num_cols: 4
    num_rows: 4
    num_pairs: 4
    num_workers: 16
    pattern_size: 2
  train_datasets: null
  mixed_precision: false
  total_num_steps: 200000
  inference_kwargs: null
  # Cadence keys: logging is counted in steps; eval and checkpointing are
  # counted in logs (per the key names — presumably multiples of
  # log_every_n_steps; verify in the training loop).
  eval_every_n_logs: 20
  log_every_n_steps: 1000
  resume_from_checkpoint: null
  online_data_augmentation: false
  gradient_accumulation_steps: 1
  save_checkpoint_every_n_logs: 200
# Decoder transformer configuration.
# NOTE(review): structure reconstructed from a flattened dump (trailing "|" on
# every line, indentation lost) — confirm against the config schema.
decoder_transformer:
  # Dotted path of the config class (presumably resolved by a Hydra-style
  # instantiate call — confirm).
  _target_: src_v2.models.utils.DecoderTransformerConfig
  max_cols: 4
  max_rows: 4
  num_layers: 2
  transformer_layer:
    _target_: src_v2.models.utils.TransformerLayerConfig
    num_heads: 6
    dropout_rate: 0.0
    mlp_dim_factor: 4.0
    # 6 heads x 12 dims per head (presumably a 72-wide embedding — confirm).
    emb_dim_per_head: 12
    attention_dropout_rate: 0.0
# Encoder transformer configuration.
# NOTE(review): structure reconstructed from a flattened dump (trailing "|" on
# every line, indentation lost). latent_projection_bias is placed at the encoder
# level rather than inside transformer_layer — confirm against the config schema.
encoder_transformer:
  # Dotted path of the config class (presumably resolved by a Hydra-style
  # instantiate call — confirm).
  _target_: src_v2.models.utils.EncoderTransformerConfig
  max_cols: 4
  max_rows: 4
  latent_dim: 2
  num_layers: 2
  variational: true
  transformer_layer:
    _target_: src_v2.models.utils.TransformerLayerConfig
    num_heads: 6
    dropout_rate: 0.0
    mlp_dim_factor: 4.0
    # 6 heads x 12 dims per head (presumably a 72-wide embedding — confirm).
    emb_dim_per_head: 12
    attention_dropout_rate: 0.0
  latent_projection_bias: false
|