{ "adam_beta1": 0.9, "adam_beta2": 0.999, "adam_weight_decay": 0.01, "amp": false, "class_conditional": false, "class_unconditional_prob": 0.1, "clip_grad_norm": 1.0, "dataset_name": "roc", "dim_ae": 64, "disable_dropout": false, "dropout": 0.1, "ema_decay": 0.9999, "ema_update_every": 1, "enc_dec_model": "facebook/bart-base", "eval": false, "eval_batch_size": 32, "eval_every": 1000, "eval_test": false, "gradient_accumulation_steps": 1, "init_path": null, "l2_normalize_latents": true, "latent_dim": 64, "latent_model_path": "saved_latent_models/roc/2024-11-24_09-55-03", "learning_rate": 0.0001, "lm_mode": "freeze", "loss_type": "l2", "lr_schedule": "linear", "lr_warmup_steps": 1000, "max_seq_len": 64, "mixed_precision": "no", "normalize_latent": false, "num_decoder_latents": 32, "num_dense_connections": 3, "num_devices": 1, "num_encoder_latents": 32, "num_layers": 3, "num_samples": 1000, "num_train_steps": 50000, "objective": "pred_v", "optimizer": "adamw", "output_dir": "saved_latent_models/roc/2024-11-24_09-55-03", "resume_dir": null, "resume_training": false, "sampler": "ddpm", "sampling_schedule": null, "sampling_timesteps": 250, "save_and_sample_every": 5000, "save_dir": "saved_latent_models", "scale": 1.0, "scale_shift": true, "self_condition": true, "seq2seq_candidates": 5, "seq2seq_unconditional_prob": 0.1, "train_batch_size": 256, "train_prob_self_cond": 0.5, "train_schedule": "cosine", "trainable_params": 187928960, "tx_depth": 12, "tx_dim": 768, "wandb_name": "bart-roc-l2norm-test-32-64" }