{"behaviors": {"Pyramids": {"trainer_type": "ppo", "hyperparameters": {"batch_size": 128, "buffer_size": 2048, "learning_rate": 0.0003, "beta": 0.01, "epsilon": 0.2, "lambd": 0.95, "num_epoch": 3, "learning_rate_schedule": "linear"}, "network_settings": {"normalize": false, "hidden_units": 512, "num_layers": 2, "vis_encode_type": "simple"}, "reward_signals": {"extrinsic": {"gamma": 0.99, "strength": 1.0}, "rnd": {"gamma": 0.99, "strength": 0.01, "network_settings": {"hidden_units": 64, "num_layers": 3}, "learning_rate": 0.0001}}, "keep_checkpoints": 5, "max_steps": 1500000, "time_horizon": 128, "summary_freq": 30000}}}