# Spaces:
# Running
# Running
#-*-Python-*-
# Create replay_buffer
# CircularBuffer is configured under two gin scopes, "agent" and "meta".
# Both scopes use the same buffer_size; each passes its own scope name as
# the `scope` parameter.
agent/CircularBuffer.buffer_size = 200000
meta/CircularBuffer.buffer_size = 200000
agent/CircularBuffer.scope = "agent"
meta/CircularBuffer.scope = "meta"
# Config train
# Bindings for the parameters of train_uvf.
# `@name()` binds the result of calling the configurable; `%NAME` is a gin
# macro. The macros used here are not defined in the lines above, so they
# must be supplied by another config file or binding.
train_uvf.environment = @create_maze_env()
train_uvf.agent_class = %AGENT_CLASS
train_uvf.meta_agent_class = %META_CLASS
train_uvf.state_preprocess_class = %STATE_PREPROCESS_CLASS
train_uvf.inverse_dynamics_class = %INVERSE_DYNAMICS_CLASS
# Replay buffers: one CircularBuffer per scope ("agent" and "meta").
train_uvf.replay_buffer = @agent/CircularBuffer()
train_uvf.meta_replay_buffer = @meta/CircularBuffer()
# Optimizers: one scoped AdamOptimizer per trained component.
train_uvf.critic_optimizer = @critic/AdamOptimizer()
train_uvf.actor_optimizer = @actor/AdamOptimizer()
train_uvf.meta_critic_optimizer = @meta_critic/AdamOptimizer()
train_uvf.meta_actor_optimizer = @meta_actor/AdamOptimizer()
train_uvf.repr_optimizer = @repr/AdamOptimizer()
# Scalar training settings.
train_uvf.num_episodes_train = 25000
train_uvf.batch_size = 100
train_uvf.initial_episodes = 5
train_uvf.gamma = 0.99
train_uvf.meta_gamma = 0.99
train_uvf.reward_scale_factor = 1.0
train_uvf.target_update_period = 2
train_uvf.num_updates_per_observation = 1
train_uvf.num_collect_per_update = 1
train_uvf.num_collect_per_meta_update = 10
# Logging and checkpoint cadence.
train_uvf.debug_summaries = False
train_uvf.log_every_n_steps = 1000
train_uvf.save_policy_every_n_steps = 100000
# Config Optimizers
# One AdamOptimizer configuration per gin scope. All five scopes share
# beta1 = 0.9 and beta2 = 0.999; only learning_rate differs:
#   critic, meta_critic      -> 0.001
#   actor, meta_actor, repr  -> 0.0001
critic/AdamOptimizer.learning_rate = 0.001
critic/AdamOptimizer.beta1 = 0.9
critic/AdamOptimizer.beta2 = 0.999
actor/AdamOptimizer.learning_rate = 0.0001
actor/AdamOptimizer.beta1 = 0.9
actor/AdamOptimizer.beta2 = 0.999
meta_critic/AdamOptimizer.learning_rate = 0.001
meta_critic/AdamOptimizer.beta1 = 0.9
meta_critic/AdamOptimizer.beta2 = 0.999
meta_actor/AdamOptimizer.learning_rate = 0.0001
meta_actor/AdamOptimizer.beta1 = 0.9
meta_actor/AdamOptimizer.beta2 = 0.999
repr/AdamOptimizer.learning_rate = 0.0001
repr/AdamOptimizer.beta1 = 0.9
repr/AdamOptimizer.beta2 = 0.999