#-*-Python-*-
# Gin configuration file.
# Importing gin.tf.external_configurables makes TensorFlow symbols available
# to gin, so that references such as @tf.nn.relu and @tf.losses.huber_loss
# used later in this file can be resolved.
import gin.tf.external_configurables

# IMAGES is a macro that is not defined in this part of the file; the same
# macro is also passed to the `images` parameter of the networks below.
create_maze_env.top_down_view = %IMAGES
## Create the agent
# AGENT_CLASS is a macro holding a reference to the UvfAgent configurable;
# the bindings that follow set UvfAgent's parameters.
AGENT_CLASS = @UvfAgent
# CONTEXT is a macro that is not defined in this part of the file.
UvfAgent.tf_context = %CONTEXT
# The actor and critic use the ddpg_*_net configurables under the "agent"
# scope, so they receive the agent/... bindings made further down.
UvfAgent.actor_net = @agent/ddpg_actor_net
UvfAgent.critic_net = @agent/ddpg_critic_net
# NOTE(review): 0.0 presumably disables dQ/da clipping -- confirm against
# the UvfAgent implementation.
UvfAgent.dqda_clipping = 0.0
# TD errors are scored with the Huber loss; its delta is bound further down.
UvfAgent.td_errors_loss = @tf.losses.huber_loss
# TARGET_Q_CLIPPING is a macro that is not defined in this part of the file.
UvfAgent.target_q_clipping = %TARGET_Q_CLIPPING
# Create meta agent
# META_CLASS is a macro holding a reference to the MetaAgent configurable;
# the bindings that follow set MetaAgent's parameters.
META_CLASS = @MetaAgent
# META_CONTEXT and CONTEXT are macros that are not defined in this part of
# the file. The meta agent gets its own context plus, as sub_context, the
# same CONTEXT macro that UvfAgent.tf_context is bound to.
MetaAgent.tf_context = %META_CONTEXT
MetaAgent.sub_context = %CONTEXT
# The actor and critic use the ddpg_*_net configurables under the "meta"
# scope, so they receive the meta/... bindings made further down.
MetaAgent.actor_net = @meta/ddpg_actor_net
MetaAgent.critic_net = @meta/ddpg_critic_net
# NOTE(review): 0.0 presumably disables dQ/da clipping -- confirm against
# the MetaAgent implementation.
MetaAgent.dqda_clipping = 0.0
# Same loss and target-Q clipping macro as the UvfAgent bindings.
MetaAgent.td_errors_loss = @tf.losses.huber_loss
MetaAgent.target_q_clipping = %TARGET_Q_CLIPPING
# Create state preprocess
# STATE_PREPROCESS_CLASS is a macro holding a reference to StatePreprocess.
STATE_PREPROCESS_CLASS = @StatePreprocess
# SUBGOAL_DIM is a macro that is not defined in this part of the file. It is
# used for StatePreprocess.ndims and for the output size of both embedding
# networks, so all three values stay equal.
StatePreprocess.ndims = %SUBGOAL_DIM
state_preprocess_net.states_hidden_layers = (100, 100)
state_preprocess_net.num_output_dims = %SUBGOAL_DIM
state_preprocess_net.images = %IMAGES
action_embed_net.num_output_dims = %SUBGOAL_DIM
# INVERSE_DYNAMICS_CLASS is a macro holding a reference to InverseDynamics.
INVERSE_DYNAMICS_CLASS = @InverseDynamics
# actor_net
# Hidden-layer sizes shared by the "agent" and "meta" actor networks; change
# these macros to resize both at once.
ACTOR_HIDDEN_SIZE_1 = 300
ACTOR_HIDDEN_SIZE_2 = 300
# Bindings for ddpg_actor_net when used under the "agent" scope.
agent/ddpg_actor_net.hidden_layers = (%ACTOR_HIDDEN_SIZE_1, %ACTOR_HIDDEN_SIZE_2)
agent/ddpg_actor_net.activation_fn = @tf.nn.relu
# ZERO_OBS and IMAGES are macros that are not defined in this part of the file.
agent/ddpg_actor_net.zero_obs = %ZERO_OBS
agent/ddpg_actor_net.images = %IMAGES
# Bindings for ddpg_actor_net when used under the "meta" scope. Unlike the
# "agent" scope, zero_obs is fixed to False here instead of following %ZERO_OBS.
meta/ddpg_actor_net.hidden_layers = (%ACTOR_HIDDEN_SIZE_1, %ACTOR_HIDDEN_SIZE_2)
meta/ddpg_actor_net.activation_fn = @tf.nn.relu
meta/ddpg_actor_net.zero_obs = False
meta/ddpg_actor_net.images = %IMAGES
# critic_net
# Hidden-layer sizes shared by the "agent" and "meta" critic networks:
# SIZE_1 is used for the states layers, SIZE_2 for the joint layers.
CRITIC_HIDDEN_SIZE_1 = 300
CRITIC_HIDDEN_SIZE_2 = 300
# Bindings for ddpg_critic_net when used under the "agent" scope.
agent/ddpg_critic_net.states_hidden_layers = (%CRITIC_HIDDEN_SIZE_1,)
agent/ddpg_critic_net.actions_hidden_layers = None
agent/ddpg_critic_net.joint_hidden_layers = (%CRITIC_HIDDEN_SIZE_2,)
agent/ddpg_critic_net.weight_decay = 0.0
agent/ddpg_critic_net.activation_fn = @tf.nn.relu
# ZERO_OBS and IMAGES are macros that are not defined in this part of the file.
agent/ddpg_critic_net.zero_obs = %ZERO_OBS
agent/ddpg_critic_net.images = %IMAGES
# Bindings for ddpg_critic_net when used under the "meta" scope. Unlike the
# "agent" scope, zero_obs is fixed to False here instead of following %ZERO_OBS.
meta/ddpg_critic_net.states_hidden_layers = (%CRITIC_HIDDEN_SIZE_1,)
meta/ddpg_critic_net.actions_hidden_layers = None
meta/ddpg_critic_net.joint_hidden_layers = (%CRITIC_HIDDEN_SIZE_2,)
meta/ddpg_critic_net.weight_decay = 0.0
meta/ddpg_critic_net.activation_fn = @tf.nn.relu
meta/ddpg_critic_net.zero_obs = False
meta/ddpg_critic_net.images = %IMAGES
# Unscoped binding: applies to tf.losses.huber_loss, which is the
# td_errors_loss bound for both UvfAgent and MetaAgent above.
tf.losses.huber_loss.delta = 1.0
# Sample action
# Standard deviation passed to each noise function. The UVF value is fixed
# here; the meta value comes from the META_EXPLORE_NOISE macro, which is not
# defined in this part of the file.
uvf_add_noise_fn.stddev = 1.0
meta_add_noise_fn.stddev = %META_EXPLORE_NOISE
# Update targets
# tau for each target-update function. Both are bound here; which of the two
# functions is actually used is decided elsewhere.
# NOTE(review): tau is presumably the soft-update mixing coefficient --
# confirm against the update_targets implementations.
ddpg_update_targets.tau = 0.001
td3_update_targets.tau = 0.005