Spaces:
Running
Running
File size: 2,520 Bytes
0b8359d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
#-*-Python-*-
import gin.tf.external_configurables
create_maze_env.top_down_view = %IMAGES
## Create the agent
AGENT_CLASS = @UvfAgent
UvfAgent.tf_context = %CONTEXT
UvfAgent.actor_net = @agent/ddpg_actor_net
UvfAgent.critic_net = @agent/ddpg_critic_net
UvfAgent.dqda_clipping = 0.0
UvfAgent.td_errors_loss = @tf.losses.huber_loss
UvfAgent.target_q_clipping = %TARGET_Q_CLIPPING
# Create meta agent
META_CLASS = @MetaAgent
MetaAgent.tf_context = %META_CONTEXT
MetaAgent.sub_context = %CONTEXT
MetaAgent.actor_net = @meta/ddpg_actor_net
MetaAgent.critic_net = @meta/ddpg_critic_net
MetaAgent.dqda_clipping = 0.0
MetaAgent.td_errors_loss = @tf.losses.huber_loss
MetaAgent.target_q_clipping = %TARGET_Q_CLIPPING
# Create state preprocess
STATE_PREPROCESS_CLASS = @StatePreprocess
StatePreprocess.ndims = %SUBGOAL_DIM
state_preprocess_net.states_hidden_layers = (100, 100)
state_preprocess_net.num_output_dims = %SUBGOAL_DIM
state_preprocess_net.images = %IMAGES
action_embed_net.num_output_dims = %SUBGOAL_DIM
INVERSE_DYNAMICS_CLASS = @InverseDynamics
# actor_net
ACTOR_HIDDEN_SIZE_1 = 300
ACTOR_HIDDEN_SIZE_2 = 300
agent/ddpg_actor_net.hidden_layers = (%ACTOR_HIDDEN_SIZE_1, %ACTOR_HIDDEN_SIZE_2)
agent/ddpg_actor_net.activation_fn = @tf.nn.relu
agent/ddpg_actor_net.zero_obs = %ZERO_OBS
agent/ddpg_actor_net.images = %IMAGES
meta/ddpg_actor_net.hidden_layers = (%ACTOR_HIDDEN_SIZE_1, %ACTOR_HIDDEN_SIZE_2)
meta/ddpg_actor_net.activation_fn = @tf.nn.relu
meta/ddpg_actor_net.zero_obs = False
meta/ddpg_actor_net.images = %IMAGES
# critic_net
CRITIC_HIDDEN_SIZE_1 = 300
CRITIC_HIDDEN_SIZE_2 = 300
agent/ddpg_critic_net.states_hidden_layers = (%CRITIC_HIDDEN_SIZE_1,)
agent/ddpg_critic_net.actions_hidden_layers = None
agent/ddpg_critic_net.joint_hidden_layers = (%CRITIC_HIDDEN_SIZE_2,)
agent/ddpg_critic_net.weight_decay = 0.0
agent/ddpg_critic_net.activation_fn = @tf.nn.relu
agent/ddpg_critic_net.zero_obs = %ZERO_OBS
agent/ddpg_critic_net.images = %IMAGES
meta/ddpg_critic_net.states_hidden_layers = (%CRITIC_HIDDEN_SIZE_1,)
meta/ddpg_critic_net.actions_hidden_layers = None
meta/ddpg_critic_net.joint_hidden_layers = (%CRITIC_HIDDEN_SIZE_2,)
meta/ddpg_critic_net.weight_decay = 0.0
meta/ddpg_critic_net.activation_fn = @tf.nn.relu
meta/ddpg_critic_net.zero_obs = False
meta/ddpg_critic_net.images = %IMAGES
tf.losses.huber_loss.delta = 1.0
# Sample action
uvf_add_noise_fn.stddev = 1.0
meta_add_noise_fn.stddev = %META_EXPLORE_NOISE
# Update targets
ddpg_update_targets.tau = 0.001
td3_update_targets.tau = 0.005
|