Spaces:
Sleeping
Sleeping
import numpy as np | |
from rlkit.envs.mujoco_env import MujocoEnv | |
class AntEnv(MujocoEnv):
    """Quadruped ("ant") locomotion task.

    The agent is rewarded for forward (+x) torso velocity, penalized for
    control effort and clipped external contact forces, and receives a
    constant survival bonus per step. An episode terminates when the torso
    height leaves [0.2, 1.0] or the state becomes non-finite.
    """

    def __init__(self, use_low_gear_ratio=True):
        """Load the ant MuJoCo model.

        :param use_low_gear_ratio: if True, load the low-gear-ratio actuator
            variant of the ant XML (weaker motors); otherwise the normal one.
        """
        self.init_serialization(locals())
        if use_low_gear_ratio:
            xml_path = 'low_gear_ratio_ant.xml'
        else:
            xml_path = 'normal_gear_ratio_ant.xml'
        super().__init__(
            xml_path,
            frame_skip=5,
            automatically_set_obs_and_action_space=True,
        )

    def step(self, a):
        """Advance the simulation by one (frame-skipped) control step.

        :param a: action (joint torques), shape (action_dim,).
        :return: ``(observation, reward, done, info)`` in the classic Gym
            API; ``info`` breaks the reward into its components and carries
            the torso velocity vector.
        """
        # NOTE(review): in mujoco-py, get_body_com returns a view into the
        # simulation buffer; without .copy() "before" would alias the
        # post-step position and the measured velocity would collapse to
        # zero (gym's Ant-v3 copies for the same reason).
        torso_xyz_before = self.get_body_com("torso").copy()
        self.do_simulation(a, self.frame_skip)
        torso_xyz_after = self.get_body_com("torso").copy()
        torso_velocity = torso_xyz_after - torso_xyz_before
        forward_reward = torso_velocity[0] / self.dt
        ctrl_cost = .5 * np.square(a).sum()
        # Penalize external contact forces, clipped per-component to [-1, 1].
        contact_cost = 0.5 * 1e-3 * np.sum(
            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
        # Terminate when the torso has fallen or flipped (height outside
        # [0.2, 1.0]) or the physics diverged (non-finite state).
        notdone = np.isfinite(state).all() and 0.2 <= state[2] <= 1.0
        done = not notdone
        ob = self._get_obs()
        return ob, reward, done, dict(
            reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost,
            reward_contact=-contact_cost,
            reward_survive=survive_reward,
            torso_velocity=torso_velocity,
        )

    def _get_obs(self):
        """Observation: qpos without the global (x, y) torso position,
        concatenated with qvel."""
        return np.concatenate([
            self.sim.data.qpos.flat[2:],
            self.sim.data.qvel.flat,
        ])

    def reset_model(self):
        """Reset to the initial pose perturbed by small noise.

        Positions get uniform noise in [-0.1, 0.1]; velocities get
        Gaussian noise scaled by 0.1. Returns the first observation.
        """
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-.1, high=.1)
        qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Place the camera at half the model's spatial extent."""
        self.viewer.cam.distance = self.model.stat.extent * 0.5