# Visualize environment and custom tasks

In [None]:
import pathlib
import sys
import os
# Append the parent of the current working directory (os.path.abspath('')
# resolves to the cwd) to the module search path.
# NOTE(review): presumably this makes the project-level `envs` package
# importable when the notebook runs from a sub-folder -- confirm.
sys.path.append(str(pathlib.Path(os.path.abspath('')).parent))

# NOTE(review): wildcard import; presumably kept for its import-time side
# effects (e.g. making the custom tasks known to dm_control) -- confirm
# before replacing it with explicit names.
from envs.custom_dmc_tasks import *
from dm_control import suite
import numpy as np

# Domain / task pair to load from the dm_control suite.
domain, task = 'stickman', 'sit_knees'

# Build the environment once and cache its action spec; the policies
# defined below read `action_spec` as a module-level global.
env = suite.load(
    domain_name=domain,
    task_name=task,
    visualize_reward=True,
)
action_spec = env.action_spec()

def random_policy(time_step):
    """Sample an action uniformly at random between the action bounds.

    Args:
        time_step: Ignored; accepted only so the function has the
            signature of a policy.

    Returns:
        An array of shape ``action_spec.shape`` drawn from
        ``U(action_spec.minimum, action_spec.maximum)``.
    """
    del time_step  # The policy does not look at the observation.
    lower, upper = action_spec.minimum, action_spec.maximum
    return np.random.uniform(lower, upper, action_spec.shape)

def zero_policy(time_step):
    """Return the all-zeros action, ignoring ``time_step``.

    Returns:
        A zero-filled array of shape ``action_spec.shape``.
    """
    del time_step  # Unused.
    shape = action_spec.shape
    return np.zeros(shape)
 

class GoalSetWrapper:
    """Environment wrapper that can force the physics state to a goal.

    Any attribute not defined on the wrapper is forwarded to the wrapped
    environment, so the wrapper can be used wherever the environment is.

    Args:
        env: Environment to wrap. It must expose ``physics`` (with
            ``get_state`` / ``set_state``), ``action_spec()`` and ``step``.
        goal: Optional sequence written to the start of the physics state
            before every ``step`` (see ``set_goal``).
        goal_idx: Optional iterable of ``(index, value)`` pairs written into
            the physics state before every ``step`` (see ``set_goal_by_idx``).
    """

    def __init__(self, env, goal=None, goal_idx=None):
        self._env = env
        # Remove the wrapped environment's step limit.
        # NOTE(review): presumably so episodes never time out while a pose
        # is being inspected -- confirm.
        self._env._step_limit = float('inf')
        self._goal = goal
        self._goal_idx = goal_idx

    def step(self, *args, **kwargs):
        """Re-apply the configured goal(s), then step the wrapped env.

        Each configured goal costs one extra zero-action step of the wrapped
        environment before the requested step is taken.

        Returns:
            Whatever the wrapped environment's ``step`` returns.
        """
        if self._goal is not None:
            self.set_goal(self._goal)
        if self._goal_idx is not None:
            self.set_goal_by_idx(self._goal_idx)
        return self._env.step(*args, **kwargs)

    def set_goal_by_idx(self, idx_goal):
        """Overwrite selected entries of the physics state.

        Args:
            idx_goal: Iterable of ``(index, value)`` pairs; each ``value`` is
                written to position ``index`` of a copy of the current state.
        """
        state = self._env.physics.get_state().copy()
        for idx, value in idx_goal:
            state[idx] = value
        self._env.physics.set_state(state)
        self._step_with_zero_action()

    def set_goal(self, goal):
        """Replace the physics state with ``goal`` padded with zeros.

        ``goal`` fills the leading entries of the state vector and every
        remaining entry is set to zero.
        NOTE(review): assumes the leading entries are the positions and the
        zeroed remainder the velocities -- confirm against the physics
        state layout.

        Args:
            goal: 1-D sequence no longer than the physics state.
        """
        goal = np.array(goal)
        n_rest = self._env.physics.get_state().shape[0] - goal.shape[0]
        padded = np.concatenate((goal, np.zeros([n_rest])))
        self._env.physics.set_state(padded)
        self._step_with_zero_action()

    def _step_with_zero_action(self):
        """Step the wrapped env once with an all-zeros action."""
        # np.zeros(shape) builds an action *of* that shape; np.zeros_like on
        # the shape tuple would instead build an array shaped like the tuple.
        return self._env.step(np.zeros(self.action_spec().shape))

    def __getattr__(self, name: str):
        """Forward unknown attribute lookups to the wrapped environment."""
        # Only called when normal lookup fails. If `_env` itself is missing
        # (instance created without __init__, e.g. during copy/unpickle),
        # forwarding would recurse forever, so fail cleanly instead.
        if name == '_env':
            raise AttributeError(name)
        return getattr(self._env, name)


# Rebind `env` to the goal-setting wrapper. No goal is configured here, so
# `step` behaves like the wrapped environment's `step` until `set_goal` /
# `set_goal_by_idx` is called explicitly.
env = GoalSetWrapper(env)

In [None]:
import matplotlib.pyplot as plt
from envs.custom_dmc_tasks.stickman import StickmanYogaPoses

# Start a fresh episode before forcing the pose.
obs = env.reset()

# Overwrite the physics state with the predefined pose; set_goal also
# advances the simulation by one zero-action step.
for _ in range(1):
    env.set_goal(StickmanYogaPoses.sit_knees)

# Optionally perturb the pose with random actions:
# for _ in range(20):
#     obs = env.step(np.random.randn(*env.action_spec().shape))

# `obs` still holds the TimeStep returned by reset(), whose reward is None,
# and the step taken inside set_goal is discarded. Ask the task for the
# reward of the *current* physics state so the printed value reflects the
# pose that is rendered below.
print('Rew', env.task.get_reward(env.physics))

# Diagnostics exposed by the physics object for the current pose.
print('Upright', env.physics.torso_upright())
print('Torso height', env.physics.torso_height())

# Render the current state from camera 0 and display it inline.
plt.imshow(env.physics.render(camera_id=0))

In [None]:
# Advance the environment by one step using an action sampled from a
# standard normal distribution with the shape given by the action spec.
# The resulting time step is kept in `obs`; raise the range to take more steps.
for _ in range(1):
 obs = env.step(np.random.randn(*env.action_spec().shape))

In [None]:
# Display the name-indexed view of `qpos` for the current physics state
# (as the last expression of the cell, the notebook renders it).
env.physics.named.data.qpos

In [None]:
# Display the name-indexed view of `xpos` for the current physics state
# (as the last expression of the cell, the notebook renders it).
env.physics.named.data.xpos