Spaces:
Sleeping
Sleeping
import numpy as np | |
import numpy.random as nr | |
from rlkit.exploration_strategies.base import RawExplorationStrategy | |
class OUStrategy(RawExplorationStrategy): | |
""" | |
This strategy implements the Ornstein-Uhlenbeck process, which adds | |
time-correlated noise to the actions taken by the deterministic policy. | |
The OU process satisfies the following stochastic differential equation: | |
dxt = theta*(mu - xt)*dt + sigma*dWt | |
where Wt denotes the Wiener process | |
Based on the rllab implementation. | |
""" | |
def __init__( | |
self, | |
action_space, | |
mu=0, | |
theta=0.15, | |
max_sigma=0.3, | |
min_sigma=None, | |
decay_period=100000, | |
): | |
if min_sigma is None: | |
min_sigma = max_sigma | |
self.mu = mu | |
self.theta = theta | |
self.sigma = max_sigma | |
self._max_sigma = max_sigma | |
if min_sigma is None: | |
min_sigma = max_sigma | |
self._min_sigma = min_sigma | |
self._decay_period = decay_period | |
self.dim = np.prod(action_space.low.shape) | |
self.low = action_space.low | |
self.high = action_space.high | |
self.state = np.ones(self.dim) * self.mu | |
self.reset() | |
def reset(self): | |
self.state = np.ones(self.dim) * self.mu | |
def evolve_state(self): | |
x = self.state | |
dx = self.theta * (self.mu - x) + self.sigma * nr.randn(len(x)) | |
self.state = x + dx | |
return self.state | |
def get_action_from_raw_action(self, action, t=0, **kwargs): | |
ou_state = self.evolve_state() | |
self.sigma = ( | |
self._max_sigma | |
- (self._max_sigma - self._min_sigma) | |
* min(1.0, t * 1.0 / self._decay_period) | |
) | |
return np.clip(action + ou_state, self.low, self.high) | |