import numpy as np
import numpy.random as nr

from rlkit.exploration_strategies.base import RawExplorationStrategy


class OUStrategy(RawExplorationStrategy):
    """
    This strategy implements the Ornstein-Uhlenbeck process, which adds
    time-correlated noise to the actions taken by the deterministic policy.
    The OU process satisfies the following stochastic differential equation:

        dx_t = theta * (mu - x_t) * dt + sigma * dW_t

    where W_t denotes the Wiener process.

    Based on the rllab implementation.
    """
    def __init__(
            self,
            action_space,
            mu=0,
            theta=0.15,
            max_sigma=0.3,
            min_sigma=None,
            decay_period=100000,
    ):
        if min_sigma is None:
            min_sigma = max_sigma
        self.mu = mu
        self.theta = theta
        self.sigma = max_sigma
        self._max_sigma = max_sigma
        self._min_sigma = min_sigma
        self._decay_period = decay_period
        self.dim = np.prod(action_space.low.shape)
        self.low = action_space.low
        self.high = action_space.high
        self.reset()

    def reset(self):
        # Restart the noise process at its long-run mean.
        self.state = np.ones(self.dim) * self.mu
    def evolve_state(self):
        # One Euler-Maruyama step of the OU process with dt = 1:
        # x <- x + theta * (mu - x) + sigma * N(0, I)
        x = self.state
        dx = self.theta * (self.mu - x) + self.sigma * nr.randn(len(x))
        self.state = x + dx
        return self.state

    def get_action_from_raw_action(self, action, t=0, **kwargs):
        ou_state = self.evolve_state()
        # Linearly anneal sigma from max_sigma to min_sigma over decay_period steps.
        self.sigma = (
            self._max_sigma
            - (self._max_sigma - self._min_sigma)
            * min(1.0, t * 1.0 / self._decay_period)
        )
        return np.clip(action + ou_state, self.low, self.high)
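
if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: OUStrategy only
    # reads `low` and `high` from the action space, so a SimpleNamespace
    # stand-in is enough here. In practice you would pass env.action_space
    # from a gym-style environment and feed in your policy's raw actions.
    from types import SimpleNamespace

    action_space = SimpleNamespace(
        low=-np.ones(2, dtype=np.float32),
        high=np.ones(2, dtype=np.float32),
    )
    es = OUStrategy(action_space=action_space, max_sigma=0.3, min_sigma=0.05)
    es.reset()
    for t in range(5):
        raw_action = np.zeros(2)  # stand-in for a deterministic policy output
        noisy_action = es.get_action_from_raw_action(raw_action, t=t)
        print(t, noisy_action)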