import numpy as np


class QLearningAgent:
    def __init__(self, env) -> None:
        self.env = env
        self.q_table = self.build_q_table(env.observation_space.n, env.action_space.n)

    def build_q_table(self, n_states, n_actions):
        # Initialize every Q(s, a) estimate to zero
        return np.zeros((n_states, n_actions))

    def epsilon_greedy_policy(self, state, epsilon):
        # With probability epsilon, take a random action (exploration);
        # otherwise take the action with the highest Q-value for the
        # current state (exploitation)
        if np.random.random() < epsilon:
            return np.random.choice(self.env.action_space.n)
        return np.argmax(self.q_table[state])

    def greedy_policy(self, state):
        # Always exploit: pick the action with the highest Q-value
        return np.argmax(self.q_table[state])

    def update_q_table(self, state, action, reward, gamma, learning_rate, new_state):
        # Update Q(s,a) := Q(s,a) + lr * [R(s,a) + gamma * max_a' Q(s',a') - Q(s,a)]
        current_q = self.q_table[state][action]
        next_max_q = np.max(self.q_table[new_state])
        self.q_table[state][action] = current_q + learning_rate * (
            reward + gamma * next_max_q - current_q
        )
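The agent only touches the environment through `observation_space.n` and `action_space.n`, so any Gymnasium environment with discrete states and actions will do. Here is a minimal training-loop sketch, assuming the `gymnasium` package and its `FrozenLake-v1` environment; the episode count and hyperparameter values are illustrative, not prescribed by the class above:

import gymnasium as gym

env = gym.make("FrozenLake-v1")
agent = QLearningAgent(env)

n_episodes = 10_000
learning_rate = 0.1
gamma = 0.99           # discount factor
epsilon = 1.0          # start fully exploratory
epsilon_min = 0.05
epsilon_decay = 0.999  # multiplicative decay per episode

for episode in range(n_episodes):
    state, _ = env.reset()
    done = False
    while not done:
        action = agent.epsilon_greedy_policy(state, epsilon)
        new_state, reward, terminated, truncated, _ = env.step(action)
        agent.update_q_table(state, action, reward, gamma, learning_rate, new_state)
        state = new_state
        done = terminated or truncated
    # Shift gradually from exploration toward exploitation
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

At evaluation time, call `agent.greedy_policy(state)` instead, so the learned Q-table is exploited without any further random exploration.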