from tqdm import trange | |
def fill_memory(agent, env, num_episodes=500):
    """Play whole episodes and hand every transition to ``agent.cache``.

    Each episode resets ``env``, then repeatedly asks the agent for an
    action, steps the environment and caches the resulting transition until
    the environment reports ``done``.  ``agent.learn`` is never called here.

    Args:
        agent: Object exposing ``act(state)`` and
            ``cache(state, next_state, action, reward, done)``.
        env: Object whose ``reset()`` returns an initial state and whose
            ``step(action)`` returns a 4-tuple
            ``(next_state, reward, done, info)``.
        num_episodes: Number of episodes to play.  Defaults to 500.
    """
    print("Filling up memory....")
    # Iterate over the requested episode count; the argument used to be
    # ignored in favour of a hard-coded 500.
    for _ in trange(num_episodes):
        state = env.reset()
        done = False
        while not done:
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            agent.cache(state, next_state, action, reward, done)
            state = next_state
# Previous version of train(), kept for reference (unbounded `while True`
# episode loop, no env.render() call, five-argument agent.cache).
# def train(agent, env, logger):
#     episodes = 5000
#     for e in range(episodes):
#         state = env.reset()
#         # Play the game!
#         while True:
#             # Run agent on the state
#             action = agent.act(state)
#             # Agent performs action
#             next_state, reward, done, info = env.step(action)
#             # Remember
#             agent.cache(state, next_state, action, reward, done)
#             # Learn
#             q, loss = agent.learn()
#             # Logging
#             logger.log_step(reward, loss, q)
#             # Update state
#             state = next_state
#             # Check if end of game
#             if done:
#                 break
#         logger.log_episode(e)
#         if e % 20 == 0:
#             logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)
def train(agent, env, logger, episodes=5000, max_steps=1000, render=True):
    """Run the act / step / cache / learn loop for a number of episodes.

    For every episode the environment is reset and the agent interacts with
    it for at most ``max_steps`` steps, stopping early when ``env.step``
    reports ``done``.  After each step the transition is cached, the agent's
    ``learn`` method is called, and the step is logged.  After each episode
    ``logger.log_episode`` is called, and every 20th episode (starting with
    episode 0) ``logger.record`` is called.

    Args:
        agent: Object exposing ``act(state)``, ``cache(...)``, ``learn()``
            (returning ``(q, loss)``) and the attributes
            ``exploration_rate`` and ``curr_step``.
        env: Object exposing ``reset()``, ``render()`` and ``step(action)``
            returning ``(next_state, reward, done, info)``.
        logger: Object exposing ``log_step(reward, loss, q)``,
            ``log_episode(episode)`` and
            ``record(episode=..., epsilon=..., step=...)``.
        episodes: Number of episodes to train for.  Defaults to 5000.
        max_steps: Upper bound on steps per episode.  Defaults to 1000.
        render: When true (the default), ``env.render()`` is called once per
            step before the action is applied; pass ``False`` to skip it.
    """
    for e in range(episodes):
        state = env.reset()

        # Play one episode, capped at max_steps steps.
        for i in range(max_steps):
            # Run agent on the state
            action = agent.act(state)
            if render:
                env.render()

            # Agent performs action (the info element is not used here)
            next_state, reward, done, _ = env.step(action)

            # Remember the transition together with the in-episode step index.
            # NOTE(review): fill_memory() calls agent.cache with five
            # arguments (no step index) -- confirm the agent accepts both.
            agent.cache(state, next_state, action, reward, done, i)

            # Learn
            q, loss = agent.learn()

            # Logging
            logger.log_step(reward, loss, q)

            # Update state
            state = next_state

            # Check if end of game
            if done:
                break

        logger.log_episode(e)

        if e % 20 == 0:
            logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)