00BER
/

ml-reinforcement-learning

Upload 36 files

e085e3b almost 2 years ago

2.24 kB

	from tqdm import trange

	def fill_memory(agent, env, num_episodes=500):
	    """Run episodes and hand every transition to ``agent.cache`` without learning.

	    Each episode resets ``env`` and then steps it with actions chosen by
	    ``agent.act`` until the environment reports ``done``. ``agent.learn`` is
	    never called here, so this only collects experience (presumably to
	    pre-fill a replay buffer before training -- confirm against the agent).

	    Args:
	        agent: Object providing ``act(state)`` and
	            ``cache(state, next_state, action, reward, done)``.
	        env: Environment whose ``reset()`` returns the initial state and
	            whose ``step(action)`` returns a 4-tuple
	            ``(next_state, reward, done, info)``.
	        num_episodes: Number of episodes to play; the progress bar runs over
	            this count.
	    """
	    print("Filling up memory....")
	    # Honour the caller's episode count instead of a hard-coded 500.
	    for _ in trange(num_episodes):
	        state = env.reset()
	        done = False
	        while not done:
	            action = agent.act(state)
	            next_state, reward, done, _ = env.step(action)
	            agent.cache(state, next_state, action, reward, done)
	            state = next_state


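	# Earlier version of train(), kept for reference: it loops with `while True`
	# (no per-episode step cap), never calls env.render(), and calls agent.cache
	# without the step index.
	# def train(agent, env, logger):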
	# episodes = 5000
	# for e in range(episodes):

	# state = env.reset()
	# # Play the game!
	# while True:

	# # Run agent on the state
	# action = agent.act(state)

	# # Agent performs action
	# next_state, reward, done, info = env.step(action)

	# # Remember
	# agent.cache(state, next_state, action, reward, done)

	# # Learn
	# q, loss = agent.learn()

	# # Logging
	# logger.log_step(reward, loss, q)

	# # Update state
	# state = next_state

	# # Check if end of game
	# if done:
	# break

	# logger.log_episode(e)

	# if e % 20 == 0:
	# logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)


	def train(agent, env, logger, episodes=5000, max_steps=1000):
	    """Train ``agent`` on ``env``, rendering every step and logging progress.

	    For each episode the environment is reset and then stepped for at most
	    ``max_steps`` steps, stopping early when it reports ``done``. On every
	    step the agent acts, the environment is rendered, the transition is
	    cached, ``agent.learn()`` is called and its result is logged.

	    Args:
	        agent: Object providing ``act(state)``,
	            ``cache(state, next_state, action, reward, done, step_index)``,
	            ``learn()`` returning a ``(q, loss)`` pair, and the attributes
	            ``exploration_rate`` and ``curr_step``.
	        env: Environment providing ``reset()``, ``render()`` and
	            ``step(action)`` returning a 4-tuple
	            ``(next_state, reward, done, info)``.
	        logger: Object providing ``log_step(reward, loss, q)``,
	            ``log_episode(episode)`` and
	            ``record(episode=..., epsilon=..., step=...)``.
	        episodes: Number of episodes to run (previously fixed at 5000).
	        max_steps: Upper bound on steps per episode (previously fixed at
	            1000).
	    """
	    for e in range(episodes):
	        state = env.reset()

	        # Play the game, capped at max_steps so an episode that never
	        # reports done still terminates.
	        for i in range(max_steps):
	            # Run agent on the state
	            action = agent.act(state)
	            env.render()

	            # Agent performs action
	            next_state, reward, done, _ = env.step(action)

	            # Remember. NOTE(review): the step index is passed as a sixth
	            # argument here, while fill_memory calls cache with five --
	            # confirm the agent accepts both forms.
	            agent.cache(state, next_state, action, reward, done, i)

	            # Learn
	            q, loss = agent.learn()

	            # Logging
	            logger.log_step(reward, loss, q)

	            # Update state
	            state = next_state

	            # Check if end of game
	            if done:
	                break

	        logger.log_episode(e)

	        # Record a summary every 20th episode, starting with episode 0.
	        if e % 20 == 0:
	            logger.record(episode=e, epsilon=agent.exploration_rate, step=agent.curr_step)