|
from stable_baselines3 import DQN |
|
from stable_baselines3.common.evaluation import evaluate_policy |
|
from stable_baselines3.common.monitor import Monitor |
|
import gymnasium as gym |
|
|
|
import argparse |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Command-line interface for the evaluation script. Only --agent_filepath is
# mandatory; the numeric options fall back to the defaults given below.
parser = argparse.ArgumentParser()
parser.add_argument(
    "-r",
    "--repeat_action_probability",
    help="repeat action probability, default 0.25",
    type=float,
    default=0.25,
)
parser.add_argument(
    "-f",
    "--frameskip",
    help="frameskip, default 4",
    type=int,
    default=4,
)
# Boolean switches: store_true yields a real False (instead of None) when the
# flag is omitted, so the attributes can be used directly as booleans.
parser.add_argument(
    "-o",
    "--observe",
    help="observe agent",
    action="store_true",
)
parser.add_argument(
    "-p",
    "--print",
    help="print environment information",
    action="store_true",
)
parser.add_argument(
    "-e",
    "--num_episodes",
    help="specify the number of episodes to evaluate, default 1",
    type=int,
    default=1,
)
parser.add_argument(
    "-a",
    "--agent_filepath",
    help="file path to agent to watch, minus the .zip extension",
    type=str,
    required=True,
)
# Parses sys.argv; argparse exits with a usage message if -a is missing.
args = parser.parse_args()
|
|
|
# Path of the saved agent as supplied on the command line (the CLI help asks
# for it without the ".zip" suffix).
MODEL_NAME = args.agent_filepath

# Restore the trained DQN agent from that path.
loaded_model = DQN.load(path=MODEL_NAME)
|
|
|
|
|
# Request the "human" render mode when --observe was passed; otherwise fall
# back to "rgb_array". A truthiness test covers both a False and a None
# value for an omitted flag.
mode = "human" if args.observe else "rgb_array"

# Build the Pacman environment with the CLI-selected settings and wrap it in
# Monitor before handing it to the evaluation helper.
eval_env = Monitor(
    gym.make(
        "ALE/Pacman-v5",
        render_mode=mode,
        repeat_action_probability=args.repeat_action_probability,
        frameskip=args.frameskip,
    )
)
|
|
|
# With --print, dump the environment spec one field per line.
if args.print:
    # The spec's string form is a single ", "-separated line; split it so
    # each piece lands on its own output line.
    env_info = str(eval_env.spec).split(", ")
    print(*env_info, sep="\n")
|
|
|
# Evaluate the loaded agent's policy for the requested number of episodes and
# keep the two values returned by evaluate_policy.
mean_rwd, std_rwd = evaluate_policy(
    loaded_model.policy,
    eval_env,
    n_eval_episodes=args.num_episodes,
)

# Report the results. Each label keeps its trailing space and is passed to
# print() as a separate argument, so the output format is unchanged.
summary = (
    ("eval episodes: ", args.num_episodes),
    ("mean rwd: ", mean_rwd),
    ("std rwd: ", std_rwd),
)
for label, value in summary:
    print(label, value)