ledmands committed
Commit • cbe0575
Parent(s): c37ff18

Added option flags to the watch_agent.py file.
Files changed:
- agents/version_2/{evaluations_v1.npz → evals/evaluations_v0.npz} +0 -0
- agents/version_2/{evaluations_v2.npz → evals/evaluations_v1.npz} +0 -0
- agents/version_2/{evaluations_v3.npz → evals/evaluations_v2.npz} +0 -0
- agents/version_2/evals/evaluations_v3.npz +3 -0
- agents/version_2/version_2-3/events.out.tfevents.1715694677.5da7e309893b.24.0 +3 -0
- agents/version_2/watch_agent.py +26 -5
agents/version_2/{evaluations_v1.npz → evals/evaluations_v0.npz}
RENAMED
File without changes

agents/version_2/{evaluations_v2.npz → evals/evaluations_v1.npz}
RENAMED
File without changes

agents/version_2/{evaluations_v3.npz → evals/evaluations_v2.npz}
RENAMED
File without changes
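The renamed archives follow the naming of the evaluations.npz files that stable-baselines3's EvalCallback writes during training. Assuming that provenance (the exact keys depend on the SB3 version), a minimal sketch for inspecting one of them:

    import numpy as np

    # Path taken from this commit; the EvalCallback provenance is an assumption.
    data = np.load("agents/version_2/evals/evaluations_v0.npz")

    # EvalCallback archives usually carry "timesteps", "results", and "ep_lengths".
    for key in data.files:
        print(key, data[key].shape)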
agents/version_2/evals/evaluations_v3.npz
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:456a8cf62175f6e78ef93d29a63a88d5b7d577fc7c34e62bd2ec4390b3cbb86f
+size 4130
agents/version_2/version_2-3/events.out.tfevents.1715694677.5da7e309893b.24.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3a00c50b58bf881d2e40e8df0ac03b0a16d0b27b4373f9f8c37d954a5cc8dba
+size 2797090
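Both ADDED entries above are Git LFS pointer files rather than the binaries themselves: the three stored lines record the LFS spec version, the SHA-256 of the real object, and its size in bytes. After cloning, the actual files can be fetched with standard git-lfs commands, for example:

    git lfs install
    git lfs pull --include="agents/version_2/evals/evaluations_v3.npz"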
agents/version_2/watch_agent.py
CHANGED
@@ -5,23 +5,44 @@ import gymnasium as gym
 
 import argparse
 
+MODEL_NAME = "ALE-Pacman-v5"
+loaded_model = DQN.load(MODEL_NAME)
+
 # This script should have some options
 # 1. Turn off the stochasticity as determined by the ALEv5
 # Even if deterministic is set to true in evaluate policy, the environment will ignore this 25% of the time
 # To compensate for this, we can set the repeat action probability to 0
+# DONE
+# 2. Print out the evaluation metrics or save to file
+# 3. Render in the ALE or not
+# DONE
+# 4. Print the keyword args for the environment? I think this might be helpful...
+# IN PROGRESS
+# 5.
 
 parser = argparse.ArgumentParser()
 parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability", type=float, default=0.25)
+parser.add_argument("-f", "--frameskip", help="frameskip", type=int, default=4)
+parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
+parser.add_argument("-p", "--print", help="print environment information", action="store_const", const=True)
 args = parser.parse_args()
 
-
-
-
-
+# Toggle the render mode based on the -o flag
+if args.observe == True:
+    mode = "human"
+else:
+    mode = "rgb_array"
 
 # Retrieve the environment
-eval_env = Monitor(gym.make("ALE/Pacman-v5",
+eval_env = Monitor(gym.make("ALE/Pacman-v5",
+                            render_mode=mode,
+                            repeat_action_probability=args.repeat_action_probability,
+                            frameskip=args.frameskip,))
 
+if args.print == True:
+    env_info = str(eval_env.spec).split(", ")
+    for item in env_info:
+        print(item)
 # Evaluate the policy
 mean_rwd, std_rwd = evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=1)
 print("mean rwd: ", mean_rwd)
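Assembled from the hunk above, the post-commit script reads roughly as follows. Only "import gymnasium as gym" and "import argparse" are visible in the hunk, so the stable-baselines3 imports here are reconstructed assumptions; the -o and -p flags are also written with argparse's store_true, a slightly more idiomatic equivalent of the store_const/const=True pattern in the diff.

    import argparse

    import gymnasium as gym

    # Reconstructed imports (assumed): the diff hunk begins below the file's import block.
    from stable_baselines3 import DQN
    from stable_baselines3.common.evaluation import evaluate_policy
    from stable_baselines3.common.monitor import Monitor

    MODEL_NAME = "ALE-Pacman-v5"
    loaded_model = DQN.load(MODEL_NAME)

    parser = argparse.ArgumentParser()
    parser.add_argument("-r", "--repeat_action_probability",
                        help="repeat action probability", type=float, default=0.25)
    parser.add_argument("-f", "--frameskip", help="frameskip", type=int, default=4)
    parser.add_argument("-o", "--observe", help="observe agent", action="store_true")
    parser.add_argument("-p", "--print", help="print environment information", action="store_true")
    args = parser.parse_args()

    # Toggle the render mode based on the -o flag
    mode = "human" if args.observe else "rgb_array"

    # Retrieve the environment (ale-py must be installed for the ALE/Pacman-v5 id to resolve)
    eval_env = Monitor(gym.make("ALE/Pacman-v5",
                                render_mode=mode,
                                repeat_action_probability=args.repeat_action_probability,
                                frameskip=args.frameskip))

    # Print the environment spec one field per line when -p is given
    if args.print:
        for item in str(eval_env.spec).split(", "):
            print(item)

    # Evaluate the policy
    mean_rwd, std_rwd = evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=1)
    print("mean rwd: ", mean_rwd)

Run as, for example, python watch_agent.py -o -p -r 0.0 to render the game window, print the resolved environment spec, and zero the sticky-action probability so the rollout is deterministic.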