ledmands committed on
Commit cbe0575 • 1 Parent(s): c37ff18

Added option flags to the watch_agent.py file.

agents/version_2/{evaluations_v1.npz → evals/evaluations_v0.npz} RENAMED
File without changes
agents/version_2/{evaluations_v2.npz → evals/evaluations_v1.npz} RENAMED
File without changes
agents/version_2/{evaluations_v3.npz → evals/evaluations_v2.npz} RENAMED
File without changes
agents/version_2/evals/evaluations_v3.npz ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:456a8cf62175f6e78ef93d29a63a88d5b7d577fc7c34e62bd2ec4390b3cbb86f
+size 4130
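
These evaluations_v*.npz files look like the evaluation logs that stable-baselines3's EvalCallback writes (arrays named "timesteps", "results", and "ep_lengths"); note they are stored as Git LFS pointers, so `git lfs pull` is needed before reading them. A minimal sketch for inspecting one, assuming that layout:

import numpy as np

# Load one evaluation log (path from this commit; requires `git lfs pull` first)
data = np.load("agents/version_2/evals/evaluations_v3.npz")
print(data.files)  # expected: ['timesteps', 'results', 'ep_lengths']

# "results" holds one row per evaluation point: the episodic return of each eval episode
for step, returns in zip(data["timesteps"], data["results"]):
    print(step, returns.mean())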
agents/version_2/version_2-3/events.out.tfevents.1715694677.5da7e309893b.24.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3a00c50b58bf881d2e40e8df0ac03b0a16d0b27b4373f9f8c37d954a5cc8dba
+size 2797090
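
The events.out.tfevents.* file is a TensorBoard event log (presumably written via the model's tensorboard_log setting during training); it can be browsed with `tensorboard --logdir agents/version_2/version_2-3`.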
agents/version_2/watch_agent.py CHANGED
@@ -5,23 +5,44 @@ import gymnasium as gym
 
 import argparse
 
+MODEL_NAME = "ALE-Pacman-v5"
+loaded_model = DQN.load(MODEL_NAME)
+
 # This script should have some options
 # 1. Turn off the stochasticity as determined by the ALEv5
 # Even if deterministic is set to true in evaluate policy, the environment will ignore this 25% of the time
 # To compensate for this, we can set the repeat action probability to 0
+# DONE
+# 2. Print out the evaluation metrics or save to file
+# 3. Render in the ALE or not
+# DONE
+# 4. Print the keyword args for the environment? I think this might be helpful...
+# IN PROGRESS
+# 5.
 
 parser = argparse.ArgumentParser()
 parser.add_argument("-r", "--repeat_action_probability", help="repeat action probability", type=float, default=0.25)
+parser.add_argument("-f", "--frameskip", help="frameskip", type=int, default=4)
+parser.add_argument("-o", "--observe", help="observe agent", action="store_const", const=True)
+parser.add_argument("-p", "--print", help="print environment information", action="store_const", const=True)
 args = parser.parse_args()
 
-MODEL_NAME = "ALE-Pacman-v5"
-rpt_act_prob = args.repeat_action_probability
-
-loaded_model = DQN.load(MODEL_NAME)
+# Toggle the render mode based on the -o flag
+if args.observe == True:
+    mode = "human"
+else:
+    mode = "rgb_array"
 
 # Retrieve the environment
-eval_env = Monitor(gym.make("ALE/Pacman-v5", render_mode="rgb_array", repeat_action_probability=rpt_act_prob))
+eval_env = Monitor(gym.make("ALE/Pacman-v5",
+                            render_mode=mode,
+                            repeat_action_probability=args.repeat_action_probability,
+                            frameskip=args.frameskip,))
 
+if args.print == True:
+    env_info = str(eval_env.spec).split(", ")
+    for item in env_info:
+        print(item)
 # Evaluate the policy
 mean_rwd, std_rwd = evaluate_policy(loaded_model.policy, eval_env, n_eval_episodes=1)
 print("mean rwd: ", mean_rwd)