gabehubner committed
Commit
ec3a146
1 Parent(s): 569299e

add gitignore and training loop class

.DS_Store DELETED
Binary file (8.2 kB)
 
.gitignore ADDED
@@ -0,0 +1 @@
+.DS_Store
__pycache__/ddpg.cpython-311.pyc CHANGED
Binary files a/__pycache__/ddpg.cpython-311.pyc and b/__pycache__/ddpg.cpython-311.pyc differ
 
__pycache__/train.cpython-311.pyc ADDED
Binary file (7.28 kB)
 
ddpg.py CHANGED
@@ -176,20 +176,23 @@ class Agent(object):
         self.target_critic = CriticNetwork(beta, input_dims, layer1_size, layer2_size, n_actions=n_actions, name="target_critic")
 
         self.noise = OUActionNoise(mu=np.zeros(n_actions))
+
+        self.attributions = None
+        self.ig = None
 
         self.update_network_parameters(tau=1)
 
-    def choose_action(self, observation, attribution : IntegratedGradients = None, baseline : np.ndarray=None):
+    def choose_action(self, observation, baseline : T.Tensor=None):
         self.actor.eval()
         observation = T.tensor(observation, dtype=T.float).to(self.actor.device)
         print(f"Observation: {observation.shape=}")
         mu = self.actor(observation).to(self.actor.device)
 
-        if attribution is not None:
-            if baseline is None:
-                baseline = T.zeros(observation.shape)
-            attributions = attribution.attribute((observation), baselines=baseline, target=0)
-            print('Attributions:', attributions)
+        # if attribution is not None:
+        #     if baseline is None:
+        #         baseline = T.zeros(observation.shape)
+        #     attributions = attribution.attribute((observation), baselines=baseline, target=0)
+        #     print('Attributions:', attributions)
 
 
         mu_prime = mu + T.tensor(self.noise(), dtype=T.float).to(self.actor.device)
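
This commit removes the `attribution` parameter from `choose_action` and instead gives the agent two new fields, `self.ig` and `self.attributions`, while the old inline Integrated Gradients call is left commented out. As an illustration only (not code from this commit), the attribution step could be re-attached through those new fields roughly as sketched below; the helper name, the `unsqueeze(0)` batching, and the NumPy return at the end are assumptions.

import torch as T

def choose_action_with_attribution(agent, observation, baseline: T.Tensor = None):
    # Sketch: mirrors Agent.choose_action but computes attributions via the new agent.ig field.
    agent.actor.eval()
    obs = T.tensor(observation, dtype=T.float).to(agent.actor.device)
    mu = agent.actor(obs).to(agent.actor.device)

    if agent.ig is not None:
        if baseline is None:
            baseline = T.zeros_like(obs)  # zero baseline, as in the commented-out block
        # captum expects a leading batch dimension; target=0 attributes the first action component,
        # matching the removed inline call
        agent.attributions = agent.ig.attribute(obs.unsqueeze(0),
                                                baselines=baseline.unsqueeze(0),
                                                target=0)

    # Exploration noise as in the diff; the return format is an assumption (tail of the method not shown here)
    mu_prime = mu + T.tensor(agent.noise(), dtype=T.float).to(agent.actor.device)
    return mu_prime.cpu().detach().numpy()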
main.py CHANGED
@@ -7,7 +7,8 @@ import argparse
 from train import TrainingLoop
 from captum.attr import (IntegratedGradients, LayerConductance, NeuronAttribution)
 
-training_loop = TrainingLoop()
+training_loop = TrainingLoop(env_spec="LunarLander-v2", continuous=True, gravity=-10, render_mode=None)
+training_loop.create_agent()
 
 parser = argparse.ArgumentParser(description="Choose a function to run.")
 parser.add_argument("function", choices=["train", "load-trained", "attribute"], help="The function to run.")
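
main.py now builds the TrainingLoop and its agent before the argument parser runs, but this hunk does not show how the parsed `function` choice is dispatched. A plausible continuation is sketched below; mapping "attribute" to `explain_trained` and reading the baseline option interactively are assumptions, not part of the commit.

args = parser.parse_args()

if args.function == "train":
    training_loop.train()
elif args.function == "load-trained":
    training_loop.load_trained()
elif args.function == "attribute":
    # Baseline "1" = zeros, "2" = running average of observations (see train.py)
    option = input("Baseline option [1/2]: ")
    attributions = training_loop.explain_trained(option=option, num_iterations=10)
    print("Attributions:", attributions)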
tmp/ddpg/actor_ddpg CHANGED
Binary files a/tmp/ddpg/actor_ddpg and b/tmp/ddpg/actor_ddpg differ
 
tmp/ddpg/critic_ddpg CHANGED
Binary files a/tmp/ddpg/critic_ddpg and b/tmp/ddpg/critic_ddpg differ
 
tmp/ddpg/target_actor_ddpg CHANGED
Binary files a/tmp/ddpg/target_actor_ddpg and b/tmp/ddpg/target_actor_ddpg differ
 
tmp/ddpg/target_critic_ddpg CHANGED
Binary files a/tmp/ddpg/target_critic_ddpg and b/tmp/ddpg/target_critic_ddpg differ
 
train.py CHANGED
@@ -3,69 +3,75 @@ import gymnasium as gym
 import numpy as np
 import matplotlib.pyplot as plt
 import torch
-import argparse
 from captum.attr import (IntegratedGradients)
 
 
 class TrainingLoop:
-    def __init__(self):
-        pass
+    def __init__(self, env_spec, output_path='./output/', seed=0, **kwargs):
+        assert env_spec in gym.envs.registry.keys()
+        defaults = {
+            "continuous": True,
+            "gravity": -10.0,
+            "render_mode": None
+        }
 
-    def train(self):
-        env = gym.make(
-            "LunarLander-v2",
-            continuous = True,
-            gravity = -10.0,
-            render_mode = None
+        self.env = gym.make(
+            env_spec,
+            **defaults
         )
 
-        agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, n_actions=4)
-        agent.load_models()
+        torch.manual_seed(seed)
+
+        self.agent = None
+        self.output_path = output_path
+
+        # TODO: spec-to-hyperparameters look-up
+    def create_agent(self, alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, batch_size=64, layer1_size=400, layer2_size=300, n_actions=4):
+        self.agent = Agent(alpha=alpha, beta=beta, input_dims=input_dims, tau=tau, env=self.env, batch_size=batch_size, layer1_size=layer1_size, layer2_size=layer2_size, n_actions=n_actions)
+
+    def train(self):
+        assert self.agent is not None
+
+        self.agent.load_models()
 
-        np.random.seed(0)
         score_history = []
 
         for i in range(1000):
             done = False
             score = 0
-            obs, _ = env.reset()
+            obs, _ = self.env.reset()
             while not done:
-                act = agent.choose_action(obs)
-                new_state, reward, terminated, truncated, info = env.step(act)
+                act = self.agent.choose_action(obs)
+                new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
-                agent.remember(obs, act, reward, new_state, int(done))
-                agent.learn()
+                self.agent.remember(obs, act, reward, new_state, int(done))
+                self.agent.learn()
                 score += reward
                 obs = new_state
 
             score_history.append(score)
             print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))
             if i % 25 == 0:
-                agent.save_models()
+                self.agent.save_models()
+
+        self.env.close()
 
 
     def load_trained(self):
-        env = gym.make(
-            "LunarLanderContinuous-v2",
-            render_mode = "human"
-        )
+        assert self.agent is not None
 
-        agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, n_actions=4)
-        agent.load_models()
+        self.agent.load_models()
 
-        np.random.seed(0)
         score_history = []
 
         for i in range(50):
             done = False
             score = 0
-            obs, _ = env.reset()
+            obs, _ = self.env.reset()
 
-
-
             while not done:
-                act = agent.choose_action(obs)
-                new_state, reward, terminated, truncated, info = env.step(act)
+                act = self.agent.choose_action(obs)
+                new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
                 score += reward
                 obs = new_state
@@ -73,42 +79,42 @@ class TrainingLoop:
             score_history.append(score)
             print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))
 
+        self.env.close()
+
     # Model Explainability
 
     from captum.attr import (IntegratedGradients)
 
     def _collect_running_baseline_average(self, num_iterations: int) -> torch.Tensor:
-        env = gym.make(
-            "LunarLanderContinuous-v2",
-            render_mode = None
-        )
-
-        agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, n_actions=4)
-        agent.load_models()
+        assert self.agent is not None
 
-        torch.manual_seed(0)
+        self.agent.load_models()
 
         sum_obs = torch.zeros(8)
 
         for i in range(num_iterations):
             done = False
             score = 0
-            obs, _ = env.reset()
+            obs, _ = self.env.reset()
 
             sum_obs += obs
             print(f"Baseline on interation #{i}: {obs}")
 
             while not done:
-                act = agent.choose_action(obs, attribution=None, baseline=None)
-                new_state, reward, terminated, truncated, info = env.step(act)
+                act = self.agent.choose_action(obs, attribution=None, baseline=None)
+                new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
                 score += reward
                 obs = new_state
 
+        self.env.close()
+
         return sum_obs / num_iterations
 
 
     def explain_trained(self, option: str, num_iterations :int = 10) -> None:
+        assert self.agent is not None
+
         baseline_options = {
             "1": torch.zeros(8),
             "2": self._collect_running_baseline_average(num_iterations),
@@ -116,27 +122,20 @@ class TrainingLoop:
 
         baseline = baseline_options[option]
 
-        env = gym.make(
-            "LunarLanderContinuous-v2",
-            render_mode = "human"
-        )
+        self.agent.load_models()
 
-        agent = Agent(alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001, env=env, batch_size=64, layer1_size=400, layer2_size=300, n_actions=4)
+        ig = IntegratedGradients(self.agent.actor)
+        self.agent.ig = ig
 
-        agent.load_models()
-
-        ig = IntegratedGradients(agent.actor)
-
-        np.random.seed(0)
         score_history = []
 
         for i in range(50):
             done = False
             score = 0
-            obs, _ = env.reset()
+            obs, _ = self.env.reset()
             while not done:
-                act = agent.choose_action(obs, attribution=ig, baseline=baseline)
-                new_state, reward, terminated, truncated, info = env.step(act)
+                act = self.agent.choose_action(obs, baseline=baseline)
+                new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
                 score += reward
                 obs = new_state
@@ -144,3 +143,7 @@ class TrainingLoop:
            score_history.append(score)
            print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))
 
+        self.env.close()
+
+        return self.agent.attributions
+
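
Taken together, the new class replaces the per-method environment and agent construction with shared `self.env` / `self.agent` state. A usage sketch of the API added here, mirroring the call sequence in main.py, follows; it is illustrative rather than part of the commit. Note that `__init__` accepts `**kwargs` but builds the environment from its hard-coded `defaults` dict, and that `explain_trained` returns `agent.attributions`, which is only populated if `choose_action` computes attributions.

from train import TrainingLoop

# Construct the loop and agent exactly as main.py does in this commit.
loop = TrainingLoop(env_spec="LunarLander-v2", continuous=True, gravity=-10, render_mode=None)
loop.create_agent()  # default DDPG hyperparameters from create_agent's signature

# One of the following per run, matching the CLI choices in main.py
# (each method closes self.env when it finishes):
loop.train()                                                          # 1000 training episodes, checkpoint every 25
# loop.load_trained()                                                 # 50 evaluation episodes with saved models
# attributions = loop.explain_trained(option="2", num_iterations=10)  # "2" = running-average baseline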