Spaces:

gabehubner
/

deep-rl-explainability

Sleeping

App Files Files Community

gabehubner committed on Nov 30, 2023

Commit

ee1c253

•

1 Parent(s): f6f3371

add requirements

Browse files

Files changed (11) hide show

__pycache__/ddpg.cpython-311.pyc +0 -0
__pycache__/train.cpython-311.pyc +0 -0
app.py +3 -0
ddpg.py +2 -6
main.py +6 -3
requirements.txt +392 -0
tmp/ddpg/actor_ddpg +0 -0
tmp/ddpg/critic_ddpg +0 -0
tmp/ddpg/target_actor_ddpg +0 -0
tmp/ddpg/target_critic_ddpg +0 -0
train.py +79 -8

__pycache__/ddpg.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/ddpg.cpython-311.pyc and b/__pycache__/ddpg.cpython-311.pyc differ

__pycache__/train.cpython-311.pyc CHANGED Viewed

Binary files a/__pycache__/train.cpython-311.pyc and b/__pycache__/train.cpython-311.pyc differ

app.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ import gradio as gr
2	+ from train import TrainingLoop
3	+

ddpg.py CHANGED Viewed

@@ -144,10 +144,6 @@ class ActorNetwork(nn.Module):
     def forward(self, state):
-        try:
-            assert state.shape == T.Size([8])
-        except AssertionError:
-            raise Exception(f"Wrong shape {state.shape=}")
         x = self.fc1(state)
         x = self.bn1(x)
@@ -182,7 +178,7 @@ class Agent(object):
         self.noise = OUActionNoise(mu=np.zeros(n_actions))
-        self.attributions = None
         self.ig : IntegratedGradients = None
         self.update_network_parameters(tau=1)
@@ -195,7 +191,7 @@ class Agent(object):
         if self.ig is not None:
              attribution = self.ig.attribute(observation, baselines=baseline, n_steps=1)
-             print('Attributions:', attribution)
         mu_prime = mu + T.tensor(self.noise(), dtype=T.float).to(self.actor.device)

     def forward(self, state):
         x = self.fc1(state)
         x = self.bn1(x)
         self.noise = OUActionNoise(mu=np.zeros(n_actions))
+        self.attributions = []
         self.ig : IntegratedGradients = None
         self.update_network_parameters(tau=1)
         if self.ig is not None:
              attribution = self.ig.attribute(observation, baselines=baseline, n_steps=1)
+             self.attributions.append(attribution)
         mu_prime = mu + T.tensor(self.noise(), dtype=T.float).to(self.actor.device)

main.py CHANGED Viewed

@@ -7,11 +7,11 @@ import argparse
 from train import TrainingLoop
 from captum.attr import (IntegratedGradients, LayerConductance, NeuronAttribution)
-training_loop = TrainingLoop(env_spec="LunarLander-v2", continuous=True, gravity=-10, render_mode=None)
 training_loop.create_agent()
 parser = argparse.ArgumentParser(description="Choose a function to run.")
-parser.add_argument("function", choices=["train", "load-trained", "attribute"], help="The function to run.")
 args = parser.parse_args()
@@ -20,4 +20,7 @@ if args.function == "train":
 elif args.function == "load-trained":
     training_loop.load_trained()
 elif args.function == "attribute":
-    training_loop.explain_trained(option="2", num_iterations=10)

 from train import TrainingLoop
 from captum.attr import (IntegratedGradients, LayerConductance, NeuronAttribution)
+training_loop = TrainingLoop(env_spec="LunarLander-v2", continuous=True, gravity=-10)
 training_loop.create_agent()
 parser = argparse.ArgumentParser(description="Choose a function to run.")
+parser.add_argument("function", choices=["train", "load-trained", "attribute", "video"], help="The function to run.")
 args = parser.parse_args()
 elif args.function == "load-trained":
     training_loop.load_trained()
 elif args.function == "attribute":
+    frames, attributions = training_loop.explain_trained(option="2", num_iterations=10)
+elif args.function == "video":
+    training_loop.render_video(20)

requirements.txt ADDED Viewed

	@@ -0,0 +1,392 @@

+absl-py==2.0.0
+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+alabaster==0.7.13
+ale-py==0.8.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==3.7.1
+appdirs==1.4.4
+appnope==0.1.3
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+astatine==0.3.3
+astor==0.8.1
+astpretty==3.0.0
+astroid==2.15.8
+asttokens==2.4.0
+astunparse==1.6.3
+async-timeout==4.0.3
+attrs==23.1.0
+autoflake==1.7.8
+AutoROM==0.4.2
+AutoROM.accept-rom-license==0.6.1
+Babel==2.13.0
+backcall==0.2.0
+bandit==1.7.5
+beautifulsoup4==4.12.2
+bitmath==1.3.3.1
+black==23.10.0
+bleach==6.1.0
+box2d-py==2.3.5
+Brotli==1.1.0
+cachetools==5.3.1
+captum==0.6.0
+certifi==2023.7.22
+cffi==1.16.0
+chardet==4.0.0
+charset-normalizer==3.3.0
+chess==1.9.4
+click==7.1.2
+cloudpickle==1.3.0
+cmake==3.27.7
+cognitive-complexity==1.3.0
+colorama==0.4.6
+comm==0.1.4
+contourpy==1.1.1
+coverage==7.3.2
+cycler==0.12.0
+darglint==1.8.1
+debugpy==1.8.0
+decorator==4.4.2
+defusedxml==0.7.1
+deprecation==2.1.0
+DI-engine==0.4.9
+DI-toolkit==0.2.0
+DI-treetensor==0.4.1
+dill==0.3.7
+distlib==0.3.7
+dlint==0.14.1
+doc8==1.1.1
+docformatter==1.7.5
+docker-pycreds==0.4.0
+docutils==0.19
+domdf-python-tools==3.6.1
+easydict==1.9
+entrypoints==0.4
+enum-tools==0.11.0
+eradicate==2.3.0
+executing==2.0.0
+Farama-Notifications==0.0.4
+fastapi==0.104.0
+fastjsonschema==2.18.1
+ffmpeg==1.4
+ffmpy==0.3.1
+filelock==3.12.4
+flake8==5.0.4
+flake8-2020==1.8.1
+flake8-aaa==0.16.0
+flake8-annotations==3.0.1
+flake8-annotations-complexity==0.0.8
+flake8-annotations-coverage==0.0.6
+flake8-bandit==4.1.1
+flake8-black==0.3.6
+flake8-blind-except==0.2.1
+flake8-breakpoint==1.1.0
+flake8-broken-line==0.6.0
+flake8-bugbear==23.3.12
+flake8-builtins==1.5.3
+flake8-class-attributes-order==0.1.3
+flake8-coding==1.3.2
+flake8-cognitive-complexity==0.1.0
+flake8-comments==0.1.2
+flake8-comprehensions==3.14.0
+flake8-debugger==4.1.2
+flake8-django==1.4
+flake8-docstrings==1.7.0
+flake8-encodings==0.5.0.post1
+flake8-eradicate==1.5.0
+flake8-executable==2.1.3
+flake8-expression-complexity==0.0.11
+flake8-fastapi==0.7.0
+flake8-fixme==1.1.1
+flake8-functions==0.0.8
+flake8-functions-names==0.4.0
+flake8-future-annotations==0.0.5
+flake8-helper==0.2.1
+flake8-isort==6.1.0
+flake8-literal==1.3.0
+flake8-logging-format==0.9.0
+flake8-markdown==0.5.0
+flake8-mutable==1.2.0
+flake8-no-pep420==2.7.0
+flake8-noqa==1.3.2
+flake8-pie==0.16.0
+flake8-plugin-utils==1.3.3
+flake8-pyi==22.11.0
+flake8-pylint==0.2.1
+flake8-pytest-style==1.7.2
+flake8-quotes==3.3.2
+flake8-rst-docstrings==0.3.0
+flake8-secure-coding-standard==1.4.0
+flake8-string-format==0.3.0
+flake8-tidy-imports==4.10.0
+flake8-typing-imports==1.15.0
+flake8-use-fstring==1.4
+flake8-use-pathlib==0.3.0
+flake8-useless-assert==0.4.4
+flake8-variables-names==0.0.6
+flake8-warnings==0.4.0
+flake8_simplify==0.21.0
+Flask==1.1.4
+Flask-Compress==1.14
+flatbuffers==23.5.26
+fonttools==4.43.1
+fqdn==1.5.1
+frozenlist==1.4.0
+fsspec==2023.9.2
+future==0.18.3
+gast==0.5.4
+gitdb==4.0.11
+GitPython==3.1.40
+glfw==2.6.2
+google-auth==2.23.3
+google-auth-oauthlib==1.0.0
+google-pasta==0.2.0
+gradio==4.7.1
+gradio_client==0.7.0
+graphviz==0.20.1
+grpcio==1.59.0
+gym==0.25.1
+gym-notices==0.0.8
+gymnasium==0.29.1
+h11==0.14.0
+h5py==3.10.0
+hbutils==0.9.1
+hickle==5.0.2
+httpcore==1.0.2
+httpx==0.25.2
+huggingface-hub==0.19.4
+hypothesis==6.88.1
+hypothesmith==0.1.9
+idna==3.4
+imageio==2.31.5
+imageio-ffmpeg==0.4.9
+imagesize==1.4.1
+importlib-metadata==6.8.0
+importlib-resources==6.1.0
+iniconfig==2.0.0
+ipykernel==6.25.2
+ipython==8.16.1
+ipython-genutils==0.2.0
+ipywidgets==8.1.1
+isoduration==20.11.0
+isort==5.12.0
+itsdangerous==1.1.0
+jedi==0.19.1
+Jinja2==2.11.3
+joblib==1.3.2
+jsonpointer==2.4
+jsonschema==4.19.2
+jsonschema-specifications==2023.7.1
+jupyter==1.0.0
+jupyter-console==6.6.3
+jupyter-events==0.9.0
+jupyter_client==7.4.9
+jupyter_core==5.3.2
+jupyter_server==2.10.0
+jupyter_server_terminals==0.4.4
+jupyterlab-flake8==0.7.1
+jupyterlab-pygments==0.2.2
+jupyterlab-widgets==3.0.9
+keras==2.14.0
+keras-rl==0.4.2
+kiwisolver==1.4.5
+lark-parser==0.12.0
+lazy-object-proxy==1.9.0
+libclang==16.0.6
+libcst==0.4.10
+llvmlite==0.41.1
+Markdown==3.5
+markdown-it-py==3.0.0
+MarkupSafe==2.0.1
+matplotlib==3.8.0
+matplotlib-inline==0.1.6
+mccabe==0.7.0
+mdurl==0.1.2
+mediapy==1.1.9
+mistune==0.8.4
+ml-dtypes==0.2.0
+moviepy==1.0.3
+mpire==2.8.0
+mpmath==1.3.0
+mr-proper==0.0.7
+mujoco==2.3.7
+multidict==6.0.4
+mypy-extensions==1.0.0
+natsort==8.4.0
+nbclassic==1.0.0
+nbclient==0.5.13
+nbconvert==6.4.5
+nbformat==5.9.2
+nest-asyncio==1.5.8
+networkx==3.1
+notebook==6.5.6
+notebook_shim==0.2.3
+numba==0.58.1
+numpy==1.26.0
+oauthlib==3.2.2
+opencv-python==4.8.1.78
+opt-einsum==3.3.0
+orjson==3.9.10
+overcooked-ai==1.1.0
+overrides==7.4.0
+packaging==23.2
+pandas==2.1.1
+pandas-vet==0.2.3
+pandocfilters==1.5.0
+parso==0.8.3
+pathspec==0.11.2
+pathtools==0.1.2
+pbr==5.11.1
+pep8-naming==0.13.3
+pettingzoo==1.24.1
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==10.0.1
+platformdirs==3.11.0
+pluggy==1.3.0
+proglog==0.1.10
+prometheus-client==0.18.0
+prompt-toolkit==3.0.39
+protobuf==4.24.4
+psutil==5.9.5
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyasn1==0.5.0
+pyasn1-modules==0.3.0
+pybetter==0.4.1
+pycln==2.3.0
+pycodestyle==2.9.1
+pycparser==2.21
+pydantic==2.4.2
+pydantic_core==2.10.1
+pydocstyle==6.3.0
+pydub==0.25.1
+pyemojify==0.2.0
+pyflakes==2.5.0
+pygame==2.3.0
+pyglet==2.0.0
+Pygments==2.16.1
+pylint==2.17.7
+pynng==0.7.2
+PyOpenGL==3.1.7
+pyparsing==3.1.1
+pyproject-api==1.6.1
+pytest==7.4.3
+pytest-cov==4.1.0
+pytest-sugar==0.9.7
+python-dateutil==2.8.2
+python-dev-tools==2023.3.24
+python-dotenv==1.0.0
+python-json-logger==2.0.7
+python-multipart==0.0.6
+pytimeparse==1.1.8
+pytz==2023.3.post1
+pyupgrade==3.15.0
+PyVirtualDisplay==3.0
+PyYAML==6.0.1
+pyzmq==24.0.1
+qtconsole==5.5.0
+QtPy==2.4.1
+redis==5.0.1
+referencing==0.30.2
+removestar==1.5
+requests==2.31.0
+requests-oauthlib==1.3.1
+responses==0.12.1
+restructuredtext-lint==1.4.0
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.6.0
+rlcard==1.0.5
+rpds-py==0.12.0
+rsa==4.9
+sb3-contrib==2.1.0
+scikit-learn==1.3.1
+scipy==1.11.3
+seaborn==0.13.0
+semantic-version==2.10.0
+Send2Trash==1.8.2
+sentry-sdk==1.32.0
+setproctitle==1.3.3
+shellingham==1.5.4
+Shimmy==1.3.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.0
+snowballstemmer==2.2.0
+sortedcontainers==2.4.0
+soupsieve==2.5
+Sphinx==6.2.1
+sphinxcontrib-applehelp==1.0.7
+sphinxcontrib-devhelp==1.0.5
+sphinxcontrib-htmlhelp==2.0.4
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.6
+sphinxcontrib-serializinghtml==1.1.9
+ssort==0.11.6
+stable-baselines3==2.1.0
+stack-data==0.6.3
+starlette==0.27.0
+stdlib-list==0.9.0
+stevedore==5.1.0
+swig==4.1.1
+sympy==1.12
+tabulate==0.9.0
+tensorboard==2.14.1
+tensorboard-data-server==0.7.1
+tensorboardX==2.6.2.2
+tensordict==0.2.0
+tensordict-nightly==2023.10.6
+tensorflow==2.14.0
+tensorflow-estimator==2.14.0
+tensorflow-io-gcs-filesystem==0.34.0
+tensorflow-macos==2.14.0
+tensorflow-metal==1.1.0
+termcolor==2.3.0
+terminado==0.17.1
+testpath==0.6.0
+threadpoolctl==3.2.0
+tinycss2==1.2.1
+tokenize-rt==5.2.0
+tomlkit==0.12.0
+toolz==0.12.0
+torch==2.1.0
+torchrl @ git+https://github.com/pytorch/rl.git@bf264e0e24971fc05ec42b571de7b8df84043a51
+torchsnapshot==0.1.0
+torchvision==0.16.0
+tornado==6.3.3
+tox==4.11.3
+tox-travis==0.12
+tqdm==4.66.1
+traitlets==5.11.2
+treevalue==1.4.12
+trueskill==0.4.5
+typer==0.9.0
+types-python-dateutil==2.8.19.14
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+tzdata==2023.3
+Unidecode==1.3.7
+untokenize==0.1.1
+uri-template==1.3.0
+urllib3==2.0.6
+URLObject==2.4.3
+uvicorn==0.24.0.post1
+virtualenv==20.24.5
+wandb==0.15.12
+wcwidth==0.2.8
+webcolors==1.13
+webencodings==0.5.1
+websocket-client==1.6.4
+websockets==11.0.3
+Werkzeug==1.0.1
+widgetsnbextension==4.0.9
+wrapt==1.14.1
+yapf==0.29.0
+yarl==1.9.2
+yattag==1.15.1
+zipp==3.17.0

tmp/ddpg/actor_ddpg CHANGED Viewed

Binary files a/tmp/ddpg/actor_ddpg and b/tmp/ddpg/actor_ddpg differ

tmp/ddpg/critic_ddpg CHANGED Viewed

Binary files a/tmp/ddpg/critic_ddpg and b/tmp/ddpg/critic_ddpg differ

tmp/ddpg/target_actor_ddpg CHANGED Viewed

Binary files a/tmp/ddpg/target_actor_ddpg and b/tmp/ddpg/target_actor_ddpg differ

tmp/ddpg/target_critic_ddpg CHANGED Viewed

Binary files a/tmp/ddpg/target_critic_ddpg and b/tmp/ddpg/target_critic_ddpg differ

train.py CHANGED Viewed

@@ -4,24 +4,23 @@ import numpy as np
 import matplotlib.pyplot as plt
 import torch
 from captum.attr import (IntegratedGradients)
 class TrainingLoop:
     def __init__(self, env_spec, output_path='./output/', seed=0, **kwargs):
         assert env_spec in gym.envs.registry.keys()
-        defaults = {
             "continuous": True,
             "gravity": -10.0,
             "render_mode": None
         }
-        defaults.update(**kwargs)
-        self.env = gym.make(
-            env_spec,
-            **defaults
-        )
         torch.manual_seed(seed)
@@ -35,7 +34,13 @@ class TrainingLoop:
     def train(self):
         assert self.agent is not None
-        self.agent.load_models()
         score_history = []
@@ -63,6 +68,12 @@ class TrainingLoop:
     def load_trained(self):
         assert self.agent is not None
         self.agent.load_models()
         score_history = []
@@ -84,12 +95,55 @@ class TrainingLoop:
         self.env.close()
     # Model Explainability
     from captum.attr import (IntegratedGradients)
     def _collect_running_baseline_average(self, num_iterations: int) -> torch.Tensor:
         assert self.agent is not None
         print("--------- Collecting running baseline average ----------")
         self.agent.load_models()
@@ -129,6 +183,13 @@ class TrainingLoop:
         baseline = baseline_options[option]
         print("\n\n\n\n--------- Performing Attributions -----------")
         self.agent.load_models()
@@ -139,22 +200,32 @@ class TrainingLoop:
         self.agent.ig = ig
         score_history = []
         for i in range(50):
             done = False
             score = 0
             obs, _ = self.env.reset()
             while not done:
                 act = self.agent.choose_action(observation=obs, baseline=baseline)
                 new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
                 score += reward
                 obs = new_state
             score_history.append(score)
             print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))
         self.env.close()
-        return self.agent.attributions

 import matplotlib.pyplot as plt
 import torch
 from captum.attr import (IntegratedGradients)
+from gymnasium.wrappers import RecordVideo
 class TrainingLoop:
     def __init__(self, env_spec, output_path='./output/', seed=0, **kwargs):
         assert env_spec in gym.envs.registry.keys()
+        self.defaults = {
+            "id": env_spec,
             "continuous": True,
             "gravity": -10.0,
             "render_mode": None
         }
+        self.env = None
+        self.defaults.update(**kwargs)
         torch.manual_seed(seed)
     def train(self):
         assert self.agent is not None
+        self.defaults["render_mode"] = None
+        self.env = gym.make(
+            **self.defaults
+        )
+        # self.agent.load_models()
         score_history = []
     def load_trained(self):
         assert self.agent is not None
+        self.defaults["render_mode"] = None
+        self.env = gym.make(
+            **self.defaults
+        )
         self.agent.load_models()
         score_history = []
         self.env.close()
+    # Video Recording
+    # def render_video(self, episode_trigger=100):
+    #     assert self.agent is not None
+    #     self.defaults["render_mode"] = "rgb_array"
+    #     self.env = gym.make(
+    #         **self.defaults
+    #     )
+    #     episode_trigger_callable = lambda x: x % episode_trigger == 0
+    #     self.env = RecordVideo(env=self.env, video_folder=self.output_path, name_prefix=f"{self.defaults['id']}-recording", episode_trigger=episode_trigger_callable, disable_logger=True)
+    #     self.agent.load_models()
+    #     score_history = []
+    #     for i in range(200):
+    #         done = False
+    #         score = 0
+    #         obs, _ = self.env.reset()
+    #         while not done:
+    #             act = self.agent.choose_action(observation=obs)
+    #             new_state, reward, terminated, truncated, info = self.env.step(act)
+    #             done = terminated or truncated
+    #             score += reward
+    #             obs = new_state
+    #         score_history.append(score)
+    #         print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))
+    #     self.env.close()
     # Model Explainability
     from captum.attr import (IntegratedGradients)
     def _collect_running_baseline_average(self, num_iterations: int) -> torch.Tensor:
         assert self.agent is not None
+        self.defaults["render_mode"] = None
+        self.env = gym.make(
+            **self.defaults
+        )
         print("--------- Collecting running baseline average ----------")
         self.agent.load_models()
         baseline = baseline_options[option]
+        self.defaults["render_mode"] = "rgb_array"
+        self.env = gym.make(
+            **self.defaults
+        )
         print("\n\n\n\n--------- Performing Attributions -----------")
         self.agent.load_models()
         self.agent.ig = ig
         score_history = []
+        frames = []
         for i in range(50):
             done = False
             score = 0
             obs, _ = self.env.reset()
             while not done:
+                frames.append(self.env.render())
                 act = self.agent.choose_action(observation=obs, baseline=baseline)
                 new_state, reward, terminated, truncated, info = self.env.step(act)
                 done = terminated or truncated
                 score += reward
                 obs = new_state
             score_history.append(score)
             print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))
         self.env.close()
+        try:
+            assert len(frames) == len(self.agent.attributions)
+        except AssertionError:
+            print("Frames and agent attribution history are not the same shape!")
+        else:
+            pass
+        return (frames, self.agent.attributions)