Spaces:

riiswa
/

RL-Interpretable-Policy-via-Kolmogorov-Arnold-Network

Sleeping

riiswa committed on May 4, 2024

Commit

1240765

1 Parent(s): d9d70e0

Add warning on MuJoCo usage

Files changed (5) hide show

README.md CHANGED Viewed

@@ -11,3 +11,10 @@ license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+### Application demo :
+- Choose an RL environment from the gymnasium library. A policy from a pre-trained Proximal Policy Optimization (PPO) agent will automatically be loaded, which generates an expert dataset and videos of the agent's performance in the selected environment.
+- Click the "Compute Symbolic Policy" button to train a KAN policy on the expert dataset. Once it is done, you can visualize the KAN network and watch videos of the KAN agent's performance in the selected environment!
+<img alt="Interpretability app demo" src="demo/app_demo.gif">

app.py CHANGED Viewed

@@ -36,7 +36,7 @@ For more information about KAN you can read the [paper](https://arxiv.org/abs/24
 To follow the progress of KAN in RL you can check the repo [kanrl](https://github.com/riiswa/kanrl).
 """
-envs = ["CartPole-v1", "MountainCar-v0", "Acrobot-v1", "Pendulum-v1", "MountainCarContinuous-v0", "LunarLander-v2", "Swimmer-v4", "Hopper-v4"]
 if __name__ == "__main__":
@@ -45,6 +45,8 @@ if __name__ == "__main__":
     def load_video_and_dataset(_env_name):
         env_name = _env_name
         dataset_path, video_path = generate_dataset_from_expert("ppo", _env_name, 15, 3)
         return video_path, gr.Button("Compute the symbolic policy!", interactive=True), {
             "dataset_path": dataset_path,

 To follow the progress of KAN in RL you can check the repo [kanrl](https://github.com/riiswa/kanrl).
 """
+envs = ["CartPole-v1", "MountainCar-v0", "Acrobot-v1", "Pendulum-v1", "MountainCarContinuous-v0", "LunarLander-v2", "Swimmer-v3", "Hopper-v3"]
 if __name__ == "__main__":
     def load_video_and_dataset(_env_name):
         env_name = _env_name
+        if env_name.startswith("Swimmer") or env_name.startswith("Hopper-v3"):
+            gr.Warning("We're currently in the process of adding support for Mujoco environments, so the application may encounter crashes during this phase. We encourage contributors to join us in the repository https://github.com/riiswa/kanrl to assist in the development and support of other environments. Your contributions are invaluable in ensuring a robust and comprehensive framework.")
         dataset_path, video_path = generate_dataset_from_expert("ppo", _env_name, 15, 3)
         return video_path, gr.Button("Compute the symbolic policy!", interactive=True), {
             "dataset_path": dataset_path,

packages.txt CHANGED Viewed

@@ -3,4 +3,5 @@ libgl1-mesa-glx
 libglew-dev
 libosmesa6-dev
 software-properties-common
-patchelf

 libglew-dev
 libosmesa6-dev
 software-properties-common
+patchelf
+swig

requirements.txt CHANGED Viewed

@@ -13,4 +13,4 @@ stable_baselines3
 rl_zoo3
 gym
 shimmy>=0.2.1
-mujoco-py

 rl_zoo3
 gym
 shimmy>=0.2.1
+free-mujoco-py

utils.py CHANGED Viewed

@@ -112,10 +112,6 @@ def rollouts(env, policy, num_episodes=1):
 def generate_dataset_from_expert(algo, env_name, num_train_episodes=5, num_test_episodes=2, force=False):
     if env_name.startswith("Swimmer") or env_name.startswith("Hopper"):
         install_mujoco()
-    if env_name == "Swimmer-v4":
-        env_name = "Swimmer-v3"
-    elif env_name == "Hopper-v4":
-        env_name = "Hopper-v3"
     dataset_path = os.path.join("datasets", f"{algo}-{env_name}.pt")
     video_path = os.path.join("videos", f"{algo}-{env_name}.mp4")
     if os.path.exists(dataset_path) and os.path.exists(video_path) and not force:

 def generate_dataset_from_expert(algo, env_name, num_train_episodes=5, num_test_episodes=2, force=False):
     if env_name.startswith("Swimmer") or env_name.startswith("Hopper"):
         install_mujoco()
     dataset_path = os.path.join("datasets", f"{algo}-{env_name}.pt")
     video_path = os.path.join("videos", f"{algo}-{env_name}.mp4")
     if os.path.exists(dataset_path) and os.path.exists(video_path) and not force: