pkalkman committed
Commit 3790b84 • 1 Parent(s): c695fc2

Added updater code

Files changed (4)
  1. README.md +3 -3
  2. app.py +184 -0
  3. envs.json +128 -0
  4. requirements.txt +4 -0
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-title: Leaderboard Updater
-emoji: 🦀
+title: Deep Reinforcement Learning Leaderboard Updater
+emoji: 🚀
 colorFrom: gray
 colorTo: red
 sdk: gradio
@@ -11,4 +11,4 @@ license: mit
 short_description: The process to update my copy of the leaderboard dataset
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
import os
import json
import requests

import pandas as pd
from huggingface_hub import HfApi, hf_hub_download, snapshot_download
from huggingface_hub.repocard import metadata_load
from tqdm.contrib.concurrent import thread_map
from apscheduler.schedulers.background import BackgroundScheduler

DATASET_REPO_URL = "https://huggingface.co/datasets/pkalkman/drlc-leaderboard-data"
DATASET_REPO_ID = "pkalkman/drlc-leaderboard-data"
HF_TOKEN = os.environ.get("HF_TOKEN")

api = HfApi(token=HF_TOKEN)


# Read the environments from the JSON file
with open('envs.json', 'r') as f:
    rl_envs = json.load(f)


def download_leaderboard_dataset():
    # Download the dataset from the Hugging Face Hub
    path = snapshot_download(repo_id=DATASET_REPO_ID, repo_type="dataset")
    return path


def get_metadata(model_id):
    try:
        readme_path = hf_hub_download(model_id, filename="README.md", etag_timeout=180)
        return metadata_load(readme_path)
    except requests.exceptions.HTTPError:
        # 404: the model has no README.md
        return None


def parse_metrics_accuracy(meta):
    if "model-index" not in meta:
        return None
    result = meta["model-index"][0]["results"]
    metrics = result[0]["metrics"]
    accuracy = metrics[0]["value"]
    return accuracy


# We keep the worst-case episode: Results = mean reward - std reward
def parse_rewards(accuracy):
    default_std = -1000
    default_reward = -1000
    if accuracy is not None:
        accuracy = str(accuracy)
        parsed = accuracy.split('+/-')
        if len(parsed) > 1:
            mean_reward = float(parsed[0].strip())
            std_reward = float(parsed[1].strip())
        elif len(parsed) == 1:  # only mean reward
            mean_reward = float(parsed[0].strip())
            std_reward = float(0)
        else:
            mean_reward = float(default_reward)
            std_reward = float(default_std)
    else:
        mean_reward = float(default_reward)
        std_reward = float(default_std)
    return mean_reward, std_reward


def get_model_ids(rl_env):
    api = HfApi()
    models = api.list_models(filter=rl_env)
    model_ids = [x.modelId for x in models]
    return model_ids


# Parallelized version
def update_leaderboard_dataset_parallel(rl_env, path):
    # Get model ids associated with rl_env
    model_ids = get_model_ids(rl_env)

    def process_model(model_id):
        meta = get_metadata(model_id)
        if meta is None:
            return None
        user_id = model_id.split('/')[0]
        row = {}
        row["User"] = user_id
        row["Model"] = model_id
        accuracy = parse_metrics_accuracy(meta)
        mean_reward, std_reward = parse_rewards(accuracy)
        mean_reward = mean_reward if not pd.isna(mean_reward) else 0
        std_reward = std_reward if not pd.isna(std_reward) else 0
        row["Results"] = mean_reward - std_reward
        row["Mean Reward"] = mean_reward
        row["Std Reward"] = std_reward
        return row

    data = list(thread_map(process_model, model_ids, desc="Processing models"))

    # Filter out None results (models with no metadata)
    data = [row for row in data if row is not None]

    ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
    new_history = ranked_dataframe
    file_path = path + "/" + rl_env + ".csv"
    new_history.to_csv(file_path, index=False)

    return ranked_dataframe


# Sequential version, kept for reference (not called by run_update_dataset)
def update_leaderboard_dataset(rl_env, path):
    # Get model ids associated with rl_env
    model_ids = get_model_ids(rl_env)
    data = []
    for model_id in model_ids:
        meta = get_metadata(model_id)
        if meta is None:
            continue
        user_id = model_id.split('/')[0]
        row = {}
        row["User"] = user_id
        row["Model"] = model_id
        accuracy = parse_metrics_accuracy(meta)
        mean_reward, std_reward = parse_rewards(accuracy)
        mean_reward = mean_reward if not pd.isna(mean_reward) else 0
        std_reward = std_reward if not pd.isna(std_reward) else 0
        row["Results"] = mean_reward - std_reward
        row["Mean Reward"] = mean_reward
        row["Std Reward"] = std_reward
        data.append(row)

    ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
    new_history = ranked_dataframe
    file_path = path + "/" + rl_env + ".csv"
    new_history.to_csv(file_path, index=False)

    return ranked_dataframe


def get_data_no_html(rl_env, path) -> pd.DataFrame:
    """
    Get the leaderboard data for rl_env from its CSV file
    :return: data as a pandas DataFrame
    """
    csv_path = path + "/" + rl_env + ".csv"
    data = pd.read_csv(csv_path)

    return data


def rank_dataframe(dataframe):
    dataframe = dataframe.sort_values(by=['Results', 'User', 'Model'], ascending=False)
    if 'Ranking' not in dataframe.columns:
        dataframe.insert(0, 'Ranking', [i for i in range(1, len(dataframe) + 1)])
    else:
        dataframe['Ranking'] = [i for i in range(1, len(dataframe) + 1)]
    return dataframe


def run_update_dataset():
    path_ = download_leaderboard_dataset()
    for rl_env in rl_envs:
        update_leaderboard_dataset_parallel(rl_env["rl_env"], path_)

    api.upload_folder(
        folder_path=path_,
        repo_id=DATASET_REPO_ID,
        repo_type="dataset",
        commit_message="Update dataset")


# Refresh the leaderboard dataset every 3 hours (10800 seconds)
scheduler = BackgroundScheduler()
scheduler.add_job(run_update_dataset, 'interval', seconds=10800)
scheduler.start()
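Note that the Space declares sdk: gradio and pins gradio in requirements.txt, yet app.py as committed only starts the background scheduler and never launches an interface. A minimal sketch of what such a Gradio entry point could look like follows; the Blocks layout, trigger_update, and the demo variable are assumptions for illustration, not part of this commit.

# Hypothetical sketch, not part of this commit: a minimal Gradio entry point so the
# Space keeps a foreground process and the update can also be triggered manually.
import gradio as gr


def trigger_update():
    run_update_dataset()  # the updater defined in app.py above
    return "Leaderboard dataset update finished."


with gr.Blocks() as demo:
    gr.Markdown("## Deep Reinforcement Learning Leaderboard Updater")
    status = gr.Textbox(label="Status", interactive=False)
    gr.Button("Update now").click(fn=trigger_update, outputs=status)

demo.launch()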
envs.json ADDED
[
  {
    "rl_env_beautiful": "LunarLander-v2 🚀",
    "rl_env": "LunarLander-v2",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "CartPole-v1",
    "rl_env": "CartPole-v1",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "FrozenLake-v1-4x4-no_slippery ❄️",
    "rl_env": "FrozenLake-v1-4x4-no_slippery",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "FrozenLake-v1-8x8-no_slippery ❄️",
    "rl_env": "FrozenLake-v1-8x8-no_slippery",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "FrozenLake-v1-4x4 ❄️",
    "rl_env": "FrozenLake-v1-4x4",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "FrozenLake-v1-8x8 ❄️",
    "rl_env": "FrozenLake-v1-8x8",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "Taxi-v3 🚖",
    "rl_env": "Taxi-v3",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "CarRacing-v0 🏎️",
    "rl_env": "CarRacing-v0",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "CarRacing-v2 🏎️",
    "rl_env": "CarRacing-v2",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "MountainCar-v0 ⛰️",
    "rl_env": "MountainCar-v0",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "SpaceInvadersNoFrameskip-v4 👾",
    "rl_env": "SpaceInvadersNoFrameskip-v4",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "PongNoFrameskip-v4 🎾",
    "rl_env": "PongNoFrameskip-v4",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "BreakoutNoFrameskip-v4 🧱",
    "rl_env": "BreakoutNoFrameskip-v4",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "QbertNoFrameskip-v4 🐦",
    "rl_env": "QbertNoFrameskip-v4",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "BipedalWalker-v3",
    "rl_env": "BipedalWalker-v3",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "Walker2DBulletEnv-v0",
    "rl_env": "Walker2DBulletEnv-v0",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "AntBulletEnv-v0",
    "rl_env": "AntBulletEnv-v0",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "HalfCheetahBulletEnv-v0",
    "rl_env": "HalfCheetahBulletEnv-v0",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "PandaReachDense-v2",
    "rl_env": "PandaReachDense-v2",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "PandaReachDense-v3",
    "rl_env": "PandaReachDense-v3",
    "video_link": "",
    "global": null
  },
  {
    "rl_env_beautiful": "Pixelcopter-PLE-v0",
    "rl_env": "Pixelcopter-PLE-v0",
    "video_link": "",
    "global": null
  }
]
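Each entry's rl_env value is both the Hub filter tag the updater queries (get_model_ids) and the name of the CSV it writes back to the dataset (<rl_env>.csv); rl_env_beautiful, video_link, and global are not used by app.py in this commit. A small local check like the sketch below can catch missing keys or duplicate environments before a new entry is committed; the script is a hypothetical helper and not part of this commit.

# Hypothetical sanity check, not part of this commit: validate envs.json locally
# before adding a new environment entry.
import json

REQUIRED_KEYS = {"rl_env_beautiful", "rl_env", "video_link", "global"}

with open("envs.json", "r") as f:
    envs = json.load(f)

seen = set()
for entry in envs:
    missing = REQUIRED_KEYS - entry.keys()
    assert not missing, f"{entry.get('rl_env', '?')} is missing keys: {missing}"
    assert entry["rl_env"] not in seen, f"duplicate rl_env: {entry['rl_env']}"
    seen.add(entry["rl_env"])

print(f"{len(envs)} environments look valid; the updater writes one <rl_env>.csv per entry.")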
requirements.txt ADDED
APScheduler==3.10.1
gradio==4.0
httpx==0.24.0
tqdm