ThomasSimonini HF staff committed on
Commit
b774671
·
1 Parent(s): 03af4bc

Add multithread

Browse files
Files changed (1) hide show
  1. app.py +39 -1
app.py CHANGED
@@ -8,6 +8,8 @@ from huggingface_hub import HfApi, hf_hub_download, snapshot_download
8
  from huggingface_hub.repocard import metadata_load
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
 
 
 
11
  from utils import *
12
 
13
  DATASET_REPO_URL = "https://huggingface.co/datasets/huggingface-projects/drlc-leaderboard-data"
@@ -196,6 +198,42 @@ def get_model_ids(rl_env):
196
  model_ids = [x.modelId for x in models]
197
  return model_ids
198
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  def update_leaderboard_dataset(rl_env, path):
200
  # Get model ids associated with rl_env
201
  model_ids = get_model_ids(rl_env)
@@ -272,7 +310,7 @@ def run_update_dataset():
272
  path_ = download_leaderboard_dataset()
273
  for i in range(0, len(rl_envs)):
274
  rl_env = rl_envs[i]
275
- update_leaderboard_dataset(rl_env["rl_env"], path_)
276
 
277
  api.upload_folder(
278
  folder_path=path_,
 
8
  from huggingface_hub.repocard import metadata_load
9
  from apscheduler.schedulers.background import BackgroundScheduler
10
 
11
+ from tqdm.contrib.concurrent import thread_map
12
+
13
  from utils import *
14
 
15
  DATASET_REPO_URL = "https://huggingface.co/datasets/huggingface-projects/drlc-leaderboard-data"
 
198
  model_ids = [x.modelId for x in models]
199
  return model_ids
200
 
201
+ # Parallelized version
202
+ def update_leaderboard_dataset_parallel(rl_env, path):
203
+ # Get model ids associated with rl_env
204
+ model_ids = get_model_ids(rl_env)
205
+
206
+ def process_model(model_id):
207
+ meta = get_metadata(model_id)
208
+ #LOADED_MODEL_METADATA[model_id] = meta if meta is not None else ''
209
+ if meta is None:
210
+ return None
211
+ user_id = model_id.split('/')[0]
212
+ row = {}
213
+ row["User"] = user_id
214
+ row["Model"] = model_id
215
+ accuracy = parse_metrics_accuracy(meta)
216
+ mean_reward, std_reward = parse_rewards(accuracy)
217
+ mean_reward = mean_reward if not pd.isna(mean_reward) else 0
218
+ std_reward = std_reward if not pd.isna(std_reward) else 0
219
+ row["Results"] = mean_reward - std_reward
220
+ row["Mean Reward"] = mean_reward
221
+ row["Std Reward"] = std_reward
222
+ return row
223
+
224
+ data = list(thread_map(process_model, model_ids, desc="Processing models"))
225
+
226
+ # Filter out None results (models with no metadata)
227
+ data = [row for row in data if row is not None]
228
+
229
+ ranked_dataframe = rank_dataframe(pd.DataFrame.from_records(data))
230
+ new_history = ranked_dataframe
231
+ file_path = path + "/" + rl_env + ".csv"
232
+ new_history.to_csv(file_path, index=False)
233
+
234
+ return ranked_dataframe
235
+
236
+
237
  def update_leaderboard_dataset(rl_env, path):
238
  # Get model ids associated with rl_env
239
  model_ids = get_model_ids(rl_env)
 
310
  path_ = download_leaderboard_dataset()
311
  for i in range(0, len(rl_envs)):
312
  rl_env = rl_envs[i]
313
+ update_leaderboard_dataset_parallel(rl_env["rl_env"], path_)
314
 
315
  api.upload_folder(
316
  folder_path=path_,