import os
import json
import datetime
from apscheduler.schedulers.background import BackgroundScheduler
import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, snapshot_download
from utils import make_clickable_model
from utils import make_clickable_user
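# make_clickable_user / make_clickable_model come from utils.py. They are presumably
# small helpers that wrap a username / model id in a markdown link to the corresponding
# huggingface.co page, which is why the "User" and "Model id" columns of the Dataframe
# below use the "markdown" datatype. A rough sketch (an assumption, not the actual
# implementation in utils.py):
#
#     def make_clickable_user(user_id):
#         return f'[{user_id}](https://huggingface.co/{user_id})'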
DATASET_REPO_URL = "https://huggingface.co/datasets/pkalkman/drlc-leaderboard-data"
DATASET_REPO_ID = "pkalkman/drlc-leaderboard-data"
HF_TOKEN = os.environ.get("HF_TOKEN")
block = gr.Blocks()
api = HfApi(token=HF_TOKEN)
# Read the environments from the JSON file
with open('envs.json', 'r') as f:
    rl_envs = json.load(f)
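# envs.json is assumed to hold a list of environment descriptors; only the "rl_env"
# key (CSV file name in the dataset) and the "rl_env_beautiful" key (display name)
# are used below. An illustrative entry (hypothetical values):
#
#     {"rl_env": "LunarLander-v2", "rl_env_beautiful": "LunarLander-v2"}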
def download_leaderboard_dataset():
    # Download the dataset from the Hugging Face Hub
    path = snapshot_download(repo_id=DATASET_REPO_ID, repo_type="dataset")
    return path
def get_data(rl_env, path) -> pd.DataFrame:
    """
    Read the CSV file for rl_env, format model and user as clickable links, and return the result as a DataFrame.
    """
    csv_path = os.path.join(path, rl_env + ".csv")
    data = pd.read_csv(csv_path)

    # Add clickable links for model and user
    for index, row in data.iterrows():
        data.at[index, "User"] = make_clickable_user(row["User"])
        data.at[index, "Model"] = make_clickable_model(row["Model"])

    return data
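# Note on the dataset layout: each per-environment CSV is assumed to contain at least
# the columns shown in the Dataframe headers further down (Ranking, User, Model,
# Results, Mean Reward, Std Reward); only "User" and "Model" are rewritten above as
# markdown links.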
def get_last_refresh_time(path) -> str:
    """
    Get the last update time from the last_update.txt file in the dataset path.
    """
    # Path to the last_update.txt file
    update_file_path = os.path.join(path, 'last_update.txt')

    # Check if the file exists
    if os.path.exists(update_file_path):
        # Read the content of the file (the timestamp)
        with open(update_file_path, 'r') as f:
            last_refresh_time = f.read().strip()
        return last_refresh_time
    else:
        # Fallback: if the file is missing, return a default message
        return "Last update time not available"
# Function to refresh the dataset periodically
def refresh_dataset():
    global path_, last_refresh_time
    path_ = download_leaderboard_dataset()  # Redownload the dataset
    last_refresh_time = get_last_refresh_time(path_)
# Set up a background scheduler to refresh the dataset every 15 minutes
scheduler = BackgroundScheduler()
scheduler.add_job(refresh_dataset, 'interval', minutes=15)
scheduler.start()
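# Optional hardening (not part of the original app): it is common to shut the
# background scheduler down cleanly when the process exits, e.g.
#
#     import atexit
#     atexit.register(lambda: scheduler.shutdown(wait=False))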
with block:
    path_ = download_leaderboard_dataset()

    # Get the last refresh time
    last_refresh_time = get_last_refresh_time(path_)

    gr.Markdown(f"""
    # 🏆 Deep Reinforcement Learning Course Leaderboard (Mirror) 🏆
    Presenting the latest leaderboard from the Hugging Face Deep RL Course - refreshed at {last_refresh_time}.
    """)

    for rl_env in rl_envs:
        with gr.TabItem(rl_env["rl_env_beautiful"]):
            with gr.Row():
                markdown = f"""
                # {rl_env['rl_env_beautiful']}

                ### Leaderboard for {rl_env['rl_env_beautiful']}
                """
                gr.Markdown(markdown)

            with gr.Row():
                # Display the data for this RL environment
                data = get_data(rl_env["rl_env"], path_)
                gr.Dataframe(
                    value=data,
                    headers=["Ranking 🏆", "User 🤗", "Model id 🤖", "Results", "Mean Reward", "Std Reward"],
                    datatype=["number", "markdown", "markdown", "number", "number", "number"],
                    row_count=(100, 'fixed'),
                )
block.launch()