hi-melnikov committed d317f64
1 Parent(s): 2c64c31

Moved build leaderboard to different folder

Files changed:
- app.py +1 -60
- src/leaderboard/build_leaderboard.py +68 -0
app.py
CHANGED
@@ -1,15 +1,11 @@
-import json
 import logging
 import os
 import subprocess
-import time
 
 import gradio as gr
-import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 from gradio_leaderboard import Leaderboard, SelectColumns
 from gradio_space_ci import enable_space_ci
-from huggingface_hub import snapshot_download
 
 from src.display.about import (
     INTRODUCTION_TEXT,
@@ -22,11 +18,11 @@ from src.display.utils import (
 )
 from src.envs import (
     API,
-    EVAL_RESULTS_PATH,
     H4_TOKEN,
     REPO_ID,
     RESET_JUDGEMENT_ENV,
 )
+from src.leaderboard.build_leaderboard import build_leadearboard_df
 
 os.environ['GRADIO_ANALYTICS_ENABLED']='false'
 
@@ -40,61 +36,6 @@ def restart_space():
     API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
 
 
-def time_diff_wrapper(func):
-    def wrapper(*args, **kwargs):
-        start_time = time.time()
-        result = func(*args, **kwargs)
-        end_time = time.time()
-        diff = end_time - start_time
-        logging.info(f"Time taken for {func.__name__}: {diff} seconds")
-        return result
-    return wrapper
-
-
-@time_diff_wrapper
-def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
-    """Download dataset with exponential backoff retries."""
-    attempt = 0
-    while attempt < max_attempts:
-        try:
-            logging.info(f"Downloading {repo_id} to {local_dir}")
-            snapshot_download(
-                repo_id=repo_id,
-                local_dir=local_dir,
-                repo_type=repo_type,
-                tqdm_class=None,
-                token=os.environ.get("HF_TOKEN"),
-                etag_timeout=30,
-                max_workers=8,
-            )
-            logging.info("Download successful")
-            return
-        except Exception as e:
-            wait_time = backoff_factor ** attempt
-            logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
-            time.sleep(wait_time)
-            attempt += 1
-    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
-
-def build_leadearboard_df():
-    """Initializes the application space, loading only necessary data."""
-    # Check ENV LEADERBOARD_DOWNLOAD if wee need to download the leaderboard
-    if os.getenv("LEADERBOARD_DOWNLOAD", "True") == "True":
-        # These downloads only occur on full initialization
-        # try:
-        #     download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
-        #     download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
-        download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
-        # print(subprocess.Popen('ls src'))
-        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'], check=False)
-        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'], check=False)
-        # except Exception:
-        #     restart_space()
-
-    # Always retrieve the leaderboard DataFrame
-    leaderboard_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
-    return leaderboard_df.copy()
 def build_demo():
     demo = gr.Blocks(
         title = "Chatbot Arena Leaderboard",
src/leaderboard/build_leaderboard.py
ADDED
@@ -0,0 +1,68 @@
+
+import json
+import logging
+import os
+import subprocess
+import time
+
+import pandas as pd
+from huggingface_hub import snapshot_download
+
+from src.envs import EVAL_RESULTS_PATH
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def time_diff_wrapper(func):
+    def wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        diff = end_time - start_time
+        logging.info(f"Time taken for {func.__name__}: {diff} seconds")
+        return result
+    return wrapper
+
+@time_diff_wrapper
+def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
+    """Download dataset with exponential backoff retries."""
+    attempt = 0
+    while attempt < max_attempts:
+        try:
+            logging.info(f"Downloading {repo_id} to {local_dir}")
+            snapshot_download(
+                repo_id=repo_id,
+                local_dir=local_dir,
+                repo_type=repo_type,
+                tqdm_class=None,
+                token=os.environ.get("HF_TOKEN"),
+                etag_timeout=30,
+                max_workers=8,
+            )
+            logging.info("Download successful")
+            return
+        except Exception as e:
+            wait_time = backoff_factor ** attempt
+            logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
+            time.sleep(wait_time)
+            attempt += 1
+    raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
+
+def build_leadearboard_df():
+    """Initializes the application space, loading only necessary data."""
+    # Check ENV LEADERBOARD_DOWNLOAD if wee need to download the leaderboard
+    if os.getenv("LEADERBOARD_DOWNLOAD", "True") == "True":
+        # These downloads only occur on full initialization
+        # try:
+        #     download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
+        #     download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
+        download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
+        # print(subprocess.Popen('ls src'))
+        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/external/*', 'src/gen/data/arena-hard-v0.1/model_answer/'], check=False)
+        subprocess.run(['rsync', '-avzP', '--ignore-existing', f'{EVAL_RESULTS_PATH[2:]}/model_judgment/*', 'src/gen/data/arena-hard-v0.1/model_judgement/'], check=False)
+        # except Exception:
+        #     restart_space()
+
+    # Always retrieve the leaderboard DataFrame
+    leaderboard_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
+    return leaderboard_df.copy()
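
For reference, a minimal local smoke test of the new module (not included in the commit). Setting LEADERBOARD_DOWNLOAD to anything other than "True" makes build_leadearboard_df() skip the snapshot_download and rsync steps and read only the cached eval-results/evals/upd.json, so this assumes that file already exists locally.

# Illustrative smoke test, not part of the commit.
import os

# Skip the download/rsync branch; only the cached JSON is read.
os.environ["LEADERBOARD_DOWNLOAD"] = "False"

from src.leaderboard.build_leaderboard import build_leadearboard_df

df = build_leadearboard_df()
print(df.shape)
print(df.head())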