Spaces:
Running
Running
hi-melnikov
committed on
Commit
•
b7741fd
1
Parent(s):
b19c539
More verbose
Browse files
- app.py +1 -1
- src/leaderboard/build_leaderboard.py +7 -5
app.py
CHANGED
@@ -108,7 +108,7 @@ def update_board():
|
|
108 |
show_result_file = os.path.join(HF_HOME, "src/gen/show_result.py")
|
109 |
subprocess.run(["python3", show_result_file, "--output"], check=True)
|
110 |
|
111 |
-
# update the gr item
|
112 |
# TODO
|
113 |
|
114 |
|
|
|
108 |
show_result_file = os.path.join(HF_HOME, "src/gen/show_result.py")
|
109 |
subprocess.run(["python3", show_result_file, "--output"], check=True)
|
110 |
|
111 |
+
# update the gr item with leaderboard
|
112 |
# TODO
|
113 |
|
114 |
|
src/leaderboard/build_leaderboard.py
CHANGED
@@ -61,14 +61,17 @@ def download_openbench():
|
|
61 |
# download answers of different models that we trust
|
62 |
download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
|
63 |
|
64 |
-
|
|
|
|
|
|
|
|
|
65 |
|
66 |
# copy the trusted model answers to data
|
67 |
subprocess.run(
|
68 |
[
|
69 |
"rsync",
|
70 |
-
"-
|
71 |
-
"--ignore-existing",
|
72 |
f"{EVAL_RESULTS_PATH}/internal/*",
|
73 |
f"{DATA_ARENA_PATH}/model_answer/internal/",
|
74 |
],
|
@@ -79,8 +82,7 @@ def download_openbench():
|
|
79 |
subprocess.run(
|
80 |
[
|
81 |
"rsync",
|
82 |
-
"-
|
83 |
-
"--ignore-existing",
|
84 |
f"{EVAL_RESULTS_PATH}/model_judgment/*",
|
85 |
f"{DATA_ARENA_PATH}/model_judgement/",
|
86 |
],
|
|
|
61 |
# download answers of different models that we trust
|
62 |
download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
|
63 |
|
64 |
+
logging.info("\nInternal models in openbench-eval:")
|
65 |
+
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/internal/"], check=True)
|
66 |
+
|
67 |
+
logging.info("\nJudgement in openbench-eval")
|
68 |
+
subprocess.run(["ls", f"{EVAL_RESULTS_PATH}/model_judgment/"], check=True)
|
69 |
|
70 |
# copy the trusted model answers to data
|
71 |
subprocess.run(
|
72 |
[
|
73 |
"rsync",
|
74 |
+
"-azPvh",
|
|
|
75 |
f"{EVAL_RESULTS_PATH}/internal/*",
|
76 |
f"{DATA_ARENA_PATH}/model_answer/internal/",
|
77 |
],
|
|
|
82 |
subprocess.run(
|
83 |
[
|
84 |
"rsync",
|
85 |
+
"-azPvh",
|
|
|
86 |
f"{EVAL_RESULTS_PATH}/model_judgment/*",
|
87 |
f"{DATA_ARENA_PATH}/model_judgement/",
|
88 |
],
|