Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
t0-0
committed on
Commit
·
0109b82
1
Parent(s):
5b66510
Display llm-jp-eval version and backend library
Browse files- app.py +36 -0
- src/display/utils.py +26 -0
- src/leaderboard/read_evals.py +10 -1
app.py
CHANGED
@@ -26,9 +26,11 @@ from src.display.utils import (
|
|
26 |
TYPES,
|
27 |
AddSpecialTokens,
|
28 |
AutoEvalColumn,
|
|
|
29 |
ModelType,
|
30 |
NumFewShots,
|
31 |
Precision,
|
|
|
32 |
fields,
|
33 |
)
|
34 |
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO
|
@@ -75,6 +77,8 @@ def filter_models(
|
|
75 |
precision_query: list,
|
76 |
add_special_tokens_query: list,
|
77 |
num_few_shots_query: list,
|
|
|
|
|
78 |
) -> pd.DataFrame:
|
79 |
print(f"Initial df shape: {df.shape}")
|
80 |
print(f"Initial df content:\n{df}")
|
@@ -110,6 +114,14 @@ def filter_models(
|
|
110 |
]
|
111 |
print(f"After num_few_shots filter: {filtered_df.shape}")
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
print("Filtered dataframe head:")
|
114 |
print(filtered_df.head())
|
115 |
return filtered_df
|
@@ -177,6 +189,8 @@ def update_table(
|
|
177 |
size_query: list,
|
178 |
add_special_tokens_query: list,
|
179 |
num_few_shots_query: list,
|
|
|
|
|
180 |
query: str,
|
181 |
):
|
182 |
print(
|
@@ -191,6 +205,8 @@ def update_table(
|
|
191 |
precision_query,
|
192 |
add_special_tokens_query,
|
193 |
num_few_shots_query,
|
|
|
|
|
194 |
)
|
195 |
print(f"filtered_df shape after filter_models: {filtered_df.shape}")
|
196 |
|
@@ -236,6 +252,8 @@ leaderboard_df = filter_models(
|
|
236 |
[i.value.name for i in Precision],
|
237 |
[i.value.name for i in AddSpecialTokens],
|
238 |
[i.value.name for i in NumFewShots],
|
|
|
|
|
239 |
)
|
240 |
|
241 |
leaderboard_df_filtered = filter_models(
|
@@ -245,6 +263,8 @@ leaderboard_df_filtered = filter_models(
|
|
245 |
[i.value.name for i in Precision],
|
246 |
[i.value.name for i in AddSpecialTokens],
|
247 |
[i.value.name for i in NumFewShots],
|
|
|
|
|
248 |
)
|
249 |
|
250 |
# DataFrameの初期化部分のみを修正
|
@@ -309,6 +329,18 @@ with gr.Blocks() as demo_leaderboard:
|
|
309 |
value=[i.value.name for i in NumFewShots],
|
310 |
elem_id="filter-columns-num-few-shots",
|
311 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
312 |
|
313 |
# DataFrameコンポーネントの初期化
|
314 |
leaderboard_table = gr.Dataframe(
|
@@ -340,6 +372,8 @@ with gr.Blocks() as demo_leaderboard:
|
|
340 |
filter_columns_size.change,
|
341 |
filter_columns_add_special_tokens.change,
|
342 |
filter_columns_num_few_shots.change,
|
|
|
|
|
343 |
search_bar.submit,
|
344 |
],
|
345 |
fn=update_table,
|
@@ -351,6 +385,8 @@ with gr.Blocks() as demo_leaderboard:
|
|
351 |
filter_columns_size,
|
352 |
filter_columns_add_special_tokens,
|
353 |
filter_columns_num_few_shots,
|
|
|
|
|
354 |
search_bar,
|
355 |
],
|
356 |
outputs=leaderboard_table,
|
|
|
26 |
TYPES,
|
27 |
AddSpecialTokens,
|
28 |
AutoEvalColumn,
|
29 |
+
Backend,
|
30 |
ModelType,
|
31 |
NumFewShots,
|
32 |
Precision,
|
33 |
+
Version,
|
34 |
fields,
|
35 |
)
|
36 |
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO
|
|
|
77 |
precision_query: list,
|
78 |
add_special_tokens_query: list,
|
79 |
num_few_shots_query: list,
|
80 |
+
version_query: list,
|
81 |
+
backend_query: list,
|
82 |
) -> pd.DataFrame:
|
83 |
print(f"Initial df shape: {df.shape}")
|
84 |
print(f"Initial df content:\n{df}")
|
|
|
114 |
]
|
115 |
print(f"After num_few_shots filter: {filtered_df.shape}")
|
116 |
|
117 |
+
# Version フィルタリング
|
118 |
+
filtered_df = filtered_df[filtered_df["llm-jp-eval version"].isin(version_query)]
|
119 |
+
print(f"After version filter: {filtered_df.shape}")
|
120 |
+
|
121 |
+
# Backend フィルタリング
|
122 |
+
filtered_df = filtered_df[filtered_df["Backend Library"].isin(backend_query)]
|
123 |
+
print(f"After backend filter: {filtered_df.shape}")
|
124 |
+
|
125 |
print("Filtered dataframe head:")
|
126 |
print(filtered_df.head())
|
127 |
return filtered_df
|
|
|
189 |
size_query: list,
|
190 |
add_special_tokens_query: list,
|
191 |
num_few_shots_query: list,
|
192 |
+
version_query: list,
|
193 |
+
backend_query: list,
|
194 |
query: str,
|
195 |
):
|
196 |
print(
|
|
|
205 |
precision_query,
|
206 |
add_special_tokens_query,
|
207 |
num_few_shots_query,
|
208 |
+
version_query,
|
209 |
+
backend_query,
|
210 |
)
|
211 |
print(f"filtered_df shape after filter_models: {filtered_df.shape}")
|
212 |
|
|
|
252 |
[i.value.name for i in Precision],
|
253 |
[i.value.name for i in AddSpecialTokens],
|
254 |
[i.value.name for i in NumFewShots],
|
255 |
+
[i.value.name for i in Version],
|
256 |
+
[i.value.name for i in Backend],
|
257 |
)
|
258 |
|
259 |
leaderboard_df_filtered = filter_models(
|
|
|
263 |
[i.value.name for i in Precision],
|
264 |
[i.value.name for i in AddSpecialTokens],
|
265 |
[i.value.name for i in NumFewShots],
|
266 |
+
[i.value.name for i in Version],
|
267 |
+
[i.value.name for i in Backend],
|
268 |
)
|
269 |
|
270 |
# DataFrameの初期化部分のみを修正
|
|
|
329 |
value=[i.value.name for i in NumFewShots],
|
330 |
elem_id="filter-columns-num-few-shots",
|
331 |
)
|
332 |
+
filter_columns_version = gr.CheckboxGroup(
|
333 |
+
label="Version",
|
334 |
+
choices=[i.value.name for i in Version],
|
335 |
+
value=[i.value.name for i in Version],
|
336 |
+
elem_id="filter-columns-version",
|
337 |
+
)
|
338 |
+
filter_columns_backend = gr.CheckboxGroup(
|
339 |
+
label="Backend",
|
340 |
+
choices=[i.value.name for i in Backend],
|
341 |
+
value=[i.value.name for i in Backend],
|
342 |
+
elem_id="filter-columns-backend",
|
343 |
+
)
|
344 |
|
345 |
# DataFrameコンポーネントの初期化
|
346 |
leaderboard_table = gr.Dataframe(
|
|
|
372 |
filter_columns_size.change,
|
373 |
filter_columns_add_special_tokens.change,
|
374 |
filter_columns_num_few_shots.change,
|
375 |
+
filter_columns_version.change,
|
376 |
+
filter_columns_backend.change,
|
377 |
search_bar.submit,
|
378 |
],
|
379 |
fn=update_table,
|
|
|
385 |
filter_columns_size,
|
386 |
filter_columns_add_special_tokens,
|
387 |
filter_columns_num_few_shots,
|
388 |
+
filter_columns_version,
|
389 |
+
filter_columns_backend,
|
390 |
search_bar,
|
391 |
],
|
392 |
outputs=leaderboard_table,
|
src/display/utils.py
CHANGED
@@ -44,6 +44,10 @@ auto_eval_column_dict.append(["still_on_hub", ColumnContent, ColumnContent("Avai
|
|
44 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
45 |
auto_eval_column_dict.append(["num_few_shots", ColumnContent, ColumnContent("Few-shot", "str", False)])
|
46 |
auto_eval_column_dict.append(["add_special_tokens", ColumnContent, ColumnContent("Add Special Tokens", "bool", False)])
|
|
|
|
|
|
|
|
|
47 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
48 |
|
49 |
# We use make dataclass to dynamically fill the scores from Tasks
|
@@ -130,6 +134,28 @@ class NumFewShots(Enum):
|
|
130 |
return NumFewShots.Unknown
|
131 |
|
132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
# Column selection
|
134 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
135 |
TYPES = [c.type for c in fields(AutoEvalColumn)]
|
|
|
44 |
auto_eval_column_dict.append(["revision", ColumnContent, ColumnContent("Model sha", "str", False, False)])
|
45 |
auto_eval_column_dict.append(["num_few_shots", ColumnContent, ColumnContent("Few-shot", "str", False)])
|
46 |
auto_eval_column_dict.append(["add_special_tokens", ColumnContent, ColumnContent("Add Special Tokens", "bool", False)])
|
47 |
+
auto_eval_column_dict.append(
|
48 |
+
["llm_jp_eval_version", ColumnContent, ColumnContent("llm-jp-eval version", "str", False)]
|
49 |
+
)
|
50 |
+
auto_eval_column_dict.append(["backend", ColumnContent, ColumnContent("Backend Library", "str", False)])
|
51 |
auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_for_query", "str", False, dummy=True)])
|
52 |
|
53 |
# We use make dataclass to dynamically fill the scores from Tasks
|
|
|
134 |
return NumFewShots.Unknown
|
135 |
|
136 |
|
137 |
+
class Version(Enum):
    """llm-jp-eval versions known to the leaderboard.

    Each member wraps a ``ModelDetails`` built from the label string; callers
    read that label back through ``member.value.name``.
    """

    v1_4_1 = ModelDetails("v1.4.1")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(version):
        """Map a raw version string to a ``Version`` member.

        Declared as a static method so it behaves the same whether it is
        reached through the class or through a member.

        Args:
            version: Version string without a leading "v", e.g. ``"1.4.1"``.

        Returns:
            ``Version.v1_4_1`` for ``"1.4.1"``; ``Version.Unknown`` for any
            other value.
        """
        if version == "1.4.1":
            return Version.v1_4_1
        return Version.Unknown
|
146 |
+
|
147 |
+
|
148 |
+
class Backend(Enum):
    """Backend libraries known to the leaderboard.

    Each member wraps a ``ModelDetails`` built from the label string; callers
    read that label back through ``member.value.name``.
    """

    vllm = ModelDetails("vllm")
    Unknown = ModelDetails("?")

    @staticmethod
    def from_str(backend):
        """Map a raw backend identifier to a ``Backend`` member.

        Declared as a static method so it behaves the same whether it is
        reached through the class or through a member.

        Args:
            backend: Backend identifier string, e.g. ``"vllm"``.

        Returns:
            ``Backend.vllm`` for ``"vllm"``; ``Backend.Unknown`` for any
            other value.
        """
        if backend == "vllm":
            return Backend.vllm
        return Backend.Unknown
|
157 |
+
|
158 |
+
|
159 |
# Column selection
|
160 |
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
|
161 |
TYPES = [c.type for c in fields(AutoEvalColumn)]
|
src/leaderboard/read_evals.py
CHANGED
@@ -7,7 +7,7 @@ from decimal import Decimal
|
|
7 |
import dateutil
|
8 |
|
9 |
from src.display.formatting import make_clickable_model
|
10 |
-
from src.display.utils import AutoEvalColumn, ModelType, Tasks, WeightType
|
11 |
from src.submission.check_validity import is_model_on_hub
|
12 |
|
13 |
|
@@ -34,6 +34,8 @@ class EvalResult:
|
|
34 |
still_on_hub: bool = False
|
35 |
num_few_shots: str = "0"
|
36 |
add_special_tokens: str = ""
|
|
|
|
|
37 |
|
38 |
@classmethod
|
39 |
def init_from_json_file(self, json_filepath):
|
@@ -62,6 +64,9 @@ class EvalResult:
|
|
62 |
config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens")
|
63 |
)
|
64 |
|
|
|
|
|
|
|
65 |
# Get model and org
|
66 |
# org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
|
67 |
org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
|
@@ -116,6 +121,8 @@ class EvalResult:
|
|
116 |
architecture=architecture,
|
117 |
num_few_shots=num_few_shots,
|
118 |
add_special_tokens=add_special_tokens,
|
|
|
|
|
119 |
)
|
120 |
|
121 |
def update_with_request_file(self, requests_path):
|
@@ -153,6 +160,8 @@ class EvalResult:
|
|
153 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
154 |
AutoEvalColumn.num_few_shots.name: self.num_few_shots,
|
155 |
AutoEvalColumn.add_special_tokens.name: self.add_special_tokens,
|
|
|
|
|
156 |
}
|
157 |
|
158 |
# for task in Tasks:
|
|
|
7 |
import dateutil
|
8 |
|
9 |
from src.display.formatting import make_clickable_model
|
10 |
+
from src.display.utils import AutoEvalColumn, Backend, ModelType, Tasks, Version, WeightType
|
11 |
from src.submission.check_validity import is_model_on_hub
|
12 |
|
13 |
|
|
|
34 |
still_on_hub: bool = False
|
35 |
num_few_shots: str = "0"
|
36 |
add_special_tokens: str = ""
|
37 |
+
llm_jp_eval_version: str = ""
|
38 |
+
backend: str = ""
|
39 |
|
40 |
@classmethod
|
41 |
def init_from_json_file(self, json_filepath):
|
|
|
64 |
config.get("pipeline_kwargs", {"add_special_tokens": "Unknown"}).get("add_special_tokens")
|
65 |
)
|
66 |
|
67 |
+
version = Version.from_str(metainfo.get("version", "?")).value.name
|
68 |
+
backend = Backend.from_str(model_config.get("_target_", "?").split(".")[0]).value.name
|
69 |
+
|
70 |
# Get model and org
|
71 |
# org_and_model = config.get("model_name", config.get("offline_inference").get("model_name", None))
|
72 |
org_and_model = config.get("model_name", config.get("offline_inference", {}).get("model_name", "Unknown"))
|
|
|
121 |
architecture=architecture,
|
122 |
num_few_shots=num_few_shots,
|
123 |
add_special_tokens=add_special_tokens,
|
124 |
+
llm_jp_eval_version=version,
|
125 |
+
backend=backend,
|
126 |
)
|
127 |
|
128 |
def update_with_request_file(self, requests_path):
|
|
|
160 |
AutoEvalColumn.still_on_hub.name: self.still_on_hub,
|
161 |
AutoEvalColumn.num_few_shots.name: self.num_few_shots,
|
162 |
AutoEvalColumn.add_special_tokens.name: self.add_special_tokens,
|
163 |
+
AutoEvalColumn.llm_jp_eval_version.name: self.llm_jp_eval_version,
|
164 |
+
AutoEvalColumn.backend.name: self.backend,
|
165 |
}
|
166 |
|
167 |
# for task in Tasks:
|