taxfree_python committed on
Commit
7d9cce6
·
1 Parent(s): 178e3c8

Rebuild the project

Browse files
app.py CHANGED
@@ -1,204 +1,37 @@
1
  import gradio as gr
2
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
3
- import pandas as pd
4
- from apscheduler.schedulers.background import BackgroundScheduler
5
- from huggingface_hub import snapshot_download
6
 
7
- from src.about import (
8
- CITATION_BUTTON_LABEL,
9
- CITATION_BUTTON_TEXT,
10
- EVALUATION_QUEUE_TEXT,
11
- INTRODUCTION_TEXT,
12
- LLM_BENCHMARKS_TEXT,
13
- TITLE,
14
- )
15
- from src.display.css_html_js import custom_css
16
- from src.display.utils import (
17
- BENCHMARK_COLS,
18
- COLS,
19
- EVAL_COLS,
20
- EVAL_TYPES,
21
- AutoEvalColumn,
22
- ModelType,
23
- fields,
24
- WeightType,
25
- Precision
26
- )
27
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
28
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
29
- from src.submission.submit import add_new_eval
30
-
31
-
32
- def restart_space():
33
- API.restart_space(repo_id=REPO_ID)
34
-
35
- ### Space initialisation
36
- try:
37
- print(EVAL_REQUESTS_PATH)
38
- snapshot_download(
39
- repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
40
- )
41
- except Exception:
42
- restart_space()
43
- try:
44
- print(EVAL_RESULTS_PATH)
45
- snapshot_download(
46
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
47
- )
48
- except Exception:
49
- restart_space()
50
-
51
-
52
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
53
 
54
- (
55
- finished_eval_queue_df,
56
- running_eval_queue_df,
57
- pending_eval_queue_df,
58
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
59
 
60
- def init_leaderboard(dataframe):
61
- if dataframe is None or dataframe.empty:
62
- raise ValueError("Leaderboard DataFrame is empty or None.")
63
- return Leaderboard(
64
- value=dataframe,
65
- datatype=[c.type for c in fields(AutoEvalColumn)],
66
- select_columns=SelectColumns(
67
- default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
68
- cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
69
- label="Select Columns to Display:",
70
- ),
71
- search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
72
- hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
73
- filter_columns=[
74
- ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
75
- ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
76
- ColumnFilter(
77
- AutoEvalColumn.params.name,
78
- type="slider",
79
- min=0.01,
80
- max=150,
81
- label="Select the number of parameters (B)",
82
- ),
83
- ColumnFilter(
84
- AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
85
- ),
86
- ],
87
- bool_checkboxgroup_label="Hide models",
88
- interactive=False,
89
- )
90
 
91
 
92
- demo = gr.Blocks(css=custom_css)
93
- with demo:
94
- gr.HTML(TITLE)
95
- gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
96
-
97
- with gr.Tabs(elem_classes="tab-buttons") as tabs:
98
- with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
99
- leaderboard = init_leaderboard(LEADERBOARD_DF)
100
-
101
- with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
102
- gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
103
-
104
- with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
105
- with gr.Column():
106
- with gr.Row():
107
- gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
108
-
109
- with gr.Column():
110
- with gr.Accordion(
111
- f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
112
- open=False,
113
- ):
114
- with gr.Row():
115
- finished_eval_table = gr.components.Dataframe(
116
- value=finished_eval_queue_df,
117
- headers=EVAL_COLS,
118
- datatype=EVAL_TYPES,
119
- row_count=5,
120
- )
121
- with gr.Accordion(
122
- f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
123
- open=False,
124
- ):
125
- with gr.Row():
126
- running_eval_table = gr.components.Dataframe(
127
- value=running_eval_queue_df,
128
- headers=EVAL_COLS,
129
- datatype=EVAL_TYPES,
130
- row_count=5,
131
- )
132
 
133
- with gr.Accordion(
134
- f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
135
- open=False,
136
- ):
137
- with gr.Row():
138
- pending_eval_table = gr.components.Dataframe(
139
- value=pending_eval_queue_df,
140
- headers=EVAL_COLS,
141
- datatype=EVAL_TYPES,
142
- row_count=5,
143
- )
144
- with gr.Row():
145
- gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
146
 
147
- with gr.Row():
148
- with gr.Column():
149
- model_name_textbox = gr.Textbox(label="Model name")
150
- revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
151
- model_type = gr.Dropdown(
152
- choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
153
- label="Model type",
154
- multiselect=False,
155
- value=None,
156
- interactive=True,
157
- )
158
 
159
- with gr.Column():
160
- precision = gr.Dropdown(
161
- choices=[i.value.name for i in Precision if i != Precision.Unknown],
162
- label="Precision",
163
- multiselect=False,
164
- value="float16",
165
- interactive=True,
166
- )
167
- weight_type = gr.Dropdown(
168
- choices=[i.value.name for i in WeightType],
169
- label="Weights type",
170
- multiselect=False,
171
- value="Original",
172
- interactive=True,
173
- )
174
- base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
175
 
176
- submit_button = gr.Button("Submit Eval")
177
- submission_result = gr.Markdown()
178
- submit_button.click(
179
- add_new_eval,
180
- [
181
- model_name_textbox,
182
- base_model_name_textbox,
183
- revision_name_textbox,
184
- precision,
185
- weight_type,
186
- model_type,
187
- ],
188
- submission_result,
189
- )
190
 
191
- with gr.Row():
192
- with gr.Accordion("📙 Citation", open=False):
193
- citation_button = gr.Textbox(
194
- value=CITATION_BUTTON_TEXT,
195
- label=CITATION_BUTTON_LABEL,
196
- lines=20,
197
- elem_id="citation-button",
198
- show_copy_button=True,
199
- )
200
 
201
- scheduler = BackgroundScheduler()
202
- scheduler.add_job(restart_space, "interval", seconds=1800)
203
- scheduler.start()
204
- demo.queue(default_concurrency_limit=40).launch()
 
1
  import gradio as gr
 
 
 
 
2
 
3
+ from leaderboard.dataset import load_or_initialize_leaderboard
4
+ from leaderboard.submission import submit_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
 
 
 
 
 
6
 
7
# Leaderboard display
def display_leaderboard():
    """Return the current leaderboard converted to a pandas DataFrame.

    The leaderboard is obtained from ``load_or_initialize_leaderboard`` on
    every call.
    """
    return load_or_initialize_leaderboard().to_pandas()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
 
13
# Gradio components
# The table's value is the `display_leaderboard` callable rather than a
# precomputed frame; the component is created here and rendered later.
leaderboard_component = gr.DataFrame(
    value=display_leaderboard,
    headers=["Model Name", "Score", "Rank"],
    interactive=False,
    label="Leaderboard",
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
# Submission form: takes a model name and a model file, calls `submit_model`,
# and shows the result in a read-only table.
submit_form = gr.Interface(
    fn=submit_model,
    inputs=[
        gr.Textbox(label="Model Name"),
        gr.File(label="Model File"),
    ],
    outputs=gr.DataFrame(
        headers=["Model Name", "Score", "Rank"],
        interactive=False,
    ),
)
 
 
 
 
 
 
 
 
23
 
24
# Gradio application
with gr.Blocks() as app:
    gr.Markdown("# human_methylation_bench_ver1")

    # One tab per pre-built component; `.render()` places each one here.
    with gr.Tab("Leaderboard"):
        leaderboard_component.render()

    with gr.Tab("Submit Model"):
        submit_form.render()


if __name__ == "__main__":
    app.launch()
 
 
leaderboard/__init__.py ADDED
File without changes
leaderboard/dataset.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import Dataset, load_dataset
2
+
3
# Directory the leaderboard dataset is saved to and loaded from.
DATASET_PATH = "leaderboard_dataset"

# Initial data: the rows used when no saved leaderboard exists yet.
INITIAL_DATA = {"Model Name": ["Baseline Model"], "Score": [0.8], "Rank": [1]}
11
+
12
+
13
# Initialize or load the dataset
def load_or_initialize_leaderboard():
    """Load the leaderboard from ``DATASET_PATH``, creating it if missing.

    If loading raises ``FileNotFoundError``, a dataset is built from
    ``INITIAL_DATA``, saved to ``DATASET_PATH``, and returned.

    Returns:
        The loaded or newly created ``Dataset``.
    """
    try:
        return Dataset.load_from_disk(DATASET_PATH)
    except FileNotFoundError:
        # Nothing saved yet: seed the leaderboard and persist it.
        seeded = Dataset.from_dict(INITIAL_DATA)
        seeded.save_to_disk(DATASET_PATH)
        return seeded
21
+
22
+
23
# Save the dataset
def save_leaderboard(dataset):
    """Write ``dataset`` to disk at ``DATASET_PATH``.

    Args:
        dataset: Object exposing ``save_to_disk``.
    """
    # NOTE(review): presumably `dataset` must not still be backed by files
    # under DATASET_PATH when this is called — confirm against the datasets docs.
    dataset.save_to_disk(dataset_path=DATASET_PATH)
leaderboard/evaluation.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
# Dummy evaluation function
def evaluate_model(model_path):
    """
    Evaluate a submitted model and return its score.

    In production this should load the model and compute a score from the
    test data; for now ``model_path`` is ignored.
    """
    # TODO: implement the real evaluation logic
    placeholder_score = 0.75  # provisional score
    return placeholder_score
leaderboard/submission.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import Dataset
2
+
3
+ from .dataset import load_or_initialize_leaderboard, save_leaderboard
4
+ from .evaluation import evaluate_model
5
+
6
+
7
def submit_model(model_name, model_file):
    """
    Handle a model submission.

    1. Evaluate the model.
    2. Append the result to the leaderboard.
    3. Recompute the ranks and save the leaderboard.

    Args:
        model_name: Value stored in the "Model Name" column.
        model_file: The uploaded model, either an object exposing the file
            path as ``.name`` or the path value itself.

    Returns:
        The updated leaderboard as a pandas DataFrame, sorted by "Score" in
        descending order, with "Rank" numbered from 1.

    Raises:
        ValueError: If ``model_file`` is ``None`` (no file was provided).
    """
    if model_file is None:
        # Fail with an explicit message instead of an AttributeError on
        # `None.name`, and before the leaderboard is touched.
        raise ValueError("A model file is required to submit a model.")

    # Accept both file-like objects (path in `.name`) and plain path values.
    model_path = getattr(model_file, "name", model_file)

    dataset = load_or_initialize_leaderboard()

    # Evaluate the model
    score = evaluate_model(model_path)

    # Append the new model; "Rank" is recomputed for every row below.
    new_entry = {"Model Name": model_name, "Score": score}
    dataset = dataset.add_item(new_entry)

    # Ranking. A stable sort keeps rows with equal scores in their existing
    # order, so tied ranks are deterministic and a new submission never
    # displaces an earlier entry that has the same score.
    df = dataset.to_pandas()
    df = df.sort_values(by="Score", ascending=False, kind="stable").reset_index(drop=True)
    df["Rank"] = range(1, len(df) + 1)

    # Update and save the dataset
    updated_dataset = Dataset.from_pandas(df)
    save_leaderboard(updated_dataset)

    return df
pyproject.toml CHANGED
@@ -1,3 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  [tool.ruff]
2
  # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
3
  select = ["E", "F"]
@@ -10,4 +27,4 @@ profile = "black"
10
  line_length = 119
11
 
12
  [tool.black]
13
- line-length = 119
 
1
+ [tool.poetry]
2
+ name = "human-methylation-bench-ver1"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["Your Name <you@example.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.12"
10
+ gradio = "^5.6.0"
11
+ pandas = "^2.2.3"
12
+ datasets = "^3.1.0"
13
+
14
+ [build-system]
15
+ requires = ["poetry-core"]
16
+ build-backend = "poetry.core.masonry.api"
17
+
18
  [tool.ruff]
19
  # Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default.
20
  select = ["E", "F"]
 
27
  line_length = 119
28
 
29
  [tool.black]
30
+ line-length = 119