hi-melnikov committed
Commit ce477d4 · 1 Parent(s): 985ab6e

Trying to fix all the problems with the app

Files changed (1): app.py (+69, -75)
app.py CHANGED
@@ -72,9 +72,10 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, ba
             attempt += 1
     raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
 
-def init_space(full_init: bool = True):
+def build_leadearboard_df():
     """Initializes the application space, loading only necessary data."""
-    if full_init:
+    # Check ENV LEADERBOARD_DOWNLOAD if wee need to download the leaderboard
+    if os.getenv("LEADERBOARD_DOWNLOAD", "True") == "True":
         # These downloads only occur on full initialization
         # try:
         #     download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
@@ -87,75 +88,70 @@ def init_space(full_init: bool = True):
         #     restart_space()
 
     # Always retrieve the leaderboard DataFrame
-    original_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
-
-
-    leaderboard_df = original_df.copy()
-
-    return leaderboard_df
-
-# Convert the environment variable "LEADERBOARD_FULL_INIT" to a boolean value, defaulting to True if the variable is not set.
-# This controls whether a full initialization should be performed.
-do_full_init = os.getenv("LEADERBOARD_FULL_INIT", "True") == "True"
-
-# Calls the init_space function with the `full_init` parameter determined by the `do_full_init` variable.
-# This initializes various DataFrames used throughout the application, with the level of initialization detail controlled by the `do_full_init` flag.
-# leaderboard_df = init_space(full_init=do_full_init)
-
-demo = gr.Blocks(css=custom_css)
-with demo:
-    gr.HTML(TITLE)
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-
-    with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-            pass
-            """
-            leaderboard = Leaderboard(
-                value=leaderboard_df,
-                datatype=[c.type for c in fields(AutoEvalColumn)],
-                select_columns=SelectColumns(
-                    default_selection=[
-                        c.name
-                        for c in fields(AutoEvalColumn)
-                        if c.displayed_by_default
+    leaderboard_df = pd.DataFrame.from_records(json.load(open('eval-results/evals/upd.json','r')))
+    return leaderboard_df.copy()
+
+def build_demo():
+    demo = gr.Blocks(
+        title = "Chatbot Arena Leaderboard",
+        css=custom_css
+    )
+    # leaderboard_df = build_leadearboard_df()
+    with demo:
+        gr.HTML(TITLE)
+        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+        with gr.Tabs(elem_classes="tab-buttons") as tabs:
+            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
+                """
+                leaderboard = Leaderboard(
+                    value=leaderboard_df,
+                    datatype=[c.type for c in fields(AutoEvalColumn)],
+                    select_columns=SelectColumns(
+                        default_selection=[
+                            c.name
+                            for c in fields(AutoEvalColumn)
+                            if c.displayed_by_default
+                        ],
+                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
+                        label="Select Columns to Display:",
+                    ),
+                    search_columns=[
+                        AutoEvalColumn.model.name,
+                        # AutoEvalColumn.fullname.name,
+                        # AutoEvalColumn.license.name
                     ],
-                cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
-                label="Select Columns to Display:",
-                ),
-                search_columns=[
-                    AutoEvalColumn.model.name,
-                    # AutoEvalColumn.fullname.name,
-                    # AutoEvalColumn.license.name
-                ],
-            )
-            """
-
-
-        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
-            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=4):
-            gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
-
-        with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=5):
-
-            with gr.Row():
-                gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
-
-            with gr.Column():
-                model_name_textbox = gr.Textbox(label="Model name")
-                def upload_file(file):
-                    file_path = file.name.split('/')[-1] if '/' in file.name else file.name
-                    logging.info("New submition: file saved to %s", file_path)
-                    API.upload_file(path_or_fileobj=file.name,path_in_repo='./external/'+file_path,repo_id='Vikhrmodels/openbench-eval',repo_type='dataset')
-                    os.environ[RESET_JUDGEMENT_ENV] = '1'
-
-                    return file.name
-                if model_name_textbox:
-                    file_output = gr.File()
-                    upload_button = gr.UploadButton("Click to Upload & Submit Answers", file_types=['*'], file_count="single")
-                    upload_button.upload(upload_file, upload_button, file_output)
+                )
+                """
+                pass
+
+
+            with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
+                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
+
+            with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
+                gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
+
+            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
+
+                with gr.Row():
+                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
+
+                with gr.Column():
+                    model_name_textbox = gr.Textbox(label="Model name")
+                    def upload_file(file):
+                        file_path = file.name.split('/')[-1] if '/' in file.name else file.name
+                        logging.info("New submition: file saved to %s", file_path)
+                        API.upload_file(path_or_fileobj=file.name,path_in_repo='./external/'+file_path,repo_id='Vikhrmodels/openbench-eval',repo_type='dataset')
+                        os.environ[RESET_JUDGEMENT_ENV] = '1'
+                        return file.name
+
+                    if model_name_textbox:
+                        file_output = gr.File()
+                        upload_button = gr.UploadButton("Click to Upload & Submit Answers", file_types=['*'], file_count="single")
+                        upload_button.upload(upload_file, upload_button, file_output)
+
+    return demo
 
 # print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
 # print(os.system('cd src/gen/ && python show_result.py --output'))
@@ -164,11 +160,8 @@ def update_board():
     need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
     if need_reset != '1':
         return
-
     os.environ[RESET_JUDGEMENT_ENV] = '0'
-
     subprocess.run(['python', 'src/gen/gen_judgement.py'], check = False)
-
     subprocess.Popen('python3.src/gen/show_result.py --output')
 
 
@@ -178,5 +171,6 @@ if __name__ == "__main__":
     scheduler = BackgroundScheduler()
     scheduler.add_job(update_board, "interval", minutes=10)
     scheduler.start()
-
-    demo.queue(default_concurrency_limit=40).launch(debug=True)
+
+    demo_app = build_demo()
+    demo_app.launch(debug=True)
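
Note: the Leaderboard block stays commented out in this commit, but the new build_leadearboard_df() helper is evidently meant to feed it. Below is a minimal sketch (not part of the commit) of how that wiring could look once re-enabled; every identifier is taken from the diff except the gradio_leaderboard import, which is an assumption based on the Leaderboard/SelectColumns names.

# Sketch only, assuming app.py already defines custom_css, TITLE, INTRODUCTION_TEXT,
# AutoEvalColumn and build_leadearboard_df() as shown in the diff above.
from dataclasses import fields

import gradio as gr
from gradio_leaderboard import Leaderboard, SelectColumns  # assumed import

def build_demo():
    demo = gr.Blocks(title="Chatbot Arena Leaderboard", css=custom_css)
    leaderboard_df = build_leadearboard_df()  # reads eval-results/evals/upd.json
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tab-buttons"):
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[AutoEvalColumn.model.name],
                )
    return demo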