TracyMc committed on
Commit
4224b43
•
1 Parent(s): e2eea98
Files changed (1) hide show
  1. app.py +56 -14
app.py CHANGED
@@ -3,8 +3,10 @@ import json
3
  import pandas as pd
4
  from collections import defaultdict
5
  import copy as cp
6
- from urllib.request import urlopen
7
  import re
 
 
8
 
9
  # Constants
10
  CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
@@ -22,11 +24,37 @@ GITHUB_REPO = 'https://github.com/open-compass/opencompass'
22
  GITHUB_RAW = 'https://raw.githubusercontent.com/open-compass/opencompass'
23
  GITHUB_BLOB = 'https://github.com/open-compass/opencompass/blob'
24
 
25
- # URL for the JSON data
26
- DATA_URL = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.24-12.20241205.json"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  # Markdown content
29
- MAIN_LEADERBOARD_TITLE = "# CompassAcademic Leaderboard"
 
 
30
  MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
31
  The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
32
  - The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
@@ -34,7 +62,6 @@ The CompassAcademic currently focuses on the comprehensive reasoning abilities o
34
  - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)πŸ†.
35
  """
36
 
37
-
38
  def fix_image_urls(content):
39
  """Fix image URLs in markdown content."""
40
  # Handle the specific logo.svg path
@@ -57,8 +84,8 @@ MODEL_SIZE = ['<10B', '10B-70B', '>70B', 'Unknown']
57
  MODEL_TYPE = ['API', 'OpenSource']
58
 
59
 
60
- def load_data():
61
- response = urlopen(DATA_URL)
62
  data = json.loads(response.read().decode('utf-8'))
63
  return data
64
 
@@ -141,7 +168,6 @@ def filter_table(df, size_ranges, model_types):
141
  type_mask |= filtered_df['OpenSource'] == 'Yes'
142
  filtered_df = filtered_df[type_mask]
143
 
144
- # Return the filtered DataFrame directly
145
  return filtered_df
146
 
147
 
@@ -172,11 +198,13 @@ def calculate_column_widths(df):
172
 
173
 
174
  def create_interface():
175
- data = load_data()
 
176
  df = build_main_table(data)
 
177
 
178
  with gr.Blocks() as demo:
179
- gr.Markdown(MAIN_LEADERBOARD_TITLE)
180
 
181
  with gr.Tabs() as tabs:
182
  with gr.TabItem("πŸ… Main Leaderboard", elem_id='main'):
@@ -206,6 +234,22 @@ def create_interface():
206
  column_widths=calculate_column_widths(df),
207
  )
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  def update_table(size_ranges, model_types):
210
  filtered_df = filter_table(df, size_ranges, model_types)
211
  return filtered_df.sort_values(
@@ -224,10 +268,8 @@ def create_interface():
224
  outputs=table,
225
  )
226
 
227
- # with gr.TabItem("πŸ” About", elem_id='about'):
228
- # readme_content = urlopen(OPENCOMPASS_README).read().decode()
229
- # fixed_content = fix_image_urls(readme_content)
230
- # gr.Markdown(fixed_content)
231
 
232
  with gr.Row():
233
  with gr.Accordion("Citation", open=False):
 
3
  import pandas as pd
4
  from collections import defaultdict
5
  import copy as cp
6
+ from urllib.request import urlopen, URLError
7
  import re
8
+ from datetime import datetime
9
+ import time
10
 
11
  # Constants
12
  CITATION_BUTTON_TEXT = r"""@misc{2023opencompass,
 
24
  GITHUB_RAW = 'https://raw.githubusercontent.com/open-compass/opencompass'
25
  GITHUB_BLOB = 'https://github.com/open-compass/opencompass/blob'
26
 
27
+ # Base URL for the JSON data
28
+ DATA_URL_BASE = "http://opencompass.oss-cn-shanghai.aliyuncs.com/assets/research-rank/research-data.REALTIME."
29
+
30
def find_latest_data_url(base_url=None, max_days=365, today=None, timeout=10):
    """Find the newest dated data file by probing backwards from a start date.

    Candidate URLs are built as ``f"{base_url}{YYYYMMDD}.json"`` for the start
    date, then the day before, and so on. The first URL that opens is returned.

    Args:
        base_url: URL prefix that precedes the date stamp. Defaults to the
            module-level ``DATA_URL_BASE``.
        max_days: Number of consecutive days to try, starting at ``today``.
        today: ``datetime`` to start from. Defaults to ``datetime.now()``.
        timeout: Per-request timeout in seconds passed to ``urlopen``.

    Returns:
        ``(url, date_str)`` for the first URL that opens, where ``date_str``
        is ``YYYYMMDD``; ``(None, None)`` if nothing in the window opens.
    """
    # Function-scope import: the file only imports the ``datetime`` class.
    from datetime import timedelta

    if base_url is None:
        base_url = DATA_URL_BASE
    if today is None:
        today = datetime.now()

    for days_back in range(max_days):
        # timedelta arithmetic rolls over month/year boundaries. The previous
        # today.replace(day=today.day - i) raised ValueError once i reached
        # the current day of month.
        date_str = (today - timedelta(days=days_back)).strftime("%Y%m%d")
        url = f"{base_url}{date_str}.json"
        try:
            # Only existence matters here; close the response immediately so
            # up to max_days probes do not leak connections.
            with urlopen(url, timeout=timeout):
                pass
        except OSError:
            # URLError/HTTPError subclass OSError, as do socket timeouts.
            continue
        return url, date_str

    # No file found anywhere in the window.
    return None, None
45
+
46
def get_latest_data():
    """Return the latest data URL and its date formatted for display.

    Returns:
        ``(data_url, update_time)`` where ``update_time`` is the date stamp
        reported by ``find_latest_data_url()`` reformatted from ``YYYYMMDD``
        to ``YYYY-MM-DD``.

    Raises:
        RuntimeError: If ``find_latest_data_url()`` reports no URL. This is a
            subclass of ``Exception``, so callers that catch ``Exception``
            keep working.
    """
    data_url, update_time = find_latest_data_url()
    if not data_url:
        raise RuntimeError("Could not find valid data URL")
    formatted_update_time = datetime.strptime(update_time, "%Y%m%d").strftime("%Y-%m-%d")
    return data_url, formatted_update_time
53
 
54
  # Markdown content
55
def get_leaderboard_title(update_time: str) -> str:
    """Return the level-1 Markdown heading for the leaderboard.

    Args:
        update_time: Text to show after "Last Updated:"; inserted verbatim.
    """
    return f"# CompassAcademic Leaderboard (Last Updated: {update_time})"
57
+
58
  MAIN_LEADERBOARD_DESCRIPTION = """## Main Evaluation Results
59
  The CompassAcademic currently focuses on the comprehensive reasoning abilities of LLMs.
60
  - The datasets selected so far include General Knowledge Reasoning (MMLU-Pro/GPQA-Diamond), Logical Reasoning (BBH), Mathematical Reasoning (MATH-500, AIME), Code Completion (LiveCodeBench, HumanEval), and Instruction Following (IFEval).
 
62
  - Prompts and reproduction scripts can be found in [**OpenCompass**: A Toolkit for Evaluation of LLMs](https://github.com/open-compass/opencompass)πŸ†.
63
  """
64
 
 
65
  def fix_image_urls(content):
66
  """Fix image URLs in markdown content."""
67
  # Handle the specific logo.svg path
 
84
  MODEL_TYPE = ['API', 'OpenSource']
85
 
86
 
87
def load_data(data_url):
    """Download and parse the JSON document at ``data_url``.

    Args:
        data_url: URL accepted by ``urllib.request.urlopen``.

    Returns:
        The decoded JSON payload (body is read as UTF-8).

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        json.JSONDecodeError: If the body is not valid JSON.
    """
    # The context manager closes the response even if reading or parsing
    # fails; the previous version never closed it.
    with urlopen(data_url) as response:
        data = json.loads(response.read().decode('utf-8'))
    return data
91
 
 
168
  type_mask |= filtered_df['OpenSource'] == 'Yes'
169
  filtered_df = filtered_df[type_mask]
170
 
 
171
  return filtered_df
172
 
173
 
 
198
 
199
 
200
  def create_interface():
201
+ data_url, update_time = get_latest_data()
202
+ data = load_data(data_url)
203
  df = build_main_table(data)
204
+ title = gr.Markdown(get_leaderboard_title(update_time))
205
 
206
  with gr.Blocks() as demo:
207
+ title_comp = gr.Markdown(get_leaderboard_title(update_time))
208
 
209
  with gr.Tabs() as tabs:
210
  with gr.TabItem("πŸ… Main Leaderboard", elem_id='main'):
 
234
  column_widths=calculate_column_widths(df),
235
  )
236
 
237
# update_data: polls get_latest_data() every 300 seconds and, when the URL
# differs from the enclosing scope's data_url, rebuilds the table and assigns
# the new title/table to title_comp.value and table.value.
# NOTE(review): this function never returns (while True + sleep) and is
# registered via demo.load(update_data) further down. Presumably that event
# fires per page load, so each visitor would hold a worker in this loop
# indefinitely - confirm against the Gradio event documentation.
# NOTE(review): results are written to component .value attributes instead of
# being returned as event outputs; verify these assignments actually reach
# connected browsers.
# NOTE(review): data_url is never reassigned here, so after the first change
# every later iteration still compares against the original URL and reloads.
# The enclosing df read by update_table is not refreshed either.
+ def update_data():
238
+ """Periodically check for new data and update the interface"""
239
+ while True:
240
+ time.sleep(300) # Check every 5 minutes
241
+ try:
242
+ new_data_url, new_update_time = get_latest_data()
243
+ if new_data_url != data_url:
244
+ new_data = load_data(new_data_url)
245
+ new_df = build_main_table(new_data)
246
+ filtered_df = filter_table(new_df, size_filter.value, type_filter.value)
247
+ title_comp.value = get_leaderboard_title(new_update_time)
248
+ table.value = filtered_df.sort_values("Average Score", ascending=False)
249
+ except Exception as e:
250
+ print(f"Error updating data: {e}")
251
+ continue
252
+
253
  def update_table(size_ranges, model_types):
254
  filtered_df = filter_table(df, size_ranges, model_types)
255
  return filtered_df.sort_values(
 
268
  outputs=table,
269
  )
270
 
271
+ # Set up periodic data update
272
+ demo.load(update_data)
 
 
273
 
274
  with gr.Row():
275
  with gr.Accordion("Citation", open=False):