Shiyu Zhao committed on
Commit
85d9e2b
·
1 Parent(s): c2c76a6

Update space

Browse files
Files changed (2) hide show
  1. app.py +89 -34
  2. utils/hub_storage.py +17 -1
app.py CHANGED
@@ -220,9 +220,21 @@ def scan_submissions_directory():
220
  global df_synthesized_full, df_synthesized_10, df_human_generated
221
 
222
  try:
 
 
 
223
  # Get submissions directory content from HuggingFace hub
224
- submissions_content = hub_storage.list_repo_content("submissions")
225
- if not submissions_content:
 
 
 
 
 
 
 
 
 
226
  print("No submissions directory found or empty")
227
  return
228
 
@@ -233,39 +245,70 @@ def scan_submissions_directory():
233
  'human_generated_eval': []
234
  }
235
 
236
- # Iterate through team folders
237
- for folder in submissions_content:
238
- if not folder.endswith('/'): # Skip files
 
 
 
 
 
 
239
  continue
240
 
 
 
 
 
 
 
 
241
  try:
242
- # Get latest.json to find most recent submission
243
- latest_content = hub_storage.get_repo_content(f"{folder}latest.json")
244
- if not latest_content:
245
  continue
246
 
247
- latest_info = json.loads(latest_content)
248
- if latest_info.get('status') != 'pending_review': # Only include approved submissions
249
- timestamp = latest_info.get('latest_submission')
250
- if not timestamp:
251
- continue
252
-
253
- # Get metadata file for this submission
254
- metadata_path = f"{folder}metadata_{timestamp}.json"
255
- metadata_content = hub_storage.get_repo_content(metadata_path)
256
- if not metadata_content:
257
- continue
258
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  submission_data = json.loads(metadata_content)
260
- split = submission_data.get('Split')
261
- if split in submissions_by_split:
262
- submissions_by_split[split].append(submission_data)
263
-
264
- # Update corresponding DataFrame
265
- update_leaderboard_data(submission_data)
266
-
 
 
 
 
267
  except Exception as e:
268
- print(f"Error processing folder {folder}: {str(e)}")
269
  continue
270
 
271
  print("Leaderboard initialized with existing submissions:")
@@ -290,6 +333,8 @@ def initialize_leaderboard():
290
  df_synthesized_10 = pd.DataFrame(data_synthesized_10)
291
  df_human_generated = pd.DataFrame(data_human_generated)
292
 
 
 
293
  # Then scan and add submitted results
294
  scan_submissions_directory()
295
 
@@ -298,6 +343,22 @@ def initialize_leaderboard():
298
  except Exception as e:
299
  print(f"Error initializing leaderboard: {str(e)}")
300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
 
302
  def save_submission(submission_data, csv_file):
303
  """
@@ -893,12 +954,6 @@ with gr.Blocks(css=css) as demo:
893
  outputs=result
894
  )
895
 
896
- # Initial table update
897
- demo.load(
898
- update_tables,
899
- inputs=[model_type_filter],
900
- outputs=all_dfs
901
- )
902
 
903
  # Launch the application
904
  demo.launch()
 
220
  global df_synthesized_full, df_synthesized_10, df_human_generated
221
 
222
  try:
223
+ # Initialize HuggingFace API
224
+ api = HfApi()
225
+
226
  # Get submissions directory content from HuggingFace hub
227
+ try:
228
+ repo_files = api.list_files_info(
229
+ repo_id=REPO_ID,
230
+ repo_type="space",
231
+ paths=["submissions"]
232
+ )
233
+ except Exception as e:
234
+ print(f"Error listing repository contents: {str(e)}")
235
+ return
236
+
237
+ if not repo_files:
238
  print("No submissions directory found or empty")
239
  return
240
 
 
245
  'human_generated_eval': []
246
  }
247
 
248
+ # Group files by team folders
249
+ folder_files = {}
250
+ for file_info in repo_files:
251
+ path = file_info.path
252
+ if not path.startswith('submissions/'):
253
+ continue
254
+
255
+ parts = path.split('/')
256
+ if len(parts) < 3: # submissions/folder_name/file
257
  continue
258
 
259
+ folder_name = parts[1]
260
+ if folder_name not in folder_files:
261
+ folder_files[folder_name] = []
262
+ folder_files[folder_name].append(path)
263
+
264
+ # Process each team folder
265
+ for folder_name, files in folder_files.items():
266
  try:
267
+ # Look for latest.json
268
+ latest_file = next((f for f in files if f.endswith('latest.json')), None)
269
+ if not latest_file:
270
  continue
271
 
272
+ # Read latest.json
273
+ try:
274
+ latest_content = hub_storage.get_file_content(latest_file)
275
+ latest_info = json.loads(latest_content)
276
+ except Exception as e:
277
+ print(f"Error reading latest.json for {folder_name}: {str(e)}")
278
+ continue
279
+
280
+ if latest_info.get('status') != 'approved':
281
+ continue
282
+
283
+ timestamp = latest_info.get('latest_submission')
284
+ if not timestamp:
285
+ continue
286
+
287
+ # Find corresponding metadata file
288
+ metadata_file = next(
289
+ (f for f in files if f.endswith(f'metadata_{timestamp}.json')),
290
+ None
291
+ )
292
+ if not metadata_file:
293
+ continue
294
+
295
+ # Read metadata file
296
+ try:
297
+ metadata_content = hub_storage.get_file_content(metadata_file)
298
  submission_data = json.loads(metadata_content)
299
+ except Exception as e:
300
+ print(f"Error reading metadata for {folder_name}: {str(e)}")
301
+ continue
302
+
303
+ split = submission_data.get('Split')
304
+ if split in submissions_by_split:
305
+ submissions_by_split[split].append(submission_data)
306
+
307
+ # Update corresponding DataFrame
308
+ update_leaderboard_data(submission_data)
309
+
310
  except Exception as e:
311
+ print(f"Error processing folder {folder_name}: {str(e)}")
312
  continue
313
 
314
  print("Leaderboard initialized with existing submissions:")
 
333
  df_synthesized_10 = pd.DataFrame(data_synthesized_10)
334
  df_human_generated = pd.DataFrame(data_human_generated)
335
 
336
+ print("Initialized with baseline results")
337
+
338
  # Then scan and add submitted results
339
  scan_submissions_directory()
340
 
 
343
  except Exception as e:
344
  print(f"Error initializing leaderboard: {str(e)}")
345
 
346
+ # Utility function to get file content
347
+ def get_file_content(file_path):
348
+ """
349
+ Helper function to safely read file content from HuggingFace repository
350
+ """
351
+ try:
352
+ api = HfApi()
353
+ content = api.file_download(
354
+ repo_id=REPO_ID,
355
+ repo_type="space",
356
+ filename=file_path
357
+ )
358
+ return content.read().decode('utf-8')
359
+ except Exception as e:
360
+ print(f"Error reading file {file_path}: {str(e)}")
361
+ return None
362
 
363
  def save_submission(submission_data, csv_file):
364
  """
 
954
  outputs=result
955
  )
956
 
 
 
 
 
 
 
957
 
958
  # Launch the application
959
  demo.launch()
utils/hub_storage.py CHANGED
@@ -27,4 +27,20 @@ class HubStorage:
27
  filename=path_in_repo,
28
  repo_type="space",
29
  token=self.token
30
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  filename=path_in_repo,
28
  repo_type="space",
29
  token=self.token
30
+ )
31
+
32
+ def get_file_content(self, file_path):
33
+ """
34
+ Get content of a file from the repository
35
+ """
36
+ try:
37
+ api = HfApi()
38
+ content = api.file_download(
39
+ repo_id=self.repo_id,
40
+ repo_type="space",
41
+ filename=file_path
42
+ )
43
+ return content.read().decode('utf-8')
44
+ except Exception as e:
45
+ print(f"Error reading file {file_path}: {str(e)}")
46
+ return None