Shiyu Zhao committed on
Commit
b65f2ef
·
1 Parent(s): 6542cc6

Update space

Browse files
Files changed (1) hide show
  1. app.py +142 -95
app.py CHANGED
@@ -11,10 +11,57 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
11
  import smtplib
12
  from email.mime.multipart import MIMEMultipart
13
  from email.mime.text import MIMEText
 
 
 
14
 
15
  from stark_qa import load_qa
16
  from stark_qa.evaluator import Evaluator
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  def process_single_instance(args):
20
  idx, eval_csv, qa_dataset, evaluator, eval_metrics = args
@@ -465,7 +512,7 @@ def send_submission_confirmation(meta_data, eval_results):
465
  except Exception as e:
466
  print(f"Failed to send submission confirmation: {e}")
467
 
468
-
469
  def process_submission(
470
  method_name, team_name, dataset, split, contact_email,
471
  code_repo, csv_file, model_description, hardware, paper_link
@@ -499,112 +546,112 @@ def process_submission(
499
  "(Optional) Paper link": paper_link
500
  }
501
 
502
- # Save CSV file
 
 
 
 
503
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
504
- model_name_clean = sanitize_name(method_name)
505
- team_name_clean = sanitize_name(team_name)
506
-
507
- # Create directory structure in the HuggingFace space
508
- base_dir = "submissions" # This will be in the HF space root
509
- submission_dir = os.path.join(base_dir, f"{model_name_clean}_{team_name_clean}")
510
- os.makedirs(submission_dir, exist_ok=True)
511
 
512
  # Save CSV file
513
  csv_filename = f"predictions_{timestamp}.csv"
514
- csv_path = os.path.join(submission_dir, csv_filename)
515
- if hasattr(csv_file, 'name'):
516
- with open(csv_file.name, 'rb') as source, open(csv_path, 'wb') as target:
517
- target.write(source.read())
518
 
519
- # Validate CSV file
520
- csv_valid, csv_message = validate_csv(csv_file)
521
- if not csv_valid:
522
- error_message = f"Error with CSV file: {csv_message}"
523
- send_error_notification(meta_data, error_message)
524
- return error_message
525
 
526
- # Process CSV file through evaluation pipeline
527
  try:
528
- results = compute_metrics(
529
- csv_file.name,
530
- dataset=dataset.lower(),
531
- split=split,
532
- num_workers=4
 
 
 
 
 
 
533
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
534
 
535
- if isinstance(results, str) and results.startswith("Error"):
536
- send_error_notification(meta_data, results)
537
- return f"Evaluation error: {results}"
538
-
539
- # Multiply results by 100 and round to 2 decimal places
540
- processed_results = {
541
- "hit@1": round(results['hit@1'] * 100, 2),
542
- "hit@5": round(results['hit@5'] * 100, 2),
543
- "recall@20": round(results['recall@20'] * 100, 2),
544
- "mrr": round(results['mrr'] * 100, 2)
545
- }
546
-
547
- # Prepare submission data
548
- submission_data = {
549
- "method_name": method_name,
550
- "team_name": team_name,
551
- "dataset": dataset,
552
- "split": split,
553
- "contact_email": contact_email,
554
- "code_repo": code_repo,
555
- "model_description": model_description,
556
- "hardware": hardware,
557
- "paper_link": paper_link,
558
- "results": processed_results,
559
- "status": "pending_review",
560
- "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
561
- "csv_path": csv_path
562
- }
563
-
564
- # Save metadata
565
- metadata_path = os.path.join(submission_dir, f"metadata_{timestamp}.json")
566
- with open(metadata_path, 'w') as f:
567
- json.dump(submission_data, f, indent=4)
568
-
569
- # Save latest.json
570
- latest_path = os.path.join(submission_dir, "latest.json")
571
- with open(latest_path, 'w') as f:
572
- json.dump({
573
- "latest_submission": timestamp,
574
- "status": "pending_review",
575
- "method_name": method_name
576
- }, f, indent=4)
577
-
578
- # Send email confirmation
579
- send_submission_confirmation(meta_data, processed_results)
580
-
581
- # Update leaderboard data
582
- update_leaderboard_data(submission_data)
583
-
584
- return f"""
585
- Submission successful!
586
-
587
- Evaluation Results:
588
- Hit@1: {processed_results['hit@1']:.2f}%
589
- Hit@5: {processed_results['hit@5']:.2f}%
590
- Recall@20: {processed_results['recall@20']:.2f}%
591
- MRR: {processed_results['mrr']:.2f}%
592
-
593
- Your submission has been saved and is pending review.
594
- A confirmation email has been sent to {contact_email}.
595
- Once approved, your results will appear in the leaderboard under the method name: {method_name}
596
- """
597
-
598
- except Exception as e:
599
- error_message = f"Error processing submission: {str(e)}"
600
- send_error_notification(meta_data, error_message)
601
- return error_message
602
-
603
  except Exception as e:
604
  error_message = f"Error processing submission: {str(e)}"
605
- send_error_notification(meta_data, error_message)
 
606
  return error_message
607
-
608
  def filter_by_model_type(df, selected_types):
609
  if not selected_types:
610
  return df.head(0)
 
11
  import smtplib
12
  from email.mime.multipart import MIMEMultipart
13
  from email.mime.text import MIMEText
14
+ from huggingface_hub import HfApi
15
+ from tempfile import NamedTemporaryFile
16
+ import shutil
17
 
18
  from stark_qa import load_qa
19
  from stark_qa.evaluator import Evaluator
20
 
21
+ from utils.hub_storage import HubStorage
22
+ from utils.token_handler import TokenHandler
23
+
24
+ # Initialize storage once at startup
25
+ try:
26
+ REPO_ID = "your-username/your-space-name" # Replace with your space name
27
+ hub_storage = HubStorage(REPO_ID)
28
+ except Exception as e:
29
+ raise RuntimeError(f"Failed to initialize HuggingFace Hub storage: {e}")
30
+
31
+ def process_submission(
32
+ method_name, team_name, dataset, split, contact_email,
33
+ code_repo, csv_file, model_description, hardware, paper_link
34
+ ):
35
+ """Process and validate submission"""
36
+ try:
37
+ # Your existing validation code here...
38
+
39
+ # Save CSV file using hub_storage
40
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
41
+ folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
42
+ csv_filename = f"predictions_{timestamp}.csv"
43
+ csv_path_in_repo = f"submissions/{folder_name}/{csv_filename}"
44
+
45
+ with NamedTemporaryFile(mode='wb', delete=False) as tmp_file:
46
+ shutil.copyfileobj(csv_file.file, tmp_file)
47
+
48
+ try:
49
+ hub_storage.save_to_hub(
50
+ file_content=tmp_file.name,
51
+ path_in_repo=csv_path_in_repo,
52
+ commit_message=f"Add submission: {method_name} by {team_name}"
53
+ )
54
+ finally:
55
+ os.unlink(tmp_file.name)
56
+
57
+ # Rest of your submission processing code...
58
+
59
+ except Exception as e:
60
+ error_message = f"Error processing submission: {str(e)}"
61
+ if 'meta_data' in locals():
62
+ send_error_notification(meta_data, error_message)
63
+ return error_message
64
+
65
 
66
  def process_single_instance(args):
67
  idx, eval_csv, qa_dataset, evaluator, eval_metrics = args
 
512
  except Exception as e:
513
  print(f"Failed to send submission confirmation: {e}")
514
 
515
+
516
  def process_submission(
517
  method_name, team_name, dataset, split, contact_email,
518
  code_repo, csv_file, model_description, hardware, paper_link
 
546
  "(Optional) Paper link": paper_link
547
  }
548
 
549
+ # Save and process files
550
+ api = HfApi()
551
+ REPO_ID = "snap-stanford/stark-leaderboard" # Replace with your space name
552
+ HF_TOKEN = os.getenv("HF_TOKEN")
553
+
554
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
555
+ folder_name = f"{sanitize_name(method_name)}_{sanitize_name(team_name)}"
 
 
 
 
 
 
556
 
557
  # Save CSV file
558
  csv_filename = f"predictions_{timestamp}.csv"
559
+ csv_path_in_repo = f"submissions/{folder_name}/{csv_filename}"
 
 
 
560
 
561
+ with NamedTemporaryFile(mode='wb', delete=False) as tmp_file:
562
+ shutil.copyfileobj(csv_file.file, tmp_file)
 
 
 
 
563
 
 
564
  try:
565
+ # api.upload_file(
566
+ # path_or_fileobj=tmp_file.name,
567
+ # path_in_repo=csv_path_in_repo,
568
+ # repo_id=REPO_ID,
569
+ # repo_type="space",
570
+ # token=HF_TOKEN
571
+ # )
572
+ hub_storage.save_to_hub(
573
+ file_content=tmp_file.name,
574
+ path_in_repo=csv_path_in_repo,
575
+ commit_message=f"Add submission: {method_name} by {team_name}"
576
  )
577
+ finally:
578
+ os.unlink(tmp_file.name)
579
+
580
+ # Process evaluation
581
+ results = compute_metrics(
582
+ csv_file.name,
583
+ dataset=dataset.lower(),
584
+ split=split,
585
+ num_workers=4
586
+ )
587
+
588
+ if isinstance(results, str):
589
+ send_error_notification(meta_data, results)
590
+ return f"Evaluation error: {results}"
591
+
592
+ # Process results (multiply by 100)
593
+ processed_results = {
594
+ "hit@1": round(results['hit@1'] * 100, 2),
595
+ "hit@5": round(results['hit@5'] * 100, 2),
596
+ "recall@20": round(results['recall@20'] * 100, 2),
597
+ "mrr": round(results['mrr'] * 100, 2)
598
+ }
599
+
600
+ # Save metadata
601
+ submission_data = {
602
+ **meta_data,
603
+ "results": processed_results,
604
+ "status": "pending_review",
605
+ "submission_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
606
+ "csv_path": csv_path_in_repo
607
+ }
608
+
609
+ metadata_path = f"submissions/{folder_name}/metadata_{timestamp}.json"
610
+ with NamedTemporaryFile(mode='w', delete=False) as tmp_file:
611
+ json.dump(submission_data, tmp_file, indent=4)
612
 
613
+ try:
614
+ # api.upload_file(
615
+ # path_or_fileobj=tmp_file.name,
616
+ # path_in_repo=metadata_path,
617
+ # repo_id=REPO_ID,
618
+ # repo_type="space",
619
+ # token=HF_TOKEN
620
+ # )
621
+ hub_storage.save_to_hub(
622
+ file_content=tmp_file.name,
623
+ path_in_repo=metadata_path,
624
+ commit_message=f"Add metadata: {method_name} by {team_name}"
625
+ )
626
+ finally:
627
+ os.unlink(tmp_file.name)
628
+
629
+ # Send confirmation email and update leaderboard
630
+ send_submission_confirmation(meta_data, processed_results)
631
+ update_leaderboard_data(submission_data)
632
+
633
+ return f"""
634
+ Submission successful!
635
+
636
+ Evaluation Results:
637
+ Hit@1: {processed_results['hit@1']:.2f}%
638
+ Hit@5: {processed_results['hit@5']:.2f}%
639
+ Recall@20: {processed_results['recall@20']:.2f}%
640
+ MRR: {processed_results['mrr']:.2f}%
641
+
642
+ Your submission has been saved and a confirmation email has been sent to {contact_email}.
643
+ Once approved, your results will appear in the leaderboard under: {method_name}
644
+
645
+ You can find your submission at:
646
+ https://huggingface.co/spaces/{REPO_ID}/tree/main/submissions/{folder_name}
647
+ """
648
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649
  except Exception as e:
650
  error_message = f"Error processing submission: {str(e)}"
651
+ if 'meta_data' in locals():
652
+ send_error_notification(meta_data, error_message)
653
  return error_message
654
+
655
  def filter_by_model_type(df, selected_types):
656
  if not selected_types:
657
  return df.head(0)