Add a separate dataset for aggregating requests metadata
- app.py: +6 -2
- src/submission_uploader.py: +83 -12
app.py CHANGED

@@ -30,7 +30,9 @@ logging.basicConfig(
     handlers=[logging.StreamHandler()],
 )
 
-submission_uploader = SubmissionUploader(...)
+submission_uploader = SubmissionUploader(
+    dataset_id=os.environ["DATASET_ID"], private_dataset_id=os.environ["PRIVATE_DATASET_ID"]
+)
 
 
 with gr.Blocks() as demo:
@@ -61,7 +63,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             model_folder_textbox = gr.Textbox(
                 label="Model Folder",
-                placeholder="How to call a folder related to this submission in our results dataset.",
+                placeholder="How to call a folder related to this submission in our results dataset (should be unique).",
             )
             model_name_textbox = gr.Textbox(
                 label="Model Name",
@@ -111,6 +113,8 @@ with gr.Blocks() as demo:
             url_textbox,
             context_size_textbox,
             submitted_by_textbox,
+            contact_textbox,
+            comment_textbox,
             file_output,
         ],
         submission_result,
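For context, a minimal sketch of how the Space would be configured after this change. The secret and variable names (HF_TOKEN, DATASET_ID, PRIVATE_DATASET_ID) come from the diff above; the concrete values are placeholders:

import os

from src.submission_uploader import SubmissionUploader

# Placeholder values; the real Space provides these as repository secrets/variables.
os.environ.setdefault("HF_TOKEN", "hf_xxx")
os.environ.setdefault("DATASET_ID", "some-org/results")           # public results dataset
os.environ.setdefault("PRIVATE_DATASET_ID", "some-org/requests")  # private requests dataset

submission_uploader = SubmissionUploader(
    dataset_id=os.environ["DATASET_ID"],
    private_dataset_id=os.environ["PRIVATE_DATASET_ID"],
)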
src/submission_uploader.py CHANGED

@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import time
 from tempfile import TemporaryDirectory
 from typing import Dict, List, Optional
 
@@ -26,10 +27,11 @@ class SubmissionUploader:
     * https://huggingface.co/spaces/gaia-benchmark/leaderboard
     """
 
-    def __init__(self, dataset_id: str):
+    def __init__(self, dataset_id: str, private_dataset_id: str):
         self._api = HfApi(token=os.environ["HF_TOKEN"])
         self._fs = HfFileSystem(token=os.environ["HF_TOKEN"])
         self._dataset_id = dataset_id
+        self._private_dataset_id = private_dataset_id
 
     def _get_previous_pr(self, pr_title: str) -> Optional[Discussion]:
         """Searches among discussions of dataset repo for a PR with the given title."""
@@ -46,10 +48,10 @@ class SubmissionUploader:
         self,
         model_name_pretty: str,
         model_availability: str,
-        urls: str,
+        urls: Optional[str],
         context_size: str,
         submitted_by: str,
-    ) -> Dict[str, str]:
+    ) -> Dict[str, Optional[str]]:
         return {
             "model_name": model_name_pretty,
             "model_availability": model_availability,
@@ -58,6 +60,45 @@ class SubmissionUploader:
             "submitted_by": submitted_by,
         }
 
+    def _upload_request(
+        self,
+        task_id: str,
+        model_folder: str,
+        model_name_pretty: str,
+        model_availability: str,
+        urls: Optional[str],
+        context_size: str,
+        submitted_by: str,
+        contact_information: str,
+        comment: Optional[str],
+        pr_url: str,
+        temp_directory: str,
+    ) -> List[CommitOperationAdd]:
+        request_metadata = {
+            "model_folder": model_folder,
+            "model_name_pretty": model_name_pretty,
+            "model_availability": model_availability,
+            "urls": urls,
+            "context_size": context_size,
+            "submitted_by": submitted_by,
+            "contact_information": contact_information,
+            "comment": comment,
+            "timestamp": time.time(),
+            "pr_url": pr_url,
+        }
+
+        with open(os.path.join(temp_directory, "request_metadata.json"), "w") as f:
+            json.dump(request_metadata, f)
+
+        num_requests_already_present = len(self._fs.ls(f"datasets/{self._private_dataset_id}/{task_id}/"))
+        commit_operations = [
+            CommitOperationAdd(
+                path_in_repo=f"{task_id}/{num_requests_already_present}_{model_folder}.json",
+                path_or_fileobj=os.path.join(temp_directory, "request_metadata.json"),
+            )
+        ]
+        return commit_operations
+
     def _upload_predictions(
         self,
         task_id: str,
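The new _upload_request helper serializes all form fields to request_metadata.json and stages it as {task_id}/{index}_{model_folder}.json in the private requests dataset, where the index is the number of requests already stored for that task. A sketch of what one stored file could contain (every value below is hypothetical):

import json
import time

# Keys mirror the request_metadata dict built in _upload_request; values are made up.
request_metadata = {
    "model_folder": "my-model-16k",
    "model_name_pretty": "My Model",
    "model_availability": "Publicly available",
    "urls": "https://example.com/my-model",
    "context_size": "16000",
    "submitted_by": "Jane Doe",
    "contact_information": "jane.doe@example.com",
    "comment": None,
    "timestamp": time.time(),  # Unix timestamp, as recorded by the helper
    "pr_url": "https://huggingface.co/datasets/<results-dataset>/discussions/<N>",
}

print(json.dumps(request_metadata, indent=2))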
@@ -107,7 +148,7 @@
         model_folder: str,
         model_name_pretty: str,
         model_availability: str,
-        urls: str,
+        urls: Optional[str],
         context_size: str,
         submitted_by: str,
         temp_directory: str,
@@ -141,9 +182,11 @@
         model_folder: str,
         model_name_pretty: str,
         model_availability: str,
-        urls: str,
+        urls: Optional[str],
         context_size: str,
         submitted_by: str,
+        contact_information: str,
+        comment: Optional[str],
         filenames: Optional[List[str]],
     ):
         assert task_pretty and task_pretty in TASKS_PRETTY_REVERSE, "Please, select one of the supported tasks."
@@ -158,6 +201,7 @@
 
         assert submitted_by, "Please, specify non-empty information about a submission's author(s)."
         assert filenames, "Please, attach at least one file with predictions."
+        assert contact_information, "Please, fill in the field with contact information."
 
     def upload_files(
         self,
@@ -165,9 +209,11 @@
         model_folder: str,
         model_name_pretty: str,
         model_availability: str,
-        urls: str,
+        urls: Optional[str],
         context_size: str,
         submitted_by: str,
+        contact_information: str,
+        comment: Optional[str],
         filenames: Optional[List[str]],
         force: bool = False,
     ) -> str:
@@ -180,6 +226,8 @@
             urls=urls,
             context_size=context_size,
             submitted_by=submitted_by,
+            contact_information=contact_information,
+            comment=comment,
             filenames=filenames,
         )
         pr_title = f"🚀 New submission to {task_pretty} task: {model_name_pretty} with {context_size} context size from {submitted_by}"
@@ -190,11 +238,10 @@
 
         logging.info("Checking if this request has already been submitted...")
         if not force:
-            if (
-                ...
-                ...
-            ):
-                return styled_warning(f"{model_name_pretty} is already present in {self._dataset_id}.")
+            if model_folder in self._fs.ls(f"datasets/{self._dataset_id}/{task_id}/predictions"):
+                return styled_warning(
+                    f"{model_folder} is already present in {self._dataset_id}, please, select another folder name."
+                )
 
         prev_pr = self._get_previous_pr(pr_title)
         if prev_pr is not None:
@@ -224,7 +271,7 @@
                     temp_directory=str(d),
                 )
 
-                logging.info("Creating commit...")
+                logging.info(f"Creating commit to results dataset...")
                 new_pr = self._api.create_commit(
                     repo_id=self._dataset_id,
                     operations=predictions_commit_operations + results_commit_operations,
@@ -233,6 +280,30 @@
                     create_pr=True,
                     repo_type="dataset",
                 )
+
+                logging.info(f"Creating commit to requests dataset...")
+                request_commit_operations = self._upload_request(
+                    task_id=task_id,
+                    model_folder=model_folder,
+                    temp_directory=str(d),
+                    model_name_pretty=model_name_pretty,
+                    model_availability=model_availability,
+                    urls=urls,
+                    context_size=context_size,
+                    submitted_by=submitted_by,
+                    contact_information=contact_information,
+                    comment=comment,
+                    pr_url=new_pr.pr_url,
+                )
+                self._api.create_commit(
+                    repo_id=self._private_dataset_id,
+                    operations=request_commit_operations,
+                    commit_message=pr_title,
+                    commit_description=f"""New submission to {task_pretty} task in 🏟️ Long Code Arena benchmark!\n* Model name: {model_name_pretty}\n* Model availability: {model_availability}\n* Context Size: {context_size}\n* Relevant URLs: {urls}\n* Submitted By: {submitted_by}\n* PR: {new_pr.pr_url}\n* Contact information: {contact_information}\n* Comment: {comment}""",
+                    create_pr=True,
+                    repo_type="dataset",
+                )
+
                 return styled_message(f"🎉 PR created at {new_pr.pr_url}.")
 
         except Exception as e:
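End to end, one submission now yields two PRs: a public one with predictions and results, and a private one with the request metadata, so contact information stays out of the public dataset. A hedged usage sketch: the leading task_pretty parameter and all argument values are assumptions inferred from the surrounding code:

uploader = SubmissionUploader(
    dataset_id="some-org/results",           # hypothetical repo ids
    private_dataset_id="some-org/requests",
)
message = uploader.upload_files(
    task_pretty="Library-based code generation",  # assumed to be a key of TASKS_PRETTY_REVERSE
    model_folder="my-model-16k",
    model_name_pretty="My Model",
    model_availability="Publicly available",
    urls="https://example.com/my-model",
    context_size="16000",
    submitted_by="Jane Doe",
    contact_information="jane.doe@example.com",
    comment="First submission.",
    filenames=["/tmp/predictions.jsonl"],
)
print(message)  # styled HTML message: PR link on success, warning otherwise

Note that the per-task file index comes from HfFileSystem.ls at commit time, so uniqueness of {index}_{model_folder}.json effectively rests on the earlier folder-name check against the public dataset.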