Add initial space version
- app.py +101 -0
- requirements.txt +1 -0
- src/content.py +31 -0
- src/get_results_for_task.py +33 -0
- src/submission_uploader.py +146 -0
- src/tasks.py +29 -0
app.py
ADDED
@@ -0,0 +1,101 @@
import os

import gradio as gr

from src.content import (INTRODUCTION_TEXT, INTRODUCTION_TITLE,
                         LEADERBOARD_TEXT, LEADERBOARD_TITLE,
                         SUBMISSION_TEXT_FILES, SUBMISSION_TEXT_INTRO,
                         SUBMISSION_TEXT_METADATA, SUBMISSION_TEXT_SUBMIT,
                         SUBMISSION_TEXT_TASK, SUBMISSION_TITLE)
from src.get_results_for_task import get_results_for_task_stub
from src.submission_uploader import SubmissionUploader
from src.tasks import TASKS_DESCRIPTIONS, TASKS_PRETTY, TASKS_PRETTY_REVERSE

submission_uploader = SubmissionUploader(os.environ["DATASET_ID"])


with gr.Blocks() as demo:
    gr.HTML(INTRODUCTION_TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    gr.HTML(LEADERBOARD_TITLE)
    gr.Markdown(LEADERBOARD_TEXT, elem_classes="markdown-text")

    with gr.Tabs():
        for task in TASKS_PRETTY_REVERSE:
            with gr.TabItem(task):
                with gr.Row():
                    gr.Markdown(TASKS_DESCRIPTIONS[task])

                leaderboard_table = gr.components.Dataframe(
                    value=get_results_for_task_stub(task), interactive=False
                )

    gr.HTML(SUBMISSION_TITLE)
    gr.Markdown(SUBMISSION_TEXT_INTRO, elem_classes="markdown-text")

    with gr.Accordion("🚀 Submit new results"):
        gr.Markdown(SUBMISSION_TEXT_TASK, elem_classes="markdown-text")
        task = gr.Radio(TASKS_PRETTY_REVERSE.keys(), label="Task")

        gr.Markdown(SUBMISSION_TEXT_METADATA, elem_classes="markdown-text")
        with gr.Row():
            with gr.Column():
                model_folder_textbox = gr.Textbox(
                    label="Model Folder",
                    placeholder="The name of the folder for this submission in our results dataset.",
                )
                model_name_textbox = gr.Textbox(
                    label="Model Name",
                    placeholder="The model name as it should be displayed on the leaderboard.",
                )
                model_availability_textbox = gr.Textbox(
                    label="Availability",
                    placeholder="Information about the model's availability and licensing.",
                )
                context_size_textbox = gr.Textbox(
                    label="Context Size",
                    placeholder="Context size (in tokens) used for the submission.",
                )
            with gr.Column():
                submitted_by_textbox = gr.Textbox(
                    label="Submitted By",
                    placeholder="Who submitted the model, as it will be displayed on the leaderboard.",
                )
                contact_textbox = gr.Textbox(
                    label="Contact Information",
                    placeholder="How the Long Code Arena team can contact you in case of any questions (won't go to the public dataset).",
                )
                comment_textbox = gr.Textbox(
                    label="Comment",
                    placeholder="Any comments you have for the Long Code Arena team (optional, won't go to the public dataset).",
                )
                url_textbox = gr.Textbox(
                    label="Relevant URLs",
                    placeholder="URLs to relevant resources (preprint/blogpost/code/etc.) with "
                    "additional details about your submission.",
                )

        gr.Markdown(SUBMISSION_TEXT_FILES, elem_classes="markdown-text")
        file_output = gr.File(file_count="multiple")

        gr.Markdown(SUBMISSION_TEXT_SUBMIT, elem_classes="markdown-text")
        submit_button = gr.Button("Submit")
        submission_result = gr.Markdown()
        submit_button.click(
            submission_uploader.upload_files,
            [
                task,
                model_folder_textbox,
                model_name_textbox,
                model_availability_textbox,
                url_textbox,
                context_size_textbox,
                submitted_by_textbox,
                file_output,
            ],
            submission_result,
        )

if __name__ == "__main__":
    demo.launch()
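For local debugging, the app above only needs the two environment variables the code reads: DATASET_ID (passed to SubmissionUploader) and HF_TOKEN (used inside it). A minimal smoke-test sketch, assuming placeholder values; the dataset id and token below are hypothetical and won't allow real submissions:

# run_local.py -- a sketch, not part of this commit
import os

# app.py and SubmissionUploader read these names from the environment;
# the values here are placeholders for local UI testing only.
os.environ.setdefault("DATASET_ID", "username/lca-results")
os.environ.setdefault("HF_TOKEN", "hf_dummy_token")

from app import demo  # noqa: E402  (imported after the environment is configured)

demo.launch()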
requirements.txt
ADDED
@@ -0,0 +1 @@
huggingface_hub
src/content.py
ADDED
@@ -0,0 +1,31 @@
# ================================
# =            ABOUT             =
# ================================
INTRODUCTION_TITLE = """<h1 align="center">🏟️ Long Code Arena</h1>"""

INTRODUCTION_TEXT = """🏟️ Long Code Arena is a benchmark of code-related tasks with large contexts, up to a whole code repository.
It currently spans six different tasks."""

# ================================
# =         LEADERBOARD          =
# ================================
LEADERBOARD_TITLE = '<h2 align="center">🏆 Leaderboard</h2>'

LEADERBOARD_TEXT = """Raw results from the leaderboard are available in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results)."""

# ================================
# =          SUBMISSION          =
# ================================
SUBMISSION_TITLE = '<h2 align="center">🚩 Make A Submission</h2>'

SUBMISSION_TEXT_INTRO = """Use the form below to submit new results to 🏟️ Long Code Arena. If any problems arise, don't hesitate to contact us by email `TODO` or open a discussion 🙂"""

SUBMISSION_TEXT_TASK = """1. Select the task you want to submit results for."""

SUBMISSION_TEXT_METADATA = """2. Fill in some metadata about your submission."""

SUBMISSION_TEXT_FILES = """3. Attach one or more files with your model's predictions.
* If several files are attached, they will be treated as separate runs of the submitted model (e.g., with different seeds), and the metrics will be averaged across runs. For the baselines provided by the 🏟️ Long Code Arena team, the results are averaged across 3 runs.
* Please attach files in [JSON Lines format](https://jsonlines.org/). For an example, check the predictions provided by the 🏟️ Long Code Arena team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results). Make sure to include the `"prediction"` and `"reference"` fields for each example; the rest are optional.
"""
SUBMISSION_TEXT_SUBMIT = """All set! A new PR to 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results) should be opened when you press the "Submit" button. The 🏟️ Long Code Arena team will review it shortly, and the results will appear on the leaderboard."""
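As a concrete illustration of the file format SUBMISSION_TEXT_FILES asks for, here is a minimal sketch that writes a JSON Lines predictions file with the required "prediction" and "reference" fields; the file name and the extra "hash" field are hypothetical, and extra fields are optional per the text above:

# write_predictions_example.py -- a sketch, not part of this commit
import json

examples = [
    {"prediction": "Fix off-by-one error in pagination", "reference": "Fix pagination bug", "hash": "abc123"},
    {"prediction": "Add retry logic to the HTTP client", "reference": "Retry failed requests"},
]

# One JSON object per line, as required by the JSON Lines format.
with open("predictions.jsonl", "w") as f:
    for example in examples:
        f.write(json.dumps(example) + "\n")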
src/get_results_for_task.py
ADDED
@@ -0,0 +1,33 @@
import pandas as pd

RESULTS_DATASET = "JetBrains-Research/lca-results"


def get_results_for_task_stub(task: str) -> pd.DataFrame:
    stub_df = pd.DataFrame(
        [
            {
                "Model Name": "GPT-4",
                "Availability": "Proprietary",
                "Context Size": "16k",
                "BLEU": "X",
                "ROUGE": "X",
                "ChrF": "X",
                "BERTScore": "X",
                "BERTScore (Normalized)": "X",
                "Submitted By": "🏟️ Long Code Arena Team",
            },
            {
                "Model Name": "CodeLlama-7b (instruct)",
                "Availability": "Llama 2 license",
                "Context Size": "16k",
                "BLEU": "X",
                "ROUGE": "X",
                "ChrF": "X",
                "BERTScore": "X",
                "BERTScore (Normalized)": "X",
                "Submitted By": "🏟️ Long Code Arena Team",
            },
        ]
    )
    return stub_df
src/submission_uploader.py
ADDED
@@ -0,0 +1,146 @@
import json
import logging
import os
from typing import List, Optional

from huggingface_hub import (CommitInfo, CommitOperationAdd, Discussion, HfApi,
                             HfFileSystem)

from .tasks import TASKS_PRETTY_REVERSE


class AlreadyExists(Exception):
    pass


class SubmissionUploader:
    """Class for adding new files to a dataset on a Hub and opening a PR.

    Heavily influenced by these amazing spaces:
    * https://huggingface.co/spaces/safetensors/convert
    * https://huggingface.co/spaces/gaia-benchmark/leaderboard
    """

    def __init__(self, dataset_id: str):
        self._api = HfApi(token=os.environ["HF_TOKEN"])
        self._fs = HfFileSystem(token=os.environ["HF_TOKEN"])
        self._dataset_id = dataset_id

    def _get_previous_pr(self, pr_title: str) -> Optional[Discussion]:
        """Searches among discussions of dataset repo for a PR with the given title."""
        try:
            discussions = self._api.get_repo_discussions(repo_id=self._dataset_id)
        except Exception:
            return None
        for discussion in discussions:
            if (
                discussion.status == "open"
                and discussion.is_pull_request
                and discussion.title == pr_title
            ):
                return discussion

    def _upload_files(
        self,
        task_id: str,
        model_folder: str,
        model_name_pretty: str,
        model_availability: str,
        urls: str,
        context_size: str,
        submitted_by: str,
        filenames: Optional[List[str]],
    ) -> List[CommitOperationAdd]:
        # add predictions files
        commit_operations = [
            CommitOperationAdd(
                path_in_repo=f"{task_id}/{model_folder}/predictions/{filename}",
                path_or_fileobj=filename,
            )
            for filename in filenames
        ]

        # add metadata file
        metadata_dict = {
            "model_name": model_name_pretty,
            "model_availability": model_availability,
            "urls": urls,
            "context_size": context_size,
            "submitted_by": submitted_by,
        }
        with open("metadata.json", "w") as f:
            json.dump(metadata_dict, f)
        commit_operations.append(
            CommitOperationAdd(
                path_in_repo=f"{task_id}/predictions/metadata.json",
                path_or_fileobj="metadata.json",
            )
        )

        return commit_operations

    def upload_files(
        self,
        task_pretty: str,
        model_folder: str,
        model_name_pretty: str,
        model_availability: str,
        urls: str,
        context_size: str,
        submitted_by: str,
        filenames: Optional[List[str]],
        force: bool = False,
    ) -> Optional[CommitInfo]:
        pr_title = f"🚀 New submission to {task_pretty} task: {model_name_pretty} with {context_size} context size from {submitted_by}"

        task_id = TASKS_PRETTY_REVERSE[task_pretty]

        if not force:
            if model_name_pretty in self._fs.ls(
                f"{self._dataset_id}/{task_id}/predictions"
            ) and all(
                filename
                in self._fs.ls(
                    f"{self._dataset_id}/{task_id}/predictions/{model_name_pretty}"
                )
                for filename in filenames + ["metadata.json"]
            ):
                raise AlreadyExists(
                    f"{model_name_pretty} is already present in {self._dataset_id}."
                )

            prev_pr = self._get_previous_pr(pr_title)
            if prev_pr is not None:
                url = f"https://huggingface.co/{self._dataset_id}/discussions/{prev_pr.num}"
                raise AlreadyExists(
                    f"{self._dataset_id} already has an open PR for this submission: {url}."
                )

        commit_operations = self._upload_files(
            task_id=task_id,
            model_folder=model_folder,
            model_name_pretty=model_name_pretty,
            model_availability=model_availability,
            urls=urls,
            context_size=context_size,
            submitted_by=submitted_by,
            filenames=filenames,
        )

        new_pr = self._api.create_commit(
            repo_id=self._dataset_id,
            operations=commit_operations,
            commit_message=pr_title,
            commit_description=f"""New submission to {task_pretty} task in 🏟️ Long Code Arena benchmark!

* Model name: {model_name_pretty}
* Model availability: {model_availability}
* Context Size: {context_size}
* Relevant URLs: {urls}
* Submitted By: {submitted_by}
""",
            create_pr=True,
        )
        logging.info(f"PR created at {new_pr.pr_url}")

        return new_pr
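The Gradio button in app.py calls upload_files with the form values; the same method can also be exercised directly. A minimal sketch under stated assumptions: HF_TOKEN is exported, "username/lca-results" is a dataset you can open PRs against, and predictions.jsonl exists locally (all three are hypothetical here):

# submit_example.py -- a sketch, not part of this commit
from src.submission_uploader import SubmissionUploader

uploader = SubmissionUploader("username/lca-results")  # hypothetical dataset id
pr = uploader.upload_files(
    task_pretty="Commit Message Generation",
    model_folder="my-model",
    model_name_pretty="My Model",
    model_availability="Apache 2.0",
    urls="https://example.com/my-model",
    context_size="16k",
    submitted_by="Jane Doe",
    filenames=["predictions.jsonl"],  # local prediction files in JSON Lines format
)
print(pr.pr_url)  # link to the newly opened PR in the results dataset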
src/tasks.py
ADDED
@@ -0,0 +1,29 @@
TASKS_PRETTY = {
    "cmg": "Commit Message Generation",
    "bug_localization": "Bug Localization on Issue",
    "module_to_text": "Module-to-Text",
    "library_usage": "Library Usage Examples Generation",
    "project_code_completion": "Project-level Code Completion",
    "bug_localization_build_logs": "Bug Localization on Build Logs",
}
TASKS_PRETTY_REVERSE = {value: key for key, value in TASKS_PRETTY.items()}

TASKS_DESCRIPTIONS = {
    "Commit Message Generation": """# Commit Message Generation\n

Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-cmg](https://huggingface.co/datasets/JetBrains-Research/lca-cmg) includes 163 manually curated commits from Python projects.

We use the following metrics for evaluation:
* [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
* [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
* [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
* [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)

For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
""",
    "Bug Localization on Issue": "cool description for Bug Localization on Issue task",
    "Module-to-Text": "cool description for Module-to-Text task",
    "Library Usage Examples Generation": "cool description for Library Usage Examples Generation task",
    "Project-level Code Completion": "cool description for Project-level Code Completion task",
    "Bug Localization on Build Logs": "cool description for Bug Localization on Build Logs task",
}
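The Commit Message Generation description above lists the evaluation metrics by their 🤗 evaluate spaces. As a small sketch of how predictions and references in the submission format could be scored with one of them (ChrF here; the example strings are made up and this is not the official evaluation script):

# score_chrf_example.py -- a sketch, not part of this commit
import evaluate

predictions = ["Fix off-by-one error in pagination"]
references = [["Fix pagination bug"]]  # ChrF accepts a list of reference lists per prediction

chrf = evaluate.load("chrf")
print(chrf.compute(predictions=predictions, references=references))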