saridormi committed
Commit 04f40cd • 1 Parent(s): a9273cf

Display URLs on the leaderboard, tweak the interface a little, and fix CMG descriptions

app.py CHANGED
@@ -82,7 +82,7 @@ with gr.Blocks() as demo:
             with gr.Column():
                 url_textbox = gr.Textbox(
                     label="Relevant URLs",
-                    placeholder="URLs to relevant resources with additional details about your submission (optional).",
+                    placeholder='URLs to relevant resources with additional details about your submission (optional). Use the following format: "[text1](link1), [text2](link2)".',
                 )
                 model_availability_textbox = gr.Textbox(
                     label="Availability",
@@ -107,9 +107,12 @@ with gr.Blocks() as demo:
             )
 
             gr.Markdown(SUBMISSION_TEXT_FILES, elem_classes="markdown-text")
-            task_specific_instructions = gr.Markdown(get_submission_text_files_for_task(None))
-            task_selection.select(get_submission_text_files_for_task, [task_selection], task_specific_instructions)
-            file_output = gr.File(file_count="multiple")
+            with gr.Row():
+                with gr.Column(variant="panel"):
+                    task_specific_instructions = gr.Markdown(get_submission_text_files_for_task(None))
+                    task_selection.select(get_submission_text_files_for_task, [task_selection], task_specific_instructions)
+                with gr.Column():
+                    file_output = gr.File(file_count="multiple")
 
             gr.Markdown(SUBMISSION_TEXT_SUBMIT, elem_classes="markdown-text")
             submit_button = gr.Button("Submit")

src/formatting.py CHANGED
@@ -10,5 +10,5 @@ def styled_message(message):
     return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"
 
 
-def model_hyperlink(link, model_name):
+def model_hyperlink(model_name, link):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'

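For context on why the parameters were swapped rather than just renamed: the existing call site builds the "Model Name" column with keyword arguments, which is unaffected, while the new `_process_urls` helper (added below in `src/get_results_for_task.py`) unpacks regex groups positionally. A minimal sketch, with a hypothetical link, assuming the `MD_LINK_PATTERN` added in `src/utils.py`:

```python
# Sketch only (not part of the commit); the real model_hyperlink also sets link styling.
import re

MD_LINK_PATTERN = r"\[(.*)\]\((.*?)\)"  # as added in src/utils.py


def model_hyperlink(model_name, link):  # new argument order from this commit
    return f'<a target="_blank" href="{link}">{model_name}</a>'


# Keyword call, as used for the "Model Name" column -- order-independent:
model_hyperlink(link="https://example.com/model", model_name="CodeLlama-7b (instruct)")

# Positional unpacking of regex groups, as in the new _process_urls helper:
# groups() yields (text, link), so the (model_name, link) order is required.
text_and_link = re.search(MD_LINK_PATTERN, "[docs](https://example.com)").groups()
model_hyperlink(*text_and_link)  # text_and_link == ("docs", "https://example.com")
```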
src/get_results_for_task.py CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+import re
 
 import pandas as pd  # type: ignore[import]
 from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]
@@ -12,6 +13,7 @@ from .leaderboard_formatting import (
     get_columns_per_task,
 )
 from .tasks_content import TASKS_PRETTY_REVERSE
+from .utils import MD_LINK_PATTERN
 
 try:
     AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
@@ -32,6 +34,7 @@ def _get_results_stub() -> pd.DataFrame:
                 "BERTScore": "X",
                 "BERTScore (Normalized)": "X",
                 "Submitted By": "🏟 Long Code Arena Team",
+                "Resources": "",
             },
             {
                 "Model Name": "CodeLlama-7b (instruct)",
@@ -43,14 +46,24 @@ def _get_results_stub() -> pd.DataFrame:
                 "BERTScore": "X",
                 "BERTScore (Normalized)": "X",
                 "Submitted By": "🏟 Long Code Arena Team",
+                "Resources": "",
             },
         ]
     )
     return stub_df
 
 
+def _process_urls(raw_urls: str) -> str:
+    if not raw_urls:
+        return raw_urls
+    html_urls = [model_hyperlink(*re.search(MD_LINK_PATTERN, url.strip()).groups()) for url in raw_urls.split(",")]
+    return ", ".join(html_urls)
+
+
 def _get_results_dataset(task_id: str) -> pd.DataFrame:
-    results_df = load_dataset(os.environ["DATASET_ID"], task_id, split="test").to_pandas()
+    results_df = load_dataset(
+        os.environ["DATASET_ID"], task_id, split="test", download_mode="force_redownload"
+    ).to_pandas()
     results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")
     results_df["Context Size"] = results_df["Context Size"].map(lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x)
 
@@ -66,6 +79,7 @@ def _get_results_dataset(task_id: str) -> pd.DataFrame:
         model_hyperlink(link=link, model_name=model_name) if link else model_name
         for link, model_name in zip(results_df["model_url"], results_df["Model Name"])
     ]
+    results_df["Resources"] = [_process_urls(urls) for urls in results_df["Resources"]]
     results_df = results_df[get_columns_per_task(task_id)]
     return results_df
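As an aside, here is a standalone sketch (not from the repository) of what the new `_process_urls` helper produces for a leaderboard cell; `MD_LINK_PATTERN` and a simplified `model_hyperlink` are inlined so the snippet runs on its own, and the input strings are hypothetical:

```python
# Standalone sketch of the Resources-column conversion added above.
import re

MD_LINK_PATTERN = r"\[(.*)\]\((.*?)\)"  # as added in src/utils.py


def model_hyperlink(model_name, link):  # simplified version of src/formatting.py
    return f'<a target="_blank" href="{link}">{model_name}</a>'


def _process_urls(raw_urls: str) -> str:
    if not raw_urls:
        return raw_urls
    html_urls = [model_hyperlink(*re.search(MD_LINK_PATTERN, url.strip()).groups()) for url in raw_urls.split(",")]
    return ", ".join(html_urls)


print(_process_urls(""))  # empty cells (e.g., the stub rows) pass through unchanged
print(_process_urls("[paper](https://a.example), [code](https://b.example)"))
# <a target="_blank" href="https://a.example">paper</a>, <a target="_blank" href="https://b.example">code</a>
# A value without Markdown brackets would make re.search return None and raise
# AttributeError here, which is what the new check in src/submission_uploader.py prevents.
```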
 
src/leaderboard_formatting.py CHANGED
@@ -10,7 +10,7 @@ COLUMNS_PRETTY = {
     "bertscore_normalized": "BERTScore (Normalized)",
     "model_name": "Model Name",
     "model_availability": "Availability",
-    "urls": "URLs",
+    "urls": "Resources",
     "context_size": "Context Size",
     "submitted_by": "Submitted By",
 }
@@ -35,9 +35,9 @@ SORT_COLUMN_PER_TASK = {"commit_message_generation": "ROUGE-1"}
 def get_columns_per_task(task_id: str) -> List[str]:
     metrics_per_task = METRICS_PER_TASK[task_id]
 
-    return ["Model Name", "Availability", "Context Size"] + metrics_per_task + ["Submitted By"]
+    return ["Model Name", "Availability", "Context Size"] + metrics_per_task + ["Submitted By", "Resources"]
 
 
 def get_types_per_task(task_id: str) -> List[str]:
     metrics_per_task = METRICS_PER_TASK.get(task_id, (0, 0, 0, 0, 0))
-    return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown"]
+    return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown", "html"]

src/submission_uploader.py CHANGED
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import re
 import time
 from tempfile import TemporaryDirectory
 from typing import List, Optional
@@ -13,6 +14,7 @@ from tqdm import tqdm
 from .evaluation import METRICS
 from .formatting import styled_error, styled_message, styled_warning
 from .tasks_content import TASKS_PRETTY_REVERSE
+from .utils import MD_LINK_PATTERN
 
 
 class AlreadyExists(Exception):
@@ -199,6 +201,12 @@ class SubmissionUploader:
         except:
             raise ValueError("Please, specify a model's context size as an integer (e.g., 16000).")
 
+        if urls is not None and "," in urls:
+            urls_list = urls.split(",")
+            assert all(
+                re.match(rf"^{MD_LINK_PATTERN}$", url.strip()) for url in urls_list
+            ), 'Please, use the following format for URLs: "[text1](link1), [text2](link2)"'
+
         assert submitted_by, "Please, specify non-empty information about a submission's author(s)."
         assert filenames, "Please, attach at least one file with predictions."
         assert contact_information, "Please, fill in the field with contact information."
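A quick sketch (not from the repository) of what the format check above accepts and rejects, with hypothetical inputs and `MD_LINK_PATTERN` inlined from `src/utils.py`:

```python
# Sketch of the URL-format validation added above; inputs are hypothetical.
import re

MD_LINK_PATTERN = r"\[(.*)\]\((.*?)\)"  # as added in src/utils.py


def urls_ok(urls: str) -> bool:
    # mirrors the assert condition: every comma-separated entry must be a full
    # "[text](link)" Markdown link
    return all(re.match(rf"^{MD_LINK_PATTERN}$", url.strip()) for url in urls.split(","))


print(urls_ok("[paper](https://a.example), [code](https://b.example)"))  # True
print(urls_ok("https://a.example, https://b.example"))                   # False -> submission rejected
```

Note that the guard only runs when the field contains a comma, so a single-entry value without a comma is not validated by this branch.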
src/tasks_content.py CHANGED
@@ -13,7 +13,7 @@ TASKS_PRETTY_REVERSE = {value: key for key, value in TASKS_PRETTY.items()}
 TASKS_DESCRIPTIONS = {
     "commit_message_generation": """# Commit Message Generation\n
 
-    Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-cmg](https://huggingface.co/datasets/JetBrains-Research/lca-cmg) includes 163 manually curated commits from Python projects.
+    Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-commit-message-generation](https://huggingface.co/datasets/JetBrains-Research/lca-commit-message-generation) includes 163 manually curated commits from Python projects.
 
     We use the following metrics for evaluation:
     * [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
@@ -21,7 +21,9 @@ TASKS_DESCRIPTIONS = {
     * [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
     * [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
 
-    For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
+    For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to the `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
+
+    **Note.** The leaderboard is sorted by the ROUGE-1 metric by default.
     """,
     "bug_localization": "cool description for Bug Localization on Issue task",
     "module_to_text": "cool description for Module-to-Text task",

src/utils.py ADDED
@@ -0,0 +1 @@
+MD_LINK_PATTERN = r"\[(.*)\]\((.*?)\)"