# long-code-arena — src/tasks_content.py
# NOTE(review): the lines previously here were HuggingFace web-page chrome
# (breadcrumb, username, commit hash, "raw / history / blame", file size)
# accidentally captured with the file; they are not Python and would break the module.
from typing import Optional
# Human-readable display names for each benchmark task, keyed by task id.
TASKS_PRETTY = dict(
    commit_message_generation="Commit Message Generation",
    bug_localization="Bug Localization on Issue",
    module_to_text="Module-to-Text",
    library_usage="Library Usage Examples Generation",
    project_code_completion="Project-level Code Completion",
    bug_localization_build_logs="Bug Localization on Build Logs",
)

# Inverse lookup: display name -> task id (display names are unique).
TASKS_PRETTY_REVERSE = {pretty: task_id for task_id, pretty in TASKS_PRETTY.items()}
# Markdown descriptions rendered in the UI for each task, keyed by task id.
# Keys must match TASKS_PRETTY. The last three tasks still have placeholder text.
TASKS_DESCRIPTIONS = {
    "commit_message_generation": """# Commit Message Generation\n
Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-commit-message-generation](https://huggingface.co/datasets/JetBrains-Research/lca-commit-message-generation) includes 163 manually curated commits from Python projects.
We use the following metrics for evaluation:
* [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
* [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
* [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
* [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
**Note.** The leaderboard is sorted by ROUGE-1 metric by default.
""",
    # FIX: this entry previously opened with "Our Module-to-Text benchmark" —
    # a copy-paste slip from the module_to_text entry; it describes the
    # bug-localization dataset, so it now says "Bug Localization".
    "bug_localization": """# Bug Localization\n
Our Bug Localization benchmark 🤗 [JetBrains-Research/lca-bug-localization](https://huggingface.co/datasets/JetBrains-Research/lca-bug-localization) includes 7,479 bug issue descriptions with information about pull request that fix them for Python, Java and Kotlin projects.
Moreover, 150 data points from the test split were manually verified and can be used for bug localization approaches evaluation.
We used information retrieval metrics such as R@k, P@k and F1-score for evaluation, taking k equals to 2.
""",
    "module_to_text": """# Module-to-Text\n
Our Module-to-Text benchmark 🤗 [JetBrains-Research/lca-module-to-text](https://huggingface.co/datasets/JetBrains-Research/lca-module-to-text) includes 206 manually curated text files describing modules from different Python projects.
We use the following metrics for evaluation:
* [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
* [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
* [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
* ChatGPT-Turing-Test
For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `module2text` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
""",
    # TODO: replace placeholder descriptions below with real ones.
    "library_usage": "cool description for Library Usage Examples Generation task",
    "project_code_completion": "cool description for Project-level Code Completion task",
    "bug_localization_build_logs": "cool description for Bug Localization on Build Logs task",
}
def get_submission_text_files_for_task(task_pretty: Optional[str]) -> str:
    """Return Markdown submission instructions for the task with display name *task_pretty*.

    Falls back to a generic prompt when no task is selected, and to a
    "no instructions yet" stub for tasks without dedicated instructions.
    """
    # No task selected (None or empty string): ask the user to pick one.
    if not task_pretty:
        return "Please, select a specific task to see more detailed instructions regarding submitting files."

    task_id = TASKS_PRETTY_REVERSE[task_pretty]

    # Only commit message generation has dedicated instructions so far.
    if task_id != "commit_message_generation":
        return f"**{task_pretty} Instructions:**\n\n* 🚧 There are no instructions for the current task yet."

    return f"""**{task_pretty} Instructions:**\n\n* Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results/tree/main/commit_message_generation/predictions). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional."""