add new files

Files changed:
- README.md +7 -46
- analysis.ipynb +0 -0
- api_wrappers/hf_data_loader.py +11 -0
- change_visualizer.py +7 -8
- chart.ipynb +0 -0
- chart_processing.ipynb +0 -0
- config.py +2 -3
- custom_metrics/__init__.py +0 -0
- custom_metrics/gpt_eval.py +0 -81
- data_stats.ipynb +759 -0
- generation_steps/{synthetic_end_to_start.py → synthetic_backward.py} +0 -0
- generation_steps/{synthetic_start_to_end.py → synthetic_forward.py} +0 -0
- metrics_analysis.ipynb +0 -0
- poetry.lock +0 -0
- pyproject.toml +187 -0
README.md CHANGED

@@ -6,52 +6,13 @@ sdk_version: 4.37.2
 app_file: change_visualizer.py
 ---
 
-
-
-
-
-
-
-- Grazie API JWT token and Hugging Face token must be stored as environment variables.
-- ### Visualization app -- a Gradio application that is currently deployed
-at https://huggingface.co/spaces/JetBrains-Research/commit-rewriting-visualization.
-- Shows
-- The "golden" dataset of manually collected samples; the dataset is downloaded on startup
-from https://huggingface.co/datasets/JetBrains-Research/commit-msg-rewriting
-- The entire dataset that includes the synthetic samples; the dataset is downloaded on startup
-from https://huggingface.co/datasets/JetBrains-Research/synthetic-commit-msg-rewriting
-- Some statistics collected for the dataset (and its parts); computed on startup
-
-_Note: datasets updated => need to restart the app to see the changes._
-- Files
-- [change_visualizer.py](change_visualizer.py)
-- ### Data processing pipeline (_note: datasets and files names can be changed in the configuration file_)
-- Run the whole pipeline by running [run_pipeline.py](run_pipeline.py)
-- All intermediate results are stored as files defined in config
-- Intermediate steps (can run them separately by running the corresponding files
-from [generation_steps](generation_steps)). The input is then taken from the previous step's artifact.
-- Generate the synthetic samples
-- Files [generation_steps/synthetic_end_to_start.py](generation_steps/synthetic_end_to_start.py)
-and [generation_steps/synthetic_start_to_end.py](generation_steps/synthetic_start_to_end.py)
-- The first generation step (end to start) downloads the `JetBrains-Research/commit-msg-rewriting`
-and `JetBrains-Research/lca-commit-message-generation` datasets from
-Hugging Face datasets.
-- Compute metrics
-- File [generation_steps/metrics_analysis.py](generation_steps/metrics_analysis.py)
-- Includes the functions for all metrics
-- Downloads `JetBrains-Research/lca-commit-message-generation` Hugging Face dataset.
-- The resulting artifact (dataset with golden and synthetic samples, attached reference messages and computed
-metrics) is saved to the file [output/synthetic.csv](output/synthetic.csv). It should be uploaded
-to https://huggingface.co/datasets/JetBrains-Research/synthetic-commit-msg-rewriting **manually**.
-- ### Data analysis
-- [analysis_util.py](analysis_util.py) -- some functions used for data analysis, e.g., correlations computation.
-- [analysis.ipynb](analysis.ipynb) -- compute the correlations, the resulting tables.
-- [chart_processing.ipynb](chart_processing.ipynb) -- Jupyter Notebook that draws the charts that were used in the
-presentation/thesis.
-- [generated_message_length_comparison.ipynb](generated_message_length_comparison.ipynb) -- compare the average
-length of commit messages generated using the current prompt (one used in the research) and the production prompt
-(one used to generate the messages that are measured in FUS logs). _Not finished, because could not get a Grazie
-token; as soon as the token is received, the notebook can be run by following the instructions from the notebook._
+# Commit Message Editing Visualisation ✏️
+
+This space provides a visualization app for exploring the commit message edits datasets (🤗[expert-labeled](https://huggingface.co/datasets/JetBrains-Research/commit-msg-edits) and 🤗[synthetic](https://huggingface.co/datasets/JetBrains-Research/synthetic-commit-msg-edits))
+from "Towards Realistic Evaluation of Commit Message Generation by Matching Online and Offline Settings" paper as well as some important artifacts from our work.
+
+## Artifacts
+
+* [`metrics_analysis.ipynb`](metrics_analysis.ipynb) contains the code for metrics calculation and analysis;
+* [`chart.ipynb`](chart.ipynb) contains the code for Figure 4 with edit distance distribution;
+* [`data_stats.ipynb`](data_stats.ipynb) contains the code for obtaining the dataset statistics from Table 1.
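Both datasets linked in the new README are plain Hugging Face datasets. A minimal sketch of loading one of them, mirroring the call used in `data_stats.ipynb` from this same commit (the config name `"all_pairs"` and the `train` split come from that notebook; the expert-labeled dataset is assumed to load the same way):

```python
# Sketch: load the synthetic commit-message-edits dataset as a pandas DataFrame,
# exactly as data_stats.ipynb in this commit does.
from datasets import load_dataset

df = load_dataset("JetBrains-Research/synthetic-commit-msg-edits",
                  "all_pairs", split="train").to_pandas()
print(df[["G_type", "E_type", "is_related"]].head())
```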
analysis.ipynb DELETED
The diff for this file is too large to render.
api_wrappers/hf_data_loader.py CHANGED

@@ -1,7 +1,9 @@
 import json
+import os
 from datetime import datetime, timedelta
 
 from datasets import load_dataset
+from huggingface_hub import hf_hub_download, list_repo_tree
 
 import config
 
@@ -72,6 +74,15 @@ def load_synthetic_as_pandas():
 
 def load_full_commit_with_predictions_as_pandas():
     full_dataset = load_full_commit_as_pandas()
+
+    # TODO
+    # for prediction_file in list_repo_tree(repo_id=config.HF_PREDICTIONS_DATASET_NAME,
+    #                                       path=os.path.join("commit_message_generation/predictions", config.HF_PREDICTIONS_MODEL),
+    #                                       repo_type="dataset"):
+    #     hf_hub_download(prediction_file.path,
+    #                     repo_id=config.HF_PREDICTIONS_DATASET_NAME,
+    #                     repo_type="dataset",)
+
     predictions_dataset = load_dataset(config.HF_PREDICTIONS_DATASET_NAME,
                                        config.HF_PREDICTIONS_DATASET_SUBNAME,
                                        split=config.HF_PREDICTIONS_DATASET_SPLIT,
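The commented-out TODO above hints at downloading per-model prediction files straight from the dataset repository. A hypothetical completion of that idea, not part of the commit: it assumes the files sit under `commit_message_generation/predictions/<HF_PREDICTIONS_MODEL>` as the comment suggests, and the helper name `download_prediction_files` is invented here.

```python
# Hypothetical sketch of the TODO above: list the prediction files for the chosen
# model in the dataset repo and cache them locally with huggingface_hub.
import os

from huggingface_hub import hf_hub_download, list_repo_tree

import config


def download_prediction_files():
    prediction_dir = os.path.join("commit_message_generation/predictions",
                                  config.HF_PREDICTIONS_MODEL)
    local_paths = []
    for entry in list_repo_tree(repo_id=config.HF_PREDICTIONS_DATASET_NAME,
                                path_in_repo=prediction_dir,
                                repo_type="dataset"):
        # hf_hub_download takes the repo id plus the in-repo file path (`filename`)
        # and returns the local cache path of the downloaded file.
        local_paths.append(hf_hub_download(repo_id=config.HF_PREDICTIONS_DATASET_NAME,
                                           filename=entry.path,
                                           repo_type="dataset"))
    return local_paths
```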
change_visualizer.py CHANGED

@@ -86,10 +86,10 @@ if __name__ == '__main__':
             end_view = gr.Textbox(interactive=False, label="End message", container=True)
             session_view = gr.Textbox(interactive=False, label="Session", container=True)
             is_end_to_start_view = gr.Textbox(interactive=False,
-                                              label="Is generated
+                                              label="Is generated via backward synthetic generation?",
                                               container=True)
             is_start_to_end_view = gr.Textbox(interactive=False,
-                                              label="Is generated
+                                              label="Is generated via forward synthetic generation?",
                                               container=True)
             link_view = gr.Markdown()
 
@@ -109,13 +109,15 @@
         with gr.Tab("Manual"):
             slider_manual, view_manual = dataset_view_tab(n_diffs_manual)
 
-            slider_manual.change(update_dataset_view_manual,
+            slider_manual.change(update_dataset_view_manual,
+                                 inputs=slider_manual,
                                  outputs=view_manual)
 
         with gr.Tab("Synthetic"):
             slider_synthetic, view_synthetic = dataset_view_tab(n_diffs_synthetic)
 
-            slider_synthetic.change(update_dataset_view_synthetic,
+            slider_synthetic.change(update_dataset_view_synthetic,
+                                    inputs=slider_synthetic,
                                     outputs=view_synthetic)
         with gr.Tab("Analysis"):
             def layout_for_statistics(statistics_group_name):
 
@@ -212,10 +214,7 @@ if __name__ == '__main__':
 
             gr.Plot(value=chart)
 
-            gr.Markdown(f"###
-            gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="ind").to_markdown())
-
-            gr.Markdown(f"### Aggregated correlations")
+            gr.Markdown(f"### Metrics correlations")
             gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="aggr").to_markdown())
 
     application.load(update_dataset_view_manual, inputs=slider_manual,
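The two-line fix above adds the missing `inputs=` argument to the slider event listeners. A minimal standalone sketch of the same Gradio pattern, not taken from the repo and with made-up component names:

```python
# Sketch: a slider whose change event feeds its own value into the handler.
import gradio as gr


def show_index(i):
    return f"Sample #{int(i)}"


with gr.Blocks() as demo:
    slider = gr.Slider(minimum=1, maximum=100, step=1, label="Sample index")
    view = gr.Textbox(interactive=False, label="Selected sample")
    # Without inputs=slider the handler would be called with no arguments,
    # which is the behaviour the commit above fixes.
    slider.change(show_index, inputs=slider, outputs=view)

if __name__ == "__main__":
    demo.launch()
```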
chart.ipynb ADDED
The diff for this file is too large to render.

chart_processing.ipynb DELETED
The diff for this file is too large to render.
config.py CHANGED

@@ -3,7 +3,7 @@ from pathlib import Path
 
 RANDOM_STATE = 42
 
-GRAZIE_API_JWT_TOKEN =
+GRAZIE_API_JWT_TOKEN = "eyJhbGciOiJSUzUxMiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJHcmF6aWUgQXV0aGVudGljYXRpb24iLCJ1aWQiOiJkNmFjZGM3Zi1jZWZlLTRhMDItOWRmMi01NzY5OGRlNjYyNDAiLCJ1c2VyX3N0YXRlIjoiSU5URVJOQUwiLCJyZWdpc3RyYXRpb25fZGF0ZSI6MTY4NDMzNjI3ODI2NCwibGljZW5zZSI6IjQ1TVcwNFZBVVoiLCJsaWNlbnNlX3R5cGUiOiJqZXRicmFpbnMtYWkub3JnYW5pemF0aW9uYWwucHJvIiwiZXhwIjoxNzIwNjk0OTQ2fQ.NH5KLYgkyaC1MfFHPj8jfe3yBBR8F017QV_Nn0_5AqiWqjaaVBIBCsxkZcTbwH6FBrGm-JXYM50UAhJprI3fy-HNkwfF6nAPRqkFafxT8IZ-Epk8P9u6SnC5YjD4LM4e_-aKeuXb4WdB6K_YDIRKIp64WthCS2OzLSDPiyXaHXADOBQMfWNvorXqjuKPUPE7q6L59Wes4VaDhXMPw2XA4MHUm_cTvK2a_SixaKiawxAv-Wa8vo2KcYbd4hqtxDwnoQ6c5WfmEqD-dUYvZ8G_53WNJO6gvIv0etEBx8NIez2dPXHyNqIyam4CrMXH9_stJwf998sL7NxdG2wRLGGC4A"
 GRAZIE_TIMEOUT_SEC = 1.0
 
 HF_TOKEN = os.environ.get('HF_TOKEN')
@@ -16,8 +16,7 @@ HF_FULL_COMMITS_DATASET_SUBNAME = "commitchronicle-py-long"
 HF_FULL_COMMITS_DATASET_SPLIT = "test"
 
 HF_PREDICTIONS_DATASET_NAME = "JetBrains-Research/lca-results"
-
-HF_PREDICTIONS_DATASET_SPLIT = "test"
+HF_PREDICTIONS_MODEL = "gpt_4_0613"
 
 HF_SYNTHETIC_DATASET_NAME = "JetBrains-Research/synthetic-commit-msg-rewriting"
 HF_SYNTHETIC_DATASET_SPLIT = 'train'
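For comparison, `config.py` already reads `HF_TOKEN` from the environment. A minimal sketch of the same pattern applied to the Grazie token; the environment variable name below is an assumption, not something defined by the repo:

```python
# Sketch only: read the Grazie JWT the same way HF_TOKEN is read above.
import os

GRAZIE_API_JWT_TOKEN = os.environ.get("GRAZIE_JWT_TOKEN")  # hypothetical variable name
HF_TOKEN = os.environ.get("HF_TOKEN")
```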
custom_metrics/__init__.py DELETED
File without changes
custom_metrics/gpt_eval.py DELETED

@@ -1,81 +0,0 @@
-from api_wrappers import grazie_wrapper
-
-
-def build_prompt_ref(prediction, reference):
-    return f"""Evaluate the following commit message based on clarity, specificity, context, and conciseness without
-providing any additional feedback or commentary:
-
-START OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
-{prediction}
-END OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
-
-For reference, consider this as an example of a good commit message for the same commit that is both concise and
-specific:
-START OF THE REFERENCE COMMIT MESSAGE
-{reference}
-END OF THE REFERENCE COMMIT MESSAGE
-
-YOUR TASK: Provide a single number as a response, representing the rating on a scale from 1 to 10, where 1 is the
-lowest quality and 10 is the highest quality. Do not include any other text or explanation in your response.
-"""
-
-
-def build_prompt_noref(prediction, diff):
-    return f"""Evaluate the following commit message based on clarity, specificity, context, and conciseness without
-providing any additional feedback or commentary:
-
-START OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
-{prediction}
-END OF THE COMMIT MESSAGE YOU HAVE TO EVALUATE
-
-These are the code changes included in the commit:
-START OF THE CODE CHANGES
-{diff}
-END OF THE CODE CHANGES
-
-YOUR TASK: Provide a single number as a response, representing the rating on a scale from 1 to 10, where 1 is the
-lowest quality and 10 is the highest quality. Do not include any other text or explanation in your response.
-"""
-
-
-N_RETRIES = 3
-
-
-def get_number_for_prompt(prompt):
-    outputs = []
-    result = None
-
-    for i in range(N_RETRIES):
-        try:
-            output = grazie_wrapper.generate_for_prompt(prompt).strip().split()[-1]
-            outputs.append(output)
-
-            result = int(output)
-            break
-        except ValueError:
-            continue
-
-    if result is None:
-        raise RuntimeError(f"LLM cannot generate a number. Its outputs were: {str(outputs)}")
-
-    return result
-
-
-def compute_ref(prediction, reference, n_requests):
-    prompt = build_prompt_ref(prediction, reference)
-    results = [
-        get_number_for_prompt(prompt)
-        for _ in range(n_requests)
-    ]
-
-    return sum(results) / len(results)
-
-
-def compute_noref(prediction, diff, n_requests):
-    prompt = build_prompt_noref(prediction, diff)
-    results = [
-        get_number_for_prompt(prompt)
-        for _ in range(n_requests)
-    ]
-
-    return sum(results) / len(results)
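For context, the scorers deleted here were driven from `generation_steps/metrics_analysis.py`, which is also trimmed in this commit. A sketch of how they were invoked there; the message and diff strings below are invented examples:

```python
# Sketch: how the removed gptscore_* helpers called this module
# (example strings are made up; the module itself is deleted by this commit).
from custom_metrics import gpt_eval

ref_score = gpt_eval.compute_ref(prediction="Fix crash in parser",
                                 reference="Fix NPE when the input file is empty",
                                 n_requests=3)
noref_score = gpt_eval.compute_noref(prediction="Fix crash in parser",
                                     diff="--- a/parser.py\n+++ b/parser.py\n...",
                                     n_requests=3)
```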
data_stats.ipynb ADDED (+759 lines)

New Jupyter notebook with the following cells and outputs:

# Data Stats

[code]
from datasets import load_dataset


df = load_dataset("JetBrains-Research/synthetic-commit-msg-edits", "all_pairs", split="train").to_pandas()
df.head()

[output]
                                       hash              repo  \
0  2febb99eee8ed71c9122db88ca58dd33be0b9550  mesonbuild/meson
1  2febb99eee8ed71c9122db88ca58dd33be0b9550  mesonbuild/meson
2  2febb99eee8ed71c9122db88ca58dd33be0b9550  mesonbuild/meson
3  2febb99eee8ed71c9122db88ca58dd33be0b9550  mesonbuild/meson
4  2febb99eee8ed71c9122db88ca58dd33be0b9550  mesonbuild/meson

                                              G_text  \
0  Enhance OptionOverrideProxy and simplify optio...
1  Enhance OptionOverrideProxy and simplify optio...
2  Enhance OptionOverrideProxy and simplify optio...
3  Enhance OptionOverrideProxy and simplify optio...
4  Enhance OptionOverrideProxy and simplify optio...

                                              E_text              G_type  \
0  Enhance OptionOverrideProxy for multiple optio...  synthetic_backward
1  Refactor OptionOverrideProxy and Backend class...  synthetic_backward
2  Refactor OptionOverrideProxy and backend optio...  synthetic_backward
3  Refactor: Enhance OptionOverrideProxy for mult...  synthetic_backward
4  Refactor OptionOverrideProxy and add target-sp...  synthetic_backward

                            E_type  is_related
0                   expert_labeled        True
1                synthetic_forward        True
2                synthetic_forward        True
3                synthetic_forward        True
4  synthetic_forward_from_backward       False

## Full

[code] len(df.loc[df.is_related])  # Out: 656
[code] df.loc[df.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 43.733333333333334
[code] len(df.loc[~df.is_related])  # Out: 5140
[code] df.loc[~df.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 342.6666666666667

## Expert-labeled

[code] _ = df.loc[(df.G_type == "initial") & (df.E_type == "expert_labeled")]
[code] len(_.loc[_.is_related])  # Out: 57
[code] _.loc[_.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 3.8
[code] len(_.loc[~_.is_related])  # Out: 0
[code] _.loc[~_.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: nan

## Backward

[code] _ = df.loc[(df.G_type == "synthetic_backward") & (~df.E_type.isin(["synthetic_forward", "synthetic_forward_from_backward"]))]
[code] len(_.loc[_.is_related])  # Out: 104
[code] _.loc[_.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 7.428571428571429
[code] len(_.loc[~_.is_related])  # Out: 1048
[code] _.loc[~_.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 74.85714285714286

## Forward

### From human

[code] _ = df.loc[(df.G_type == "initial") & (df.E_type == "synthetic_forward")]
[code] len(_.loc[_.is_related])  # Out: 177
[code] _.loc[_.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 11.8
[code] len(_.loc[~_.is_related])  # Out: 0
[code] __.loc[~__.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: nan

### From backward

[code] _ = df.loc[(df.G_type == "synthetic_backward") & (df.E_type.isin(["synthetic_forward", "synthetic_forward_from_backward"]))]
[code] len(_.loc[_.is_related])  # Out: 318
[code] _.loc[_.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 22.714285714285715
[code] len(_.loc[~_.is_related])  # Out: 3753
[code] _.loc[~_.is_related].groupby(["hash", "repo"]).G_text.count().mean()  # Out: 268.07142857142856
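The notebook repeats the same two statistics for each subset: the number of pairs and the mean number of pairs per commit. A small helper, not present in the notebook, that captures that pattern:

```python
# Hypothetical helper for the statistic data_stats.ipynb computes per subset:
# total number of pairs and mean number of pairs per (hash, repo) commit.
import pandas as pd


def pair_stats(subset: pd.DataFrame):
    n_pairs = len(subset)
    pairs_per_commit = subset.groupby(["hash", "repo"]).G_text.count().mean()
    return n_pairs, pairs_per_commit


# Example, mirroring the "Expert-labeled" section of the notebook:
# pair_stats(df.loc[(df.G_type == "initial") & (df.E_type == "expert_labeled") & df.is_related])
```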
generation_steps/metrics_analysis.py CHANGED

@@ -1,20 +1,15 @@
-import Levenshtein
 import evaluate
-import pandas as pd
-from tqdm import tqdm
-
 import config
-from
-
-from custom_metrics import gpt_eval
+from rapidfuzz.distance.Levenshtein import distance, normalized_similarity
+
 
-BLEU = evaluate.load('
+BLEU = evaluate.load('saridormi/b_norm', cache_dir=config.CACHE_DIR)
 
 
 def bleu_fn(pred, ref, **kwargs):
     if "refs" in kwargs:
-        return BLEU.compute(predictions=[pred] * len(kwargs["refs"]), references=kwargs["refs"])["
-        return BLEU.compute(predictions=[pred], references=[ref])["
+        return BLEU.compute(predictions=[pred] * len(kwargs["refs"]), references=kwargs["refs"])["b_norm"]
+    return BLEU.compute(predictions=[pred], references=[ref])["b_norm"]
 
 
 METEOR = evaluate.load('meteor', cache_dir=config.CACHE_DIR)

@@ -67,76 +62,23 @@ def chrf_fn(pred, ref, **kwargs):
     return CHRF.compute(predictions=[pred], references=[[ref]])["score"]
 
 
-TER = evaluate.load("ter")
-
-
-def ter_fn(pred, ref, **kwargs):
-    if "refs" in kwargs:
-        scores = [TER.compute(predictions=[pred], references=[[ref]])["score"] for ref in kwargs["refs"]]
-        return sum(scores) / len(scores)
-    return TER.compute(predictions=[pred], references=[[ref]])["score"]
-
-
 def edit_distance_fn(pred, ref, **kwargs):
     if "refs" in kwargs:
-        scores = [
+        scores = [distance(pred, ref) for ref in kwargs["refs"]]
         return sum(scores) / len(scores)
-    return
+    return distance(pred, ref)
 
 
 def edit_distance_norm_fn(pred, ref, **kwargs):
     if "refs" in kwargs:
-        scores = [
+        scores = [normalized_similarity(pred, ref) for ref in kwargs["refs"]]
         return sum(scores) / len(scores)
-    return Levenshtein.distance(pred, ref) / len(pred)
+    return normalized_similarity(pred, ref)
-
-
-def edit_time_fn(pred, ref, **kwargs):
-    return kwargs["edittime"]
-
-
-def gptscore_ref_1_fn(pred, ref, **kwargs):
-    if "refs" in kwargs:
-        scores = [gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=1) for ref in kwargs["refs"]]
-        return sum(scores) / len(scores)
-    return
-
-
-def gptscore_ref_3_fn(pred, ref, **kwargs):
-    if "refs" in kwargs:
-        scores = [gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=3) for ref in kwargs["refs"]]
-        return sum(scores) / len(scores)
-    return gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=3)
-
-
-def gptscore_ref_5_fn(pred, ref, **kwargs):
-    if "refs" in kwargs:
-        scores = [gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=5) for ref in kwargs["refs"]]
-        return sum(scores) / len(scores)
-    return gpt_eval.compute_ref(prediction=pred, reference=ref, n_requests=5)
-
-
-def gptscore_noref_1_fn(pred, ref, **kwargs):
-    return gpt_eval.compute_noref(prediction=pred, diff=kwargs['diff'], n_requests=1)
-
-
-def gptscore_noref_3_fn(pred, ref, **kwargs):
-    return gpt_eval.compute_noref(prediction=pred, diff=kwargs['diff'], n_requests=3)
-
 
-def gptscore_noref_5_fn(pred, ref, **kwargs):
-    return gpt_eval.compute_noref(prediction=pred, diff=kwargs['diff'], n_requests=5)
 
-
-IND_METRICS = {
+AGGR_METRICS = {
     "editdist": edit_distance_fn,
-    "
-    # "gptscore-ref-1-req": gptscore_ref_1_fn,
-    # "gptscore-ref-3-req": gptscore_ref_3_fn,
-    # "gptscore-ref-5-req": gptscore_ref_5_fn,
-    # "gptscore-noref-1-req": gptscore_noref_1_fn,
-    # "gptscore-noref-3-req": gptscore_noref_3_fn,
-    # "gptscore-noref-5-req": gptscore_noref_5_fn,
+    "editsim": edit_distance_norm_fn,
     "bleu": bleu_fn,
     "meteor": meteor_fn,
     "rouge1": rouge1_fn,

@@ -144,115 +86,9 @@ IND_METRICS = {
     "rougeL": rougeL_fn,
     "bertscore": bertscore_fn,
     "chrF": chrf_fn,
-    "ter": ter_fn,
 }
 
-AGGR_METRICS = {}
-# AGGR_METRICS = IND_METRICS.copy()
-# del AGGR_METRICS["gptscore-ref-1-req"]
-# del AGGR_METRICS["gptscore-noref-1-req"]
 
 REL_METRICS = {
     "editdist": edit_distance_fn,
-    "editdist-norm": edit_distance_norm_fn,
-    "edittime": edit_time_fn,
 }
-
-
-def attach_references(df):
-    reference_df = hf_data_loader.load_full_commit_as_pandas().set_index(["hash", "repo"])[["reference"]]
-    df = df.set_index(["hash", "repo"])
-    return df.join(other=reference_df, how="left").reset_index()
-
-
-def compute_metrics(df):
-    tqdm.pandas()
-
-    def apply_metric_fn_to_row(row, fn, col_pred, col_ref):
-        return fn(row[col_pred], row[col_ref], edittime=row['edit_time'], diff=str(row['mods']))
-
-    for metric in AGGR_METRICS:
-        print(f"Computing {metric} for the aggregated independent pairs")
-        values = []
-        for i, row in tqdm(df.iterrows(), total=len(df)):
-            others = df[(df["hash"] == row["hash"]) & (df["repo"] == row["repo"]) & (
-                    df["commit_msg_start"] != row["commit_msg_start"]) & (
-                    df["commit_msg_end"] != row["commit_msg_end"])]['commit_msg_end'].to_list()
-            others.append(row["reference"])
-            others = list(set(others))
-            metric_fn = AGGR_METRICS[metric]
-            values.append(
-                metric_fn(
-                    row['commit_msg_start'], None, refs=others, edittime=row['edit_time'], diff=str(row['mods'])
-                )
-            )
-        df[f"{metric}_aggr"] = values
-
-    for metric in REL_METRICS:
-        print(f"Computing {metric} for the related pairs")
-        metric_fn = REL_METRICS[metric]
-        df[f"{metric}_related"] = df.progress_apply(
-            lambda row: apply_metric_fn_to_row(row=row,
-                                               fn=metric_fn,
-                                               col_pred="commit_msg_start",
-                                               col_ref="commit_msg_end"),
-            axis=1
-        )
-
-    for metric in IND_METRICS:
-        print(f"Computing {metric} for the independent pairs")
-        metric_fn = IND_METRICS[metric]
-        df[f"{metric}_independent"] = df.progress_apply(
-            lambda row: apply_metric_fn_to_row(row=row,
-                                               fn=metric_fn,
-                                               col_pred="commit_msg_start",
-                                               col_ref="reference"),
-            axis=1
-        )
-
-    for rel_metric in REL_METRICS:
-        for ind_metric in IND_METRICS:
-            df[f"rel_{rel_metric}_ind_{ind_metric}_pearson"] = (
-                df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="pearson"))
-
-            df[f"rel_{rel_metric}_ind_{ind_metric}_spearman"] = (
-                df[f"{rel_metric}_related"].corr(df[f"{ind_metric}_independent"], method="spearman"))
-
-            for aggr_metric in AGGR_METRICS:
-                df[f"rel_{rel_metric}_aggr_{aggr_metric}_pearson"] = (
-                    df[f"{rel_metric}_related"].corr(df[f"{aggr_metric}_aggr"], method="pearson"))
-
-                df[f"rel_{rel_metric}_aggr_{aggr_metric}_spearman"] = (
-                    df[f"{rel_metric}_related"].corr(df[f"{aggr_metric}_aggr"], method="spearman"))
-
-    return df
-
-
-def compute_correlations(df: pd.DataFrame):
-    grouped_df = df.groupby(by=["end_to_start", "start_to_end"])
-    correlations = grouped_df.apply(correlations_for_group, include_groups=False)
-    return correlations
-
-
-def transform(df):
-    print("Computing metrics")
-
-    df = attach_references(df)
-    df = compute_metrics(df)
-
-    correlations_for_groups = compute_correlations(df)
-    correlations_for_groups.to_csv(config.METRICS_CORRELATIONS_ARTIFACT)
-
-    df.to_csv(config.SYNTHETIC_DATASET_ARTIFACT)
-
-    print("Done")
-    return df
-
-
-def main():
-    df = pd.read_csv(config.START_TO_END_ARTIFACT, index_col=[0])
-    transform(df)
-
-
-if __name__ == '__main__':
-    main()
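The rewritten metrics now rely on `rapidfuzz` (pinned as `rapidfuzz = "3.8.1"` in the new `pyproject.toml`) rather than the `Levenshtein` package. A quick illustration of the two imported functions, using made-up strings:

```python
# distance() is the absolute Levenshtein edit count ("editdist");
# normalized_similarity() maps it to a value in [0, 1] ("editsim").
from rapidfuzz.distance.Levenshtein import distance, normalized_similarity

a = "Fix typo in README"
b = "Fix typos in README"
print(distance(a, b))               # 1
print(normalized_similarity(a, b))  # ≈ 0.947
```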
generation_steps/{synthetic_end_to_start.py → synthetic_backward.py} RENAMED
File without changes

generation_steps/{synthetic_start_to_end.py → synthetic_forward.py} RENAMED
File without changes
metrics_analysis.ipynb ADDED
The diff for this file is too large to render.

poetry.lock ADDED
The diff for this file is too large to render.
pyproject.toml ADDED

@@ -0,0 +1,187 @@
+[tool.poetry]
+name = "commit-message-editing-visualization"
+version = "0.1.0"
+description = "Utilities for synthetic data generation, metrics analysis and visualization space for CMG Evaluaton."
+authors = ["Your Name <you@example.com>"]
+license = "MIT"
+
+[tool.poetry.dependencies]
+python = "^3.9"
+absl-py = "2.1.0"
+aiofiles = "23.2.1"
+aiohttp = "3.9.3"
+aiosignal = "1.3.1"
+altair = "5.3.0"
+annotated-types = "0.6.0"
+anyio = "4.3.0"
+argon2-cffi = "23.1.0"
+argon2-cffi-bindings = "21.2.0"
+arrow = "1.3.0"
+asttokens = "2.4.1"
+async-lru = "2.0.4"
+async-timeout = "4.0.3"
+attrs = "23.2.0"
+Babel = "2.14.0"
+beautifulsoup4 = "4.12.3"
+bert-score = "0.3.13"
+bleach = "6.1.0"
+cbor2 = "5.6.2"
+certifi = "2024.2.2"
+cffi = "1.16.0"
+charset-normalizer = "3.3.2"
+click = "8.1.7"
+colorama = "0.4.6"
+comm = "0.2.2"
+contourpy = "1.2.1"
+cycler = "0.12.1"
+datasets = "2.18.0"
+debugpy = "1.8.1"
+decorator = "5.1.1"
+defusedxml = "0.7.1"
+diff-match-patch = "20230430"
+dill = "0.3.8"
+evaluate = "0.4.1"
+exceptiongroup = "1.2.0"
+executing = "2.0.1"
+fastapi = "0.110.1"
+fastjsonschema = "2.19.1"
+ffmpy = "0.3.2"
+filelock = "3.13.3"
+fonttools = "4.50.0"
+fqdn = "1.5.1"
+frozenlist = "1.4.1"
+fsspec = "2024.2.0"
+gradio = "4.25.0"
+gradio_client = "0.15.0"
+h11 = "0.14.0"
+httpcore = "1.0.5"
+httpx = "0.27.0"
+huggingface-hub = "0.22.2"
+idna = "3.6"
+importlib_metadata = "7.1.0"
+importlib_resources = "6.4.0"
+ipykernel = "6.29.4"
+ipython = "8.18.1"
+ipywidgets = "8.1.2"
+isoduration = "20.11.0"
+jedi = "0.19.1"
+Jinja2 = "3.1.3"
+joblib = "1.4.0"
+json5 = "0.9.25"
+jsonpointer = "2.4"
+jsonschema = "4.21.1"
+jsonschema-specifications = "2023.12.1"
+kiwisolver = "1.4.5"
+lxml = "5.2.1"
+markdown-it-py = "3.0.0"
+MarkupSafe = "2.1.5"
+matplotlib = "3.8.4"
+matplotlib-inline = "0.1.7"
+mdurl = "0.1.2"
+mistune = "3.0.2"
+mpmath = "1.3.0"
+multidict = "6.0.5"
+multiprocess = "0.70.16"
+nbclient = "0.10.0"
+nbconvert = "7.16.4"
+nbformat = "5.10.4"
+nest-asyncio = "1.6.0"
+networkx = "3.2.1"
+nltk = "3.8.1"
+numpy = "1.26.4"
+orjson = "3.10.0"
+overrides = "7.7.0"
+packaging = "24.0"
+pandas = "2.2.1"
+pandocfilters = "1.5.1"
+parso = "0.8.4"
+pillow = "10.3.0"
+platformdirs = "4.2.1"
+portalocker = "2.8.2"
+prometheus_client = "0.20.0"
+prompt-toolkit = "3.0.43"
+psutil = "5.9.8"
+pure-eval = "0.2.2"
+pyarrow = "15.0.2"
+pyarrow-hotfix = "0.6"
+pycparser = "2.22"
+pydantic = "2.6.4"
+pydantic_core = "2.16.3"
+pydub = "0.25.1"
+Pygments = "2.17.2"
+pyparsing = "3.1.2"
+python-dateutil = "2.9.0.post0"
+python-json-logger = "2.0.7"
+python-multipart = "0.0.9"
+pytz = "2024.1"
+PyYAML = "6.0.1"
+pyzmq = "26.0.2"
+rapidfuzz = "3.8.1"
+referencing = "0.34.0"
+regex = "2023.12.25"
+requests = "2.31.0"
+responses = "0.18.0"
+rfc3339-validator = "0.1.4"
+rfc3986-validator = "0.1.1"
+rich = "13.7.1"
+rouge-score = "0.1.2"
+rpds-py = "0.18.0"
+ruff = "0.3.5"
+sacrebleu = "2.4.2"
+safetensors = "0.4.2"
+scikit-learn = "1.4.2"
+scipy = "1.13.0"
+semantic-version = "2.10.0"
+Send2Trash = "1.8.3"
+shellingham = "1.5.4"
+six = "1.16.0"
+sniffio = "1.3.1"
+soupsieve = "2.5"
+stack-data = "0.6.3"
+starlette = "0.37.2"
+sympy = "1.12"
+tabulate = "0.9.0"
+terminado = "0.18.1"
+threadpoolctl = "3.4.0"
+tinycss2 = "1.3.0"
+tokenizers = "0.15.2"
+tomli = "2.0.1"
+tomlkit = "0.12.0"
+toolz = "0.12.1"
+torch = "2.2.2"
+tornado = "6.4"
+tqdm = "4.66.2"
+traitlets = "5.14.3"
+transformers = "4.39.3"
+typer = "0.12.1"
+types-python-dateutil = "2.9.0.20240316"
+typing_extensions = "4.10.0"
+tzdata = "2024.1"
+uri-template = "1.3.0"
+urllib3 = "2.2.1"
+uvicorn = "0.29.0"
+wcwidth = "0.2.13"
+webcolors = "1.13"
+webencodings = "0.5.1"
+websocket-client = "1.8.0"
+websockets = "11.0.3"
+widgetsnbextension = "4.0.10"
+xxhash = "3.4.1"
+yarl = "1.9.4"
+zipp = "3.18.1"
+plotly = "5.22.0"
+tenacity = "8.2.3"
+Levenshtein = "0.25.1"
+kaleido = "0.2.1"
+jupyter = "^1.0.0"
+grazie-api-gateway-client = {version = "^0.1.3", source = "space-grazie-ml"}
+seaborn = "^0.13.2"
+
+[[tool.poetry.source]]
+name = "space-grazie-ml"
+url = "https://packages.jetbrains.team/pypi/p/grazi/grazie-ml/simple"
+priority="supplemental"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"