Petr Tsvetkov
committed on
Commit
•
a7bba68
1
Parent(s):
7ab7be2
Latest version of the code; config updated to JetBrains-Research
Browse files
- analysis.ipynb +0 -0
- analysis_util.py +3 -4
- chart_processing.ipynb +0 -0
- config.py +2 -2
- generation_steps/metrics_analysis.py +2 -1
analysis.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
analysis_util.py
CHANGED
@@ -59,15 +59,14 @@ def get_correlations_for_groups(df, right_side):
|
|
59 |
|
60 |
for e2s in (False, True):
|
61 |
for s2e in (False, True):
|
62 |
-
group = ""
|
63 |
if e2s:
|
64 |
group += "+e2s"
|
65 |
if s2e:
|
66 |
group += "+s2e"
|
67 |
-
if group == "":
|
68 |
-
group = "golden"
|
69 |
|
70 |
-
subdf = df[(df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)
|
|
|
71 |
subdf_corr = get_correlations_df(subdf, right_side=right_side)
|
72 |
correlations[group] = subdf_corr
|
73 |
|
|
|
59 |
|
60 |
for e2s in (False, True):
|
61 |
for s2e in (False, True):
|
62 |
+
group = "golden"
|
63 |
if e2s:
|
64 |
group += "+e2s"
|
65 |
if s2e:
|
66 |
group += "+s2e"
|
|
|
|
|
67 |
|
68 |
+
subdf = df[((df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)) | (
|
69 |
+
(df["end_to_start"] == False) & (df["start_to_end"] == False))]
|
70 |
subdf_corr = get_correlations_df(subdf, right_side=right_side)
|
71 |
correlations[group] = subdf_corr
|
72 |
|
chart_processing.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
config.py
CHANGED
@@ -8,7 +8,7 @@ GRAZIE_TIMEOUT_SEC = 1.0
|
|
8 |
|
9 |
HF_TOKEN = os.environ.get('HF_TOKEN')
|
10 |
|
11 |
-
HF_RAW_DATASET_NAME = "
|
12 |
HF_RAW_DATASET_SPLIT = 'train'
|
13 |
|
14 |
HF_FULL_COMMITS_DATASET_NAME = "JetBrains-Research/lca-commit-message-generation"
|
@@ -19,7 +19,7 @@ HF_PREDICTIONS_DATASET_NAME = "JetBrains-Research/lca-results"
|
|
19 |
HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
|
20 |
HF_PREDICTIONS_DATASET_SPLIT = "test"
|
21 |
|
22 |
-
HF_SYNTHETIC_DATASET_NAME = "
|
23 |
HF_SYNTHETIC_DATASET_SPLIT = 'train'
|
24 |
|
25 |
LLM_MODEL = "gpt-4-1106-preview"
|
|
|
8 |
|
9 |
HF_TOKEN = os.environ.get('HF_TOKEN')
|
10 |
|
11 |
+
HF_RAW_DATASET_NAME = "JetBrains-Research/commit-msg-rewriting"
|
12 |
HF_RAW_DATASET_SPLIT = 'train'
|
13 |
|
14 |
HF_FULL_COMMITS_DATASET_NAME = "JetBrains-Research/lca-commit-message-generation"
|
|
|
19 |
HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
|
20 |
HF_PREDICTIONS_DATASET_SPLIT = "test"
|
21 |
|
22 |
+
HF_SYNTHETIC_DATASET_NAME = "JetBrains-Research/synthetic-commit-msg-rewriting"
|
23 |
HF_SYNTHETIC_DATASET_SPLIT = 'train'
|
24 |
|
25 |
LLM_MODEL = "gpt-4-1106-preview"
|
generation_steps/metrics_analysis.py
CHANGED
@@ -147,7 +147,8 @@ IND_METRICS = {
|
|
147 |
"ter": ter_fn,
|
148 |
}
|
149 |
|
150 |
-
AGGR_METRICS =
|
|
|
151 |
# del AGGR_METRICS["gptscore-ref-1-req"]
|
152 |
# del AGGR_METRICS["gptscore-noref-1-req"]
|
153 |
|
|
|
147 |
"ter": ter_fn,
|
148 |
}
|
149 |
|
150 |
+
AGGR_METRICS = {}
|
151 |
+
# AGGR_METRICS = IND_METRICS.copy()
|
152 |
# del AGGR_METRICS["gptscore-ref-1-req"]
|
153 |
# del AGGR_METRICS["gptscore-noref-1-req"]
|
154 |
|