some fixes to gradio app
Browse files- analysis_util.py +0 -74
- api_wrappers/hf_data_loader.py +17 -16
- change_visualizer.py +12 -123
- dataset_statistics.py +10 -18
- generate_annotated_diffs.py +8 -2
analysis_util.py
DELETED
@@ -1,74 +0,0 @@
|
|
1 |
-
import functools
|
2 |
-
import operator
|
3 |
-
|
4 |
-
import pandas as pd
|
5 |
-
|
6 |
-
|
7 |
-
def correlations_for_group(group):
    """Correlate every "related" metric with every "independent" and "aggr" metric.

    Metric names are derived from the column names of ``group``: a column
    ``<name>_related`` yields the related metric ``<name>``, and likewise for
    the ``_independent`` and ``_aggr`` suffixes.

    Args:
        group: DataFrame holding the metric columns.

    Returns:
        A ``pd.Series`` indexed by ``rel_<rel>_ind_<ind>_<method>`` and
        ``rel_<rel>_aggr_<aggr>_<method>`` where ``<method>`` is ``pearson``
        or ``spearman``. For each related metric, the independent pairs come
        first, followed by the aggregated pairs.
    """

    def metric_names(suffix):
        # Strip only the trailing suffix: taking the text before the first "_"
        # would truncate metric names that themselves contain underscores and
        # make the column lookups below fail.
        return [col[:-len(suffix)] for col in group.columns if col.endswith(suffix)]

    rel_metrics = metric_names("_related")
    # (key tag, column suffix, metric names) for both kinds of right-hand side.
    right_sides = (
        ("ind", "_independent", metric_names("_independent")),
        ("aggr", "_aggr", metric_names("_aggr")),
    )

    correlations = {}
    for rel_metric in rel_metrics:
        rel_values = group[f"{rel_metric}_related"]
        for tag, suffix, names in right_sides:
            for name in names:
                other_values = group[f"{name}{suffix}"]
                for method in ("pearson", "spearman"):
                    key = f"rel_{rel_metric}_{tag}_{name}_{method}"
                    correlations[key] = rel_values.corr(other_values, method=method)
    return pd.Series(correlations)
|
29 |
-
|
30 |
-
|
31 |
-
def split_metrics_string(s):
    """Split a ``rel_<A>_ind_<B>`` or ``rel_<A>_aggr_<B>`` key into ``(A, B)``.

    Metric names may contain underscores; the first ``_ind_`` / ``_aggr_``
    separator after the ``rel_`` prefix is taken as the boundary. Strings that
    do not have this shape fall back to the positional rule (second and fourth
    ``_``-separated tokens).

    Args:
        s: Key naming a pair of metrics.

    Returns:
        A ``(left_metric, right_metric)`` tuple of strings.

    Raises:
        IndexError: If ``s`` has neither the expected shape nor at least four
            ``_``-separated tokens.
    """
    import re

    match = re.fullmatch(r"rel_(.+?)_(?:ind|aggr)_(.+)", s)
    if match is not None:
        return match.group(1), match.group(2)

    tokens = s.split("_")
    return tokens[1], tokens[3]
|
34 |
-
|
35 |
-
|
36 |
-
def get_correlations_df(df, right_side):
    """Build a table of Spearman/Pearson correlations for metric pairs of ``df``.

    Args:
        df: DataFrame passed on to ``correlations_for_group``.
        right_side: Substring a correlation key must contain to be kept.

    Returns:
        A DataFrame with columns ``spearman`` and ``pearson``, sorted by the
        metric-pair key and indexed by a two-level MultiIndex named
        ``("relative", "independent")``.
    """
    raw = correlations_for_group(df)

    # Drop the trailing "_<method>" token so both methods share one pair key.
    pair_keys = {key.rpartition("_")[0] for key in raw.index if right_side in key}

    records = [
        {
            "metrics": pair,
            "spearman": raw[f"{pair}_spearman"],
            "pearson": raw[f"{pair}_pearson"],
        }
        for pair in pair_keys
    ]

    table = pd.DataFrame.from_records(data=records, index="metrics").sort_index()
    pair_tuples = table.index.map(split_metrics_string).tolist()
    table.index = pd.MultiIndex.from_tuples(pair_tuples)
    table.index = table.index.set_names(["relative", "independent"])
    return table
|
55 |
-
|
56 |
-
|
57 |
-
def get_correlations_for_groups(df, right_side):
    """Compute correlation tables for the whole frame and for four row groups.

    Rows with both ``end_to_start`` and ``start_to_end`` equal to False are
    part of every group; each group adds the rows matching one combination of
    the two flags. Groups are labelled ``golden``, ``golden+s2e``,
    ``golden+e2s`` and ``golden+e2s+s2e``.

    Args:
        df: DataFrame with ``end_to_start`` and ``start_to_end`` columns plus
            the metric columns used by ``get_correlations_df``.
        right_side: Passed through to ``get_correlations_df``.

    Returns:
        The per-group tables concatenated column-wise, keyed by ``"all"`` and
        the group labels.
    """
    tables = {"all": get_correlations_df(df, right_side=right_side)}

    # Element-wise comparison on the columns (not an identity check).
    is_golden = (df["end_to_start"] == False) & (df["start_to_end"] == False)  # noqa: E712

    for e2s, s2e in ((False, False), (False, True), (True, False), (True, True)):
        label = "golden" + ("+e2s" if e2s else "") + ("+s2e" if s2e else "")
        matches_flags = (df["end_to_start"] == e2s) & (df["start_to_end"] == s2e)
        tables[label] = get_correlations_df(df[matches_flags | is_golden], right_side=right_side)

    return pd.concat(tables, axis=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api_wrappers/hf_data_loader.py
CHANGED
@@ -2,6 +2,7 @@ import json
|
|
2 |
import os
|
3 |
from datetime import datetime, timedelta
|
4 |
|
|
|
5 |
from datasets import load_dataset
|
6 |
from huggingface_hub import hf_hub_download, list_repo_tree
|
7 |
|
@@ -66,7 +67,7 @@ def load_processed_rewriting_as_pandas():
|
|
66 |
|
67 |
def load_synthetic_as_pandas():
|
68 |
return load_dataset(config.HF_SYNTHETIC_DATASET_NAME,
|
69 |
-
"
|
70 |
split=config.HF_SYNTHETIC_DATASET_SPLIT,
|
71 |
token=config.HF_TOKEN,
|
72 |
cache_dir=config.CACHE_DIR).to_pandas()
|
@@ -75,21 +76,21 @@ def load_synthetic_as_pandas():
|
|
75 |
def load_full_commit_with_predictions_as_pandas():
|
76 |
full_dataset = load_full_commit_as_pandas()
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
predictions_dataset = predictions_dataset[~predictions_dataset.index.duplicated(keep='first')]
|
94 |
|
95 |
dataset = full_dataset.join(other=predictions_dataset, on=('hash', 'repo'))
|
|
|
2 |
import os
|
3 |
from datetime import datetime, timedelta
|
4 |
|
5 |
+
import pandas as pd
|
6 |
from datasets import load_dataset
|
7 |
from huggingface_hub import hf_hub_download, list_repo_tree
|
8 |
|
|
|
67 |
|
68 |
def load_synthetic_as_pandas():
    """Load the ``all_pairs_with_metrics`` configuration of the synthetic dataset.

    The dataset name, split, access token and cache directory are read from
    ``config``.

    Returns:
        The result of calling ``to_pandas()`` on the loaded split.
    """
    synthetic_split = load_dataset(
        config.HF_SYNTHETIC_DATASET_NAME,
        "all_pairs_with_metrics",
        split=config.HF_SYNTHETIC_DATASET_SPLIT,
        token=config.HF_TOKEN,
        cache_dir=config.CACHE_DIR,
    )
    return synthetic_split.to_pandas()
|
|
|
76 |
def load_full_commit_with_predictions_as_pandas():
|
77 |
full_dataset = load_full_commit_as_pandas()
|
78 |
|
79 |
+
predictions_paths = []
|
80 |
+
for prediction_file in list_repo_tree(repo_id=config.HF_PREDICTIONS_DATASET_NAME,
|
81 |
+
path=os.path.join("commit_message_generation/predictions", config.HF_PREDICTIONS_MODEL),
|
82 |
+
repo_type="dataset"):
|
83 |
+
predictions_paths.append(hf_hub_download(prediction_file.path,
|
84 |
+
repo_id=config.HF_PREDICTIONS_DATASET_NAME,
|
85 |
+
repo_type="dataset",
|
86 |
+
cache_dir=config.CACHE_DIR))
|
87 |
+
|
88 |
+
dfs = []
|
89 |
+
for path in predictions_paths:
|
90 |
+
dfs.append(pd.read_json(path, orient="records", lines=True))
|
91 |
+
predictions_dataset = pd.concat(dfs, axis=0, ignore_index=True)
|
92 |
+
predictions_dataset = predictions_dataset.sample(frac=1,
|
93 |
+
random_state=config.RANDOM_STATE).set_index(['hash', 'repo'])[["prediction"]]
|
94 |
predictions_dataset = predictions_dataset[~predictions_dataset.index.duplicated(keep='first')]
|
95 |
|
96 |
dataset = full_dataset.join(other=predictions_dataset, on=('hash', 'repo'))
|
change_visualizer.py
CHANGED
@@ -1,8 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
|
3 |
-
import analysis_util
|
4 |
import generate_annotated_diffs
|
5 |
-
import dataset_statistics
|
6 |
|
7 |
df_manual = generate_annotated_diffs.manual_data_with_annotated_diffs()
|
8 |
df_manual["end_to_start"] = False
|
@@ -14,45 +12,33 @@ n_diffs_synthetic = len(df_synthetic)
|
|
14 |
|
15 |
|
16 |
def golden():
|
17 |
-
return df_synthetic[(df_synthetic['
|
18 |
|
19 |
|
20 |
-
def
|
21 |
-
return df_synthetic[(df_synthetic['
|
22 |
|
23 |
|
24 |
-
def
|
25 |
-
return df_synthetic[(df_synthetic['
|
26 |
|
27 |
|
28 |
-
def
|
29 |
-
return df_synthetic[(df_synthetic['
|
30 |
|
31 |
|
32 |
def synthetic():
|
33 |
return df_synthetic[(df_synthetic['end_to_start'] == True) | (df_synthetic['start_to_end'] == True)]
|
34 |
|
35 |
|
36 |
-
STATISTICS = {"manual": dataset_statistics.get_statistics_for_df(golden()),
|
37 |
-
"e2s": dataset_statistics.get_statistics_for_df(e2s()),
|
38 |
-
"s2e": dataset_statistics.get_statistics_for_df(s2e()),
|
39 |
-
"e2s_s2e": dataset_statistics.get_statistics_for_df(e2s_s2e()),
|
40 |
-
"synthetic": dataset_statistics.get_statistics_for_df(synthetic()),
|
41 |
-
"all": dataset_statistics.get_statistics_for_df(df_synthetic)}
|
42 |
-
|
43 |
-
STATISTICS_T_TEST = dataset_statistics.t_test(STATISTICS, main_group='manual')
|
44 |
-
|
45 |
-
STAT_NAMES = list(STATISTICS['manual'].keys())
|
46 |
-
|
47 |
-
|
48 |
def update_dataset_view(diff_idx, df):
|
49 |
diff_idx -= 1
|
50 |
return (df.iloc[diff_idx]['annotated_diff'],
|
51 |
-
df.iloc[diff_idx]['commit_msg_start'],
|
52 |
-
df.iloc[diff_idx]['commit_msg_end'],
|
53 |
-
df.iloc[diff_idx]['session'],
|
54 |
-
str(df.iloc[diff_idx]['end_to_start']),
|
55 |
-
str(df.iloc[diff_idx]['start_to_end']),
|
56 |
f"https://github.com/{df.iloc[diff_idx]['repo']}/commit/{df.iloc[diff_idx]['hash']}",)
|
57 |
|
58 |
|
@@ -119,103 +105,6 @@ if __name__ == '__main__':
|
|
119 |
slider_synthetic.change(update_dataset_view_synthetic,
|
120 |
inputs=slider_synthetic,
|
121 |
outputs=view_synthetic)
|
122 |
-
with gr.Tab("Analysis"):
|
123 |
-
def layout_for_statistics(statistics_group_name):
|
124 |
-
gr.Markdown(f"### {statistics_group_name}")
|
125 |
-
stats = STATISTICS[statistics_group_name]
|
126 |
-
gr.Number(label="Count", interactive=False,
|
127 |
-
value=len(stats['deletions_norm']), min_width=00)
|
128 |
-
gr.Number(label="Avg deletions number (rel to the initial msg length)", interactive=False,
|
129 |
-
value=stats['deletions_norm'].mean().item(), precision=3, min_width=00)
|
130 |
-
gr.Number(label="Avg insertions number (rel to the result length)", interactive=False,
|
131 |
-
value=stats['insertions_norm'].mean().item(), precision=3, min_width=00)
|
132 |
-
gr.Number(label="Avg changes number (rel to the initial msg length)", interactive=False,
|
133 |
-
value=stats['changes_norm'].mean().item(), precision=3, min_width=00)
|
134 |
-
gr.Number(label="Avg deletions number", interactive=False,
|
135 |
-
value=stats['deletions'].mean().item(), precision=3, min_width=00)
|
136 |
-
gr.Number(label="Avg insertions number", interactive=False,
|
137 |
-
value=stats['insertions'].mean().item(), precision=3, min_width=00)
|
138 |
-
gr.Number(label="Avg changes number", interactive=False,
|
139 |
-
value=stats['changes'].mean().item(), precision=3, min_width=00)
|
140 |
-
gr.Number(label="Avg edit distance", interactive=False,
|
141 |
-
value=stats['editdist'].mean().item(), precision=3, min_width=00)
|
142 |
-
gr.Number(label="Avg length difference", interactive=False,
|
143 |
-
value=stats['lendiff'].mean().item(), precision=3, min_width=00)
|
144 |
-
|
145 |
-
|
146 |
-
def layout_for_statistics_t_test(statistics_group_name):
|
147 |
-
gr.Markdown(f"### {statistics_group_name}")
|
148 |
-
stats = STATISTICS_T_TEST[statistics_group_name]
|
149 |
-
gr.Number(label="Deletions number (rel to the initial msg length)", interactive=False,
|
150 |
-
value=stats['deletions_norm'], precision=3, min_width=00)
|
151 |
-
gr.Number(label="Insertions number (rel to the result length)", interactive=False,
|
152 |
-
value=stats['insertions_norm'], precision=3, min_width=00)
|
153 |
-
gr.Number(label="Changes number (rel to the initial msg length)", interactive=False,
|
154 |
-
value=stats['changes_norm'], precision=3, min_width=00)
|
155 |
-
gr.Number(label="Deletions number", interactive=False,
|
156 |
-
value=stats['deletions'], precision=3, min_width=00)
|
157 |
-
gr.Number(label="Insertions number", interactive=False,
|
158 |
-
value=stats['insertions'], precision=3, min_width=00)
|
159 |
-
gr.Number(label="Changes number", interactive=False,
|
160 |
-
value=stats['changes'], precision=3, min_width=00)
|
161 |
-
|
162 |
-
|
163 |
-
with gr.Row():
|
164 |
-
with gr.Column(scale=1, min_width=100):
|
165 |
-
layout_for_statistics("manual")
|
166 |
-
with gr.Column(scale=1, min_width=100):
|
167 |
-
layout_for_statistics("e2s")
|
168 |
-
with gr.Column(scale=1, min_width=100):
|
169 |
-
layout_for_statistics("s2e")
|
170 |
-
with gr.Column(scale=1, min_width=100):
|
171 |
-
layout_for_statistics("e2s_s2e")
|
172 |
-
with gr.Column(scale=1, min_width=100):
|
173 |
-
layout_for_statistics("synthetic")
|
174 |
-
with gr.Column(scale=1, min_width=100):
|
175 |
-
layout_for_statistics("all")
|
176 |
-
|
177 |
-
# gr.Markdown(f"### Student t-test (p-value)")
|
178 |
-
# with gr.Row():
|
179 |
-
# with gr.Column(scale=1, min_width=100):
|
180 |
-
# layout_for_statistics_t_test("manual")
|
181 |
-
# with gr.Column(scale=1, min_width=100):
|
182 |
-
# layout_for_statistics_t_test("e2s")
|
183 |
-
# with gr.Column(scale=1, min_width=100):
|
184 |
-
# layout_for_statistics_t_test("s2e")
|
185 |
-
# with gr.Column(scale=1, min_width=100):
|
186 |
-
# layout_for_statistics_t_test("e2s_s2e")
|
187 |
-
# with gr.Column(scale=1, min_width=100):
|
188 |
-
# layout_for_statistics_t_test("synthetic")
|
189 |
-
# with gr.Column(scale=1, min_width=100):
|
190 |
-
# layout_for_statistics_t_test("all")
|
191 |
-
|
192 |
-
with gr.Row():
|
193 |
-
with gr.Column(scale=1):
|
194 |
-
for stat_name in filter(lambda s: "_norm" not in s, STAT_NAMES):
|
195 |
-
chart = dataset_statistics.build_plotly_chart(
|
196 |
-
stat_golden=STATISTICS['manual'][stat_name],
|
197 |
-
stat_e2s=STATISTICS['e2s'][stat_name],
|
198 |
-
stat_s2e=STATISTICS['s2e'][stat_name],
|
199 |
-
stat_e2s_s2e=STATISTICS['e2s_s2e'][stat_name],
|
200 |
-
stat_name=stat_name
|
201 |
-
)
|
202 |
-
|
203 |
-
gr.Plot(value=chart)
|
204 |
-
with gr.Column(scale=1):
|
205 |
-
with gr.Column(scale=1):
|
206 |
-
for stat_name in filter(lambda s: "_norm" in s, STAT_NAMES):
|
207 |
-
chart = dataset_statistics.build_plotly_chart(
|
208 |
-
stat_golden=STATISTICS['manual'][stat_name],
|
209 |
-
stat_e2s=STATISTICS['e2s'][stat_name],
|
210 |
-
stat_s2e=STATISTICS['s2e'][stat_name],
|
211 |
-
stat_e2s_s2e=STATISTICS['e2s_s2e'][stat_name],
|
212 |
-
stat_name=stat_name
|
213 |
-
)
|
214 |
-
|
215 |
-
gr.Plot(value=chart)
|
216 |
-
|
217 |
-
gr.Markdown(f"### Metrics correlations")
|
218 |
-
gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="aggr").to_markdown())
|
219 |
|
220 |
application.load(update_dataset_view_manual, inputs=slider_manual,
|
221 |
outputs=view_manual)
|
|
|
1 |
import gradio as gr
|
2 |
|
|
|
3 |
import generate_annotated_diffs
|
|
|
4 |
|
5 |
df_manual = generate_annotated_diffs.manual_data_with_annotated_diffs()
|
6 |
df_manual["end_to_start"] = False
|
|
|
12 |
|
13 |
|
14 |
def golden():
    """Return the ``is_related`` rows of ``df_synthetic`` with G_type "initial" and E_type "expert_labeled"."""
    related_rows = df_synthetic.loc[df_synthetic.is_related]
    g_is_initial = df_synthetic['G_type'] == "initial"
    e_is_expert = df_synthetic['E_type'] == "expert_labeled"
    return related_rows.loc[g_is_initial & e_is_expert]
|
16 |
|
17 |
|
18 |
+
def backward():
    """Return the ``is_related`` rows of ``df_synthetic`` with G_type "backward" and a synthetic-forward E_type.

    Accepted E_type values are "synthetic_forward" and
    "synthetic_forward_from_backward".
    """
    related_rows = df_synthetic.loc[df_synthetic.is_related]
    # NOTE(review): forward_from_backward() filters on G_type "synthetic_backward";
    # confirm that "backward" is the label actually present in the dataset.
    g_is_backward = df_synthetic['G_type'] == "backward"
    # The second label was previously misspelled ("synthtetic_..."), so it never matched.
    e_is_forward = df_synthetic['E_type'].isin(["synthetic_forward", "synthetic_forward_from_backward"])
    return related_rows.loc[g_is_backward & e_is_forward]
|
20 |
|
21 |
|
22 |
+
def forward():
    """Return the ``is_related`` rows of ``df_synthetic`` with G_type "initial" and E_type "synthetic_forward"."""
    type_filter = (df_synthetic['G_type'] == "initial") & (df_synthetic['E_type'] == "synthetic_forward")
    related_rows = df_synthetic.loc[df_synthetic.is_related]
    return related_rows.loc[type_filter]
|
24 |
|
25 |
|
26 |
+
def forward_from_backward():
    """Return the ``is_related`` rows with G_type "synthetic_backward" and E_type "synthetic_forward_from_backward"."""
    related_rows = df_synthetic.loc[df_synthetic.is_related]
    g_matches = df_synthetic['G_type'] == "synthetic_backward"
    e_matches = df_synthetic['E_type'] == "synthetic_forward_from_backward"
    selection = g_matches & e_matches
    return related_rows.loc[selection]
|
28 |
|
29 |
|
30 |
def synthetic():
    """Return the rows of ``df_synthetic`` where ``end_to_start`` or ``start_to_end`` equals True."""
    # Element-wise comparisons on the columns (not identity checks).
    is_end_to_start = df_synthetic['end_to_start'] == True  # noqa: E712
    is_start_to_end = df_synthetic['start_to_end'] == True  # noqa: E712
    return df_synthetic[is_end_to_start | is_start_to_end]
|
32 |
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
def update_dataset_view(diff_idx, df):
    """Collect the values displayed for one entry of ``df``.

    Two column layouts are supported: frames with ``commit_msg_start`` /
    ``commit_msg_end`` columns, and frames that carry the messages in
    ``G_text`` / ``E_text`` instead. ``session``, ``end_to_start`` and
    ``start_to_end`` are optional and rendered as empty strings when absent.

    Args:
        diff_idx: 1-based position of the row in ``df``.
        df: DataFrame with at least ``annotated_diff``, ``repo`` and ``hash``
            columns plus one of the two message column pairs.

    Returns:
        A 7-tuple ``(annotated_diff, start_message, end_message, session,
        end_to_start, start_to_end, commit_url)``; the two flags are
        converted with ``str``.
    """
    # Positions are 1-based; note that 0 therefore maps to -1, i.e. the last row.
    row = df.iloc[diff_idx - 1]  # fetch the row once instead of once per field
    columns = df.columns

    start_msg = row['commit_msg_start'] if "commit_msg_start" in columns else row['G_text']
    end_msg = row['commit_msg_end'] if "commit_msg_end" in columns else row['E_text']
    session = row['session'] if "session" in columns else ""
    end_to_start = str(row['end_to_start']) if "end_to_start" in columns else ""
    start_to_end = str(row['start_to_end']) if "start_to_end" in columns else ""
    commit_url = f"https://github.com/{row['repo']}/commit/{row['hash']}"

    return (row['annotated_diff'],
            start_msg,
            end_msg,
            session,
            end_to_start,
            start_to_end,
            commit_url,)
|
43 |
|
44 |
|
|
|
105 |
slider_synthetic.change(update_dataset_view_synthetic,
|
106 |
inputs=slider_synthetic,
|
107 |
outputs=view_synthetic)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
application.load(update_dataset_view_manual, inputs=slider_manual,
|
110 |
outputs=view_manual)
|
dataset_statistics.py
CHANGED
@@ -29,14 +29,20 @@ def get_statistics_for_sample(start_msg, end_msg, row=None):
|
|
29 |
"changes_norm": n_changes / len(end_msg),
|
30 |
|
31 |
"lendiff": abs(len(start_msg) - len(end_msg)),
|
32 |
-
"editdist": row["
|
33 |
}
|
34 |
|
35 |
|
36 |
def get_statistics_for_row(row):
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
|
42 |
def get_statistics_for_df(df: pd.DataFrame):
|
@@ -63,17 +69,3 @@ def build_plotly_chart(stat_golden, stat_e2s, stat_s2e, stat_e2s_s2e, stat_name)
|
|
63 |
pickle.dump(hist_data, f)
|
64 |
|
65 |
return fig
|
66 |
-
|
67 |
-
|
68 |
-
def t_test(group_stats, main_group="manual"):
    """Compare every statistic of every group against the main group.

    Each statistic's sample is compared with the same statistic of
    ``main_group`` via ``stats.ttest_ind(..., equal_var=False)`` (``stats`` is
    presumably ``scipy.stats`` — confirm against the file's imports).

    Args:
        group_stats: Mapping ``group name -> {statistic name -> sample}``.
        main_group: Name of the group every other group is compared to.

    Returns:
        A nested dict ``{group: {statistic: p-value}}``; the main group is
        included and compared with itself.
    """
    p_values = {}
    for group_name in group_stats:
        p_values[group_name] = {
            stat_name: stats.ttest_ind(
                group_stats[main_group][stat_name],
                group_stats[group_name][stat_name],
                equal_var=False,
                random_state=config.RANDOM_STATE,
            ).pvalue
            for stat_name in group_stats[group_name]
        }
    return p_values
|
|
|
29 |
"changes_norm": n_changes / len(end_msg),
|
30 |
|
31 |
"lendiff": abs(len(start_msg) - len(end_msg)),
|
32 |
+
"editdist": row["editdist"] if row is not None else Levenshtein.distance(start_msg, end_msg),
|
33 |
}
|
34 |
|
35 |
|
36 |
def get_statistics_for_row(row):
    """Compute the statistics for one row via ``get_statistics_for_sample``.

    The start and end messages are read from ``commit_msg_start`` /
    ``commit_msg_end`` when the row has those keys, otherwise from
    ``G_text`` / ``E_text``. The row itself is forwarded as ``row``.
    """
    start = row['commit_msg_start'] if "commit_msg_start" in row else row["G_text"]
    end = row['commit_msg_end'] if "commit_msg_end" in row else row["E_text"]
    return get_statistics_for_sample(start, end, row=row)
|
46 |
|
47 |
|
48 |
def get_statistics_for_df(df: pd.DataFrame):
|
|
|
69 |
pickle.dump(hist_data, f)
|
70 |
|
71 |
return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generate_annotated_diffs.py
CHANGED
@@ -21,8 +21,14 @@ def get_annotated_diff(start_text, end_text):
|
|
21 |
|
22 |
|
23 |
def annotated_diff_for_row(row):
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
return get_annotated_diff(start, end)
|
27 |
|
28 |
|
|
|
21 |
|
22 |
|
23 |
def annotated_diff_for_row(row):
    """Build the annotated diff between a row's start and end messages.

    ``commit_msg_start`` / ``commit_msg_end`` are used when the row has those
    keys; otherwise the messages come from ``G_text`` / ``E_text``.
    """
    start_key = "commit_msg_start" if "commit_msg_start" in row else "G_text"
    end_key = "commit_msg_end" if "commit_msg_end" in row else "E_text"
    return get_annotated_diff(row[start_key], row[end_key])
|
33 |
|
34 |
|