Spaces:
Runtime error
Runtime error
import pickle | |
import Levenshtein | |
import numpy as np | |
import pandas as pd | |
import plotly.figure_factory as ff | |
import config | |
def get_statistics_for_sample(start_msg, end_msg, row=None):
    """Compute edit statistics between two versions of a message.

    The edit operations turning ``start_msg`` into ``end_msg`` are obtained
    from ``Levenshtein.editops``. A replacement is counted both as a deletion
    and as an insertion, whereas ``changes`` counts every edit operation
    exactly once.

    Args:
        start_msg: The initial message.
        end_msg: The final message.
        row: Optional mapping-like record (e.g. a pandas row). If it holds an
            ``"editdist"`` entry, that value is reused; otherwise the distance
            is computed with ``Levenshtein.distance``.

    Returns:
        A dict with the keys ``deletions``, ``insertions``, ``changes``,
        their length-normalised variants ``deletions_norm`` (by the length of
        ``start_msg``), ``insertions_norm`` and ``changes_norm`` (by the
        length of ``end_msg``), plus ``lendiff`` (absolute length difference)
        and ``editdist``.
    """
    # Tally all operation kinds in a single pass over the edit script.
    op_counts = {"delete": 0, "insert": 0, "replace": 0}
    for op, _, _ in Levenshtein.editops(start_msg, end_msg):
        if op in op_counts:
            op_counts[op] += 1

    n_replaces = op_counts["replace"]
    n_changes = op_counts["delete"] + op_counts["insert"] + n_replaces
    # A replacement removes one symbol and adds another, so it contributes
    # to both the deletion and the insertion totals.
    n_deletes = op_counts["delete"] + n_replaces
    n_inserts = op_counts["insert"] + n_replaces

    # Clamp the denominators so that an empty message does not raise
    # ZeroDivisionError; for non-empty messages the result is unchanged.
    start_len = max(len(start_msg), 1)
    end_len = max(len(end_msg), 1)

    # Reuse a precomputed distance only when the record actually carries one.
    if row is not None and "editdist" in row:
        edit_distance = row["editdist"]
    else:
        edit_distance = Levenshtein.distance(start_msg, end_msg)

    return {
        "deletions": n_deletes,
        "insertions": n_inserts,
        "changes": n_changes,
        "deletions_norm": n_deletes / start_len,
        "insertions_norm": n_inserts / end_len,
        "changes_norm": n_changes / end_len,
        "lendiff": abs(len(start_msg) - len(end_msg)),
        "editdist": edit_distance,
    }
def get_statistics_for_row(row):
    """Compute edit statistics for a single record.

    The start text is read from ``"commit_msg_start"`` when that key is
    present in ``row`` and from ``"G_text"`` otherwise; the end text is read
    from ``"commit_msg_end"`` when present and from ``"E_text"`` otherwise.

    Args:
        row: A mapping-like record holding the text columns named above.

    Returns:
        The dict produced by ``get_statistics_for_sample`` for the two texts,
        with ``row`` forwarded as its ``row`` argument.
    """
    # Conditional expressions keep the fallback lookup lazy: the alternative
    # column is only accessed when the preferred one is missing.
    start_text = row["commit_msg_start"] if "commit_msg_start" in row else row["G_text"]
    end_text = row["commit_msg_end"] if "commit_msg_end" in row else row["E_text"]
    return get_statistics_for_sample(start_text, end_text, row=row)
def get_statistics_for_df(df: pd.DataFrame):
    """Collect per-row edit statistics of a data frame into arrays.

    Args:
        df: Data frame whose rows are passed one by one to
            ``get_statistics_for_row``.

    Returns:
        A dict mapping each statistic name to a NumPy array holding that
        statistic for every row, in row order.

    Raises:
        AssertionError: If ``df`` has no rows.
    """
    per_row = []
    for _, record in df.iterrows():
        per_row.append(get_statistics_for_row(record))
    assert len(per_row) > 0

    # The first row's keys define which statistics are gathered.
    by_name = {}
    for name in per_row[0]:
        by_name[name] = np.asarray([entry[name] for entry in per_row])
    return by_name
def build_plotly_chart(stat_golden, stat_e2s, stat_s2e, stat_e2s_s2e, stat_name):
    """Build a distribution plot comparing one statistic across groups.

    Five groups are plotted: ``Golden``, ``e2s``, ``s2e``, ``e2s+s2e`` and
    ``Synthetic``, the last being the concatenation of the three preceding
    non-golden inputs. The plotted data is also pickled to
    ``config.OUTPUT_CHARTS_DIR / f"{stat_name}_data.pkl"``.

    Args:
        stat_golden: Values for the ``Golden`` group.
        stat_e2s: Values for the ``e2s`` group.
        stat_s2e: Values for the ``s2e`` group.
        stat_e2s_s2e: Values for the ``e2s+s2e`` group.
        stat_name: Name of the statistic; used as the chart title and as the
            prefix of the pickle file name.

    Returns:
        The figure created by ``plotly.figure_factory.create_distplot``.
    """
    synthetic = np.concatenate((stat_e2s, stat_s2e, stat_e2s_s2e), axis=0)

    # Insertion order of the dict fixes the pairing of labels and data.
    groups = {
        "Golden": stat_golden,
        "e2s": stat_e2s,
        "s2e": stat_s2e,
        "e2s+s2e": stat_e2s_s2e,
        "Synthetic": synthetic,
    }
    group_labels = list(groups.keys())
    hist_data = list(groups.values())

    fig = ff.create_distplot(
        hist_data,
        group_labels,
        bin_size=0.05,
        show_rug=False,
        show_hist=False,
    )
    fig.update_layout(title_text=stat_name)

    # Persist the raw data next to the charts so the plot can be rebuilt.
    dump_path = config.OUTPUT_CHARTS_DIR / f"{stat_name}_data.pkl"
    with open(dump_path, "wb") as out_file:
        pickle.dump(hist_data, out_file)

    return fig