commit-message-editing-visualization / change_visualizer.py
Petr Tsvetkov
Visualizer bugs fixed; added normalized editdist
aef1dbe
raw
history blame
5.06 kB
import gradio as gr
import analysis_util
import generate_annotated_diffs
import statistics
# Load both datasets once when the module is imported; the view callbacks
# below index into these frames by row position.
df_manual = generate_annotated_diffs.manual_data_with_annotated_diffs()
# The shared view callback reads the 'end_to_start' and 'start_to_end' columns
# from whichever frame it is given, so the manual frame gets both columns with
# a constant False value.
# NOTE(review): presumably the synthetic frame already carries these columns — confirm.
df_manual["end_to_start"] = False
df_manual["start_to_end"] = False
n_diffs_manual = len(df_manual)
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
n_diffs_synthetic = len(df_synthetic)
# Per-dataset statistics, keyed by the group name that the "Analysis" tab
# passes to its layout helper ("manual" / "synthetic").
# NOTE(review): `statistics` must resolve to the project module here — the
# standard-library `statistics` has no `get_statistics_for_df`; confirm the
# import path cannot pick up the stdlib module instead.
STATISTICS = {"manual": statistics.get_statistics_for_df(df_manual),
"synthetic": statistics.get_statistics_for_df(df_synthetic)}
def update_dataset_view(diff_idx, df):
    """Collect the values displayed for one sample of a dataset.

    Args:
        diff_idx: 1-based sample number (as produced by the sample slider).
            Float values such as ``3.0`` are accepted and truncated to
            ``int``, because ``DataFrame.iloc`` rejects float positions.
            Note that ``0`` maps to position ``-1`` and therefore selects
            the last row.
        df: DataFrame providing the columns ``annotated_diff``,
            ``commit_msg_start``, ``commit_msg_end``, ``session``,
            ``end_to_start``, ``start_to_end``, ``repo`` and ``hash``.

    Returns:
        A 7-tuple: the annotated diff, the start message, the end message,
        the session, the two synthesis flags rendered with ``str()``, and
        the GitHub URL of the commit.
    """
    # Fetch the row once instead of re-materialising it for every column.
    row = df.iloc[int(diff_idx) - 1]
    commit_url = f"https://github.com/{row['repo']}/commit/{row['hash']}"
    return (
        row['annotated_diff'],
        row['commit_msg_start'],
        row['commit_msg_end'],
        row['session'],
        str(row['end_to_start']),
        str(row['start_to_end']),
        commit_url,
    )
def update_dataset_view_manual(diff_idx):
    """Return the view values for sample ``diff_idx`` of the manual dataset."""
    return update_dataset_view(diff_idx=diff_idx, df=df_manual)
def update_dataset_view_synthetic(diff_idx):
    """Return the view values for sample ``diff_idx`` of the synthetic dataset."""
    return update_dataset_view(diff_idx=diff_idx, df=df_synthetic)
# JavaScript snippet passed to `gr.Blocks(js=...)` below. It inspects the
# page URL and, when the `__theme` query parameter is not 'light', sets it to
# 'light' and navigates to the updated URL.
force_light_theme_js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'light') {
url.searchParams.set('__theme', 'light');
window.location.href = url.href;
}
}
"""
if __name__ == '__main__':
    # Build the UI: one browsing tab per dataset plus an "Analysis" tab, then
    # start the Gradio server.
    with gr.Blocks(theme=gr.themes.Soft(), js=force_light_theme_js_func) as application:
        def dataset_view_tab(n_items):
            """Create the widgets used to browse one dataset.

            Args:
                n_items: number of samples; used as the slider maximum.

            Returns:
                ``(slider, view)`` where ``view`` lists the output components
                in the same order as the tuple returned by
                ``update_dataset_view``.
            """
            slider = gr.Slider(minimum=1, maximum=n_items, step=1, value=1,
                               label=f"Sample number (total: {n_items})")

            # `HighlightedText` is the documented component name; the
            # lower-cased `Highlightedtext` spelling is only a legacy alias.
            diff_view = gr.HighlightedText(combine_adjacent=True,
                                           color_map={'+': "green", '-': "red"})
            start_view = gr.Textbox(interactive=False, label="Start message", container=True)
            end_view = gr.Textbox(interactive=False, label="End message", container=True)
            session_view = gr.Textbox(interactive=False, label="Session", container=True)
            is_end_to_start_view = gr.Textbox(
                interactive=False,
                label="Is generated on the 'end-to-start' synthesis step?",
                container=True)
            is_start_to_end_view = gr.Textbox(
                interactive=False,
                label="Is generated on the 'start-to-end' synthesis step?",
                container=True)
            link_view = gr.Markdown()

            view = [
                diff_view,
                start_view,
                end_view,
                session_view,
                is_end_to_start_view,
                is_start_to_end_view,
                link_view,
            ]

            return slider, view

        with gr.Tab("Manual"):
            slider_manual, view_manual = dataset_view_tab(n_diffs_manual)
            slider_manual.change(update_dataset_view_manual, inputs=slider_manual,
                                 outputs=view_manual)

        with gr.Tab("Synthetic"):
            slider_synthetic, view_synthetic = dataset_view_tab(n_diffs_synthetic)
            slider_synthetic.change(update_dataset_view_synthetic, inputs=slider_synthetic,
                                    outputs=view_synthetic)

        with gr.Tab("Analysis"):
            def layout_for_statistics(statistics_group_name):
                """Render the mean edit statistics of one dataset group.

                Args:
                    statistics_group_name: key into ``STATISTICS``
                        (``"manual"`` or ``"synthetic"``).
                """
                gr.Markdown(f"### {statistics_group_name}")
                stats = STATISTICS[statistics_group_name]
                gr.Number(label="Average deletions number (rel to the initial message length)",
                          interactive=False,
                          value=stats['deletions'].mean().item(), precision=3)
                gr.Number(label="Average insertions number (rel to the result length)",
                          interactive=False,
                          value=stats['insertions'].mean().item(), precision=3)
                gr.Number(label="Average changes number (rel to the result length)",
                          interactive=False,
                          value=stats['changes'].mean().item(), precision=3)

            # Manual and synthetic statistics side by side.
            with gr.Row():
                with gr.Column(scale=1):
                    layout_for_statistics("manual")
                with gr.Column(scale=1):
                    layout_for_statistics("synthetic")

            # NOTE(review): the correlation table is assumed to belong to the
            # "Analysis" tab — confirm the intended placement.
            gr.Markdown("### Reference-only correlations")
            gr.Markdown(value=analysis_util.get_correlations_for_groups(
                df_synthetic, right_side="ind").to_markdown())

            # Aggregated correlations are currently disabled:
            # gr.Markdown(f"### Aggregated correlations")
            # gr.Markdown(value=analysis_util.get_correlations_for_groups(df_synthetic, right_side="aggr").to_markdown())

        # Populate both dataset tabs with their first sample on page load.
        application.load(update_dataset_view_manual, inputs=slider_manual,
                         outputs=view_manual)
        application.load(update_dataset_view_synthetic, inputs=slider_synthetic,
                         outputs=view_synthetic)

    application.launch()