File size: 4,672 Bytes
5434c4b
 
 
f26a894
5434c4b
0c136d8
b7f7a57
fbb73cc
0c136d8
5434c4b
5f3a4af
 
 
c151bb0
 
f26a894
5434c4b
b6ae739
5434c4b
02ebb6e
 
 
 
ea84073
13e3243
ea84073
b6ae739
 
 
5ae823f
b6ae739
 
 
5ae823f
5434c4b
 
642fae1
 
 
 
 
 
 
 
 
 
 
5434c4b
642fae1
0b259d2
 
 
 
 
 
 
 
ea84073
 
 
13e3243
 
 
0b259d2
 
 
 
 
 
 
d7e2287
13e3243
0b259d2
0c136d8
 
0b259d2
 
 
 
 
 
b6ae739
0c136d8
 
5f3a4af
0b259d2
5f3a4af
b6ae739
5f3a4af
f26a894
 
 
 
4017643
f26a894
 
 
 
 
 
 
 
 
 
 
 
 
5f3a4af
b6ae739
0c136d8
5434c4b
b6ae739
5f3a4af
 
5434c4b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr

import generate_annotated_diffs
import statistics

# Manually collected samples. Both synthesis-step flags are filled with False
# so this frame exposes the same columns that update_dataset_view reads.
df_manual = generate_annotated_diffs.manual_data_with_annotated_diffs()
n_diffs_manual = len(df_manual)
df_manual["end_to_start"] = False
df_manual["start_to_end"] = False

# Synthetic samples, used as returned by the generator module.
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
n_diffs_synthetic = len(df_synthetic)

# Per-dataset statistics, keyed by the group name shown on the "Compare" tab.
STATISTICS = {
    group_name: statistics.get_statistics_for_df(group_df)
    for group_name, group_df in (("manual", df_manual), ("synthetic", df_synthetic))
}


def update_dataset_view(diff_idx, df):
    """Return the values displayed for one sample of a dataset.

    Args:
        diff_idx: 1-based sample number as selected on the slider. Float
            values (e.g. ``3.0``) are accepted and truncated to ``int``.
        df: Frame with one sample per row, providing the columns
            ``annotated_diff``, ``commit_msg_start``, ``commit_msg_end``,
            ``session``, ``end_to_start``, ``start_to_end``, ``repo`` and
            ``hash``.

    Returns:
        A 7-tuple ``(annotated_diff, commit_msg_start, commit_msg_end,
        session, str(end_to_start), str(start_to_end), commit_url)`` where
        ``commit_url`` is the GitHub commit link built from ``repo`` and
        ``hash``.

    Raises:
        IndexError: If the sample number is beyond the number of rows.
    """
    # Positional indexing requires a real int, while UI sliders may deliver
    # the value as a float; convert once and shift to a 0-based position.
    position = int(diff_idx) - 1

    # Fetch the row a single time instead of re-indexing the frame per field.
    row = df.iloc[position]

    commit_url = f"https://github.com/{row['repo']}/commit/{row['hash']}"
    return (row['annotated_diff'],
            row['commit_msg_start'],
            row['commit_msg_end'],
            row['session'],
            str(row['end_to_start']),
            str(row['start_to_end']),
            commit_url,)


def update_dataset_view_manual(diff_idx):
    """Return the view values for sample number ``diff_idx`` of the manual dataset."""
    return update_dataset_view(diff_idx, df=df_manual)


def update_dataset_view_synthetic(diff_idx):
    """Return the view values for sample number ``diff_idx`` of the synthetic dataset."""
    return update_dataset_view(diff_idx, df=df_synthetic)


# JavaScript snippet handed to `gr.Blocks(js=...)` below. If the page URL does
# not already carry `__theme=light`, it sets that query parameter and navigates
# to the updated URL, so the app is always displayed with the light theme.
force_light_theme_js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.href = url.href;
    }
}
"""

if __name__ == '__main__':
    # Build the UI: one browsing tab per dataset plus a tab that compares
    # their aggregate statistics, then start the app.
    with gr.Blocks(theme=gr.themes.Soft(), js=force_light_theme_js_func) as application:
        def dataset_view_tab(n_items):
            """Create the widgets of one dataset tab in the current layout context.

            Args:
                n_items: Number of samples in the dataset; used as the slider
                    maximum and shown in the slider label.

            Returns:
                ``(slider, view)`` where ``view`` lists the output components
                in the same order as the tuple returned by
                ``update_dataset_view``.
            """
            slider = gr.Slider(minimum=1, maximum=n_items, step=1, value=1,
                               label=f"Sample number (total: {n_items})")

            # Use the documented component name rather than the lowercase
            # `Highlightedtext` alias spelling.
            diff_view = gr.HighlightedText(combine_adjacent=True, color_map={'+': "green", '-': "red"})
            start_view = gr.Textbox(interactive=False, label="Start message", container=True)
            end_view = gr.Textbox(interactive=False, label="End message", container=True)
            session_view = gr.Textbox(interactive=False, label="Session", container=True)
            is_end_to_start_view = gr.Textbox(interactive=False,
                                              label="Is generated on the 'end-to-start' synthesis step?",
                                              container=True)
            is_start_to_end_view = gr.Textbox(interactive=False,
                                              label="Is generated on the 'start-to-end' synthesis step?",
                                              container=True)
            link_view = gr.Markdown()

            # Order matters: it must match the tuple produced by
            # update_dataset_view, since these are used as event outputs.
            view = [
                diff_view,
                start_view,
                end_view,
                session_view,
                is_end_to_start_view,
                is_start_to_end_view,
                link_view
            ]

            return slider, view


        with gr.Tab("Manual"):
            slider_manual, view_manual = dataset_view_tab(n_diffs_manual)

            # Refresh the displayed sample whenever the slider moves.
            slider_manual.change(update_dataset_view_manual, inputs=slider_manual,
                                 outputs=view_manual)

        with gr.Tab("Synthetic"):
            slider_synthetic, view_synthetic = dataset_view_tab(n_diffs_synthetic)

            slider_synthetic.change(update_dataset_view_synthetic, inputs=slider_synthetic,
                                    outputs=view_synthetic)
        with gr.Tab("Compare"):
            def layout_for_statistics(statistics_group_name):
                """Render the averaged statistics of one group as read-only number fields.

                Args:
                    statistics_group_name: Key into ``STATISTICS``
                        (``"manual"`` or ``"synthetic"``); also used as the
                        section heading.
                """
                gr.Markdown(f"### {statistics_group_name}")
                stats = STATISTICS[statistics_group_name]

                # (field label, key in `stats`) pairs, in display order.
                averaged_fields = (
                    ("Average deletions number (rel to the initial message length)", 'deletions'),
                    ("Average insertions number (rel to the result length)", 'insertions'),
                    ("Average changes number (rel to the result length)", 'changes'),
                )
                for field_label, stats_key in averaged_fields:
                    gr.Number(label=field_label, interactive=False,
                              value=stats[stats_key].mean().item(), precision=3)


            # Show both groups side by side in equally wide columns.
            with gr.Row():
                with gr.Column(scale=1):
                    layout_for_statistics("manual")

                with gr.Column(scale=1):
                    layout_for_statistics("synthetic")

        # Fill both dataset views when the page loads, using the sliders'
        # initial values, so a sample is shown before any slider is moved.
        application.load(update_dataset_view_manual, inputs=slider_manual,
                         outputs=view_manual)

        application.load(update_dataset_view_synthetic, inputs=slider_synthetic,
                         outputs=view_synthetic)

    application.launch()