import gradio as gr

import analysis_util
import generate_annotated_diffs
import dataset_statistics

# Load the manual dataset and mark every row with both synthesis flags set to
# False, so the frame exposes the `end_to_start` / `start_to_end` columns that
# `update_dataset_view` reads.
df_manual = generate_annotated_diffs.manual_data_with_annotated_diffs()
df_manual["end_to_start"] = False
df_manual["start_to_end"] = False
n_diffs_manual = len(df_manual)  # number of rows; used as the "Manual" slider maximum

# Load the synthetic dataset. The filters below read its `end_to_start` and
# `start_to_end` columns, so the loader is assumed to provide them — TODO confirm.
df_synthetic = generate_annotated_diffs.synthetic_data_with_annotated_diffs()
n_diffs_synthetic = len(df_synthetic)  # number of rows; used as the "Synthetic" slider maximum


def golden():
    """Return the rows of ``df_synthetic`` where both synthesis flags are False."""
    not_end_to_start = df_synthetic['end_to_start'].eq(False)
    not_start_to_end = df_synthetic['start_to_end'].eq(False)
    return df_synthetic.loc[not_end_to_start & not_start_to_end]


def e2s():
    """Return the rows of ``df_synthetic`` flagged ``end_to_start`` only."""
    is_end_to_start = df_synthetic['end_to_start'].eq(True)
    not_start_to_end = df_synthetic['start_to_end'].eq(False)
    return df_synthetic.loc[is_end_to_start & not_start_to_end]


def s2e():
    """Return the rows of ``df_synthetic`` flagged ``start_to_end`` only."""
    not_end_to_start = df_synthetic['end_to_start'].eq(False)
    is_start_to_end = df_synthetic['start_to_end'].eq(True)
    return df_synthetic.loc[not_end_to_start & is_start_to_end]


def e2s_s2e():
    """Return the rows of ``df_synthetic`` where both synthesis flags are True."""
    is_end_to_start = df_synthetic['end_to_start'].eq(True)
    is_start_to_end = df_synthetic['start_to_end'].eq(True)
    return df_synthetic.loc[is_end_to_start & is_start_to_end]


def synthetic():
    """Return the rows of ``df_synthetic`` where at least one synthesis flag is True."""
    is_end_to_start = df_synthetic['end_to_start'].eq(True)
    is_start_to_end = df_synthetic['start_to_end'].eq(True)
    return df_synthetic.loc[is_end_to_start | is_start_to_end]


# Statistics per group, keyed by group name. Each selector is called right
# before its statistics are computed, in the order listed here.
# NOTE(review): the "manual" entry is computed from golden(), i.e. the rows of
# df_synthetic with both flags False, not from df_manual — confirm this is intended.
STATISTICS = {
    group_name: dataset_statistics.get_statistics_for_df(select_rows())
    for group_name, select_rows in (
        ("manual", golden),
        ("e2s", e2s),
        ("s2e", s2e),
        ("e2s_s2e", e2s_s2e),
        ("synthetic", synthetic),
        ("all", lambda: df_synthetic),
    )
}

# Result of comparing every group against the "manual" group.
STATISTICS_T_TEST = dataset_statistics.t_test(STATISTICS, main_group='manual')

# Names of the statistics available for each group, taken from the "manual" entry.
STAT_NAMES = list(STATISTICS['manual'].keys())


def update_dataset_view(diff_idx, df):
    """Collect the values shown for one sample of a dataset view.

    Args:
        diff_idx: 1-based sample number as selected on the slider. It is
            coerced to ``int`` because a UI slider may deliver whole numbers
            as floats, which ``DataFrame.iloc`` rejects.
        df: DataFrame providing the columns ``annotated_diff``,
            ``commit_msg_start``, ``commit_msg_end``, ``session``,
            ``end_to_start``, ``start_to_end``, ``repo`` and ``hash``.

    Returns:
        A 7-tuple: annotated diff, start message, end message, session, the
        two synthesis flags rendered as strings, and the GitHub commit URL.
    """
    # Fetch the row once instead of re-running the positional lookup per field.
    row = df.iloc[int(diff_idx) - 1]
    return (row['annotated_diff'],
            row['commit_msg_start'],
            row['commit_msg_end'],
            row['session'],
            str(row['end_to_start']),
            str(row['start_to_end']),
            f"https://github.com/{row['repo']}/commit/{row['hash']}",)


def update_dataset_view_manual(diff_idx):
    """Return the view values for sample number ``diff_idx`` of ``df_manual``."""
    return update_dataset_view(diff_idx=diff_idx, df=df_manual)


def update_dataset_view_synthetic(diff_idx):
    """Return the view values for sample number ``diff_idx`` of ``df_synthetic``."""
    return update_dataset_view(diff_idx=diff_idx, df=df_synthetic)


# JavaScript handed to `gr.Blocks(js=...)`: when the `__theme` query parameter
# is not already `light`, it sets it and navigates to the updated URL.
force_light_theme_js_func = """
function refresh() {
    const url = new URL(window.location);

    if (url.searchParams.get('__theme') !== 'light') {
        url.searchParams.set('__theme', 'light');
        window.location.href = url.href;
    }
}
"""

if __name__ == '__main__':
    with gr.Blocks(theme=gr.themes.Soft(), js=force_light_theme_js_func) as application:
        def dataset_view_tab(n_items):
            """Create the sample slider and the output widgets of one dataset tab.

            Args:
                n_items: Number of samples in the dataset; used as the slider
                    maximum (sample numbers start at 1).

            Returns:
                A ``(slider, view)`` pair. ``view`` lists the output components
                in the same order as the tuple returned by
                ``update_dataset_view``.
            """

            def read_only_textbox(label):
                # All text fields of the view share the same read-only setup.
                return gr.Textbox(interactive=False, label=label, container=True)

            slider = gr.Slider(minimum=1, maximum=n_items, step=1, value=1,
                               label=f"Sample number (total: {n_items})")

            # Use the documented component name rather than the lowercase
            # `Highlightedtext` alias.
            diff_view = gr.HighlightedText(combine_adjacent=True, color_map={'+': "green", '-': "red"})
            start_view = read_only_textbox("Start message")
            end_view = read_only_textbox("End message")
            session_view = read_only_textbox("Session")
            is_end_to_start_view = read_only_textbox("Is generated on the 'end-to-start' synthesis step?")
            is_start_to_end_view = read_only_textbox("Is generated on the 'start-to-end' synthesis step?")
            link_view = gr.Markdown()

            return slider, [
                diff_view,
                start_view,
                end_view,
                session_view,
                is_end_to_start_view,
                is_start_to_end_view,
                link_view,
            ]


        # "Manual" tab: a slider change re-renders the selected manual sample.
        with gr.Tab("Manual"):
            slider_manual, view_manual = dataset_view_tab(n_diffs_manual)
            slider_manual.change(
                fn=update_dataset_view_manual,
                inputs=slider_manual,
                outputs=view_manual,
            )

        # "Synthetic" tab: same layout, backed by the synthetic frame.
        with gr.Tab("Synthetic"):
            slider_synthetic, view_synthetic = dataset_view_tab(n_diffs_synthetic)
            slider_synthetic.change(
                fn=update_dataset_view_synthetic,
                inputs=slider_synthetic,
                outputs=view_synthetic,
            )
        with gr.Tab("Analysis"):
            def layout_for_statistics(statistics_group_name):
                """Render the heading, sample count and mean statistics of one group.

                Args:
                    statistics_group_name: Key into ``STATISTICS`` selecting
                        the group to display.
                """
                gr.Markdown(f"### {statistics_group_name}")
                group_stats = STATISTICS[statistics_group_name]

                # The count is the number of entries of one of the statistics.
                gr.Number(label="Count", interactive=False,
                          value=len(group_stats['deletions_norm']), min_width=0)

                # (label, statistic key) pairs, in display order.
                labelled_means = (
                    ("Avg deletions number (rel to the initial msg length)", 'deletions_norm'),
                    ("Avg insertions number (rel to the result length)", 'insertions_norm'),
                    ("Avg changes number (rel to the initial msg length)", 'changes_norm'),
                    ("Avg deletions number", 'deletions'),
                    ("Avg insertions number", 'insertions'),
                    ("Avg changes number", 'changes'),
                    ("Avg edit distance", 'editdist'),
                    ("Avg length difference", 'lendiff'),
                )
                for label, stat_key in labelled_means:
                    gr.Number(label=label, interactive=False,
                              value=group_stats[stat_key].mean().item(),
                              precision=3, min_width=0)


            def layout_for_statistics_t_test(statistics_group_name):
                """Render the heading and the t-test values of one group.

                Only the commented-out layout in the visible part of this file
                calls this function.

                Args:
                    statistics_group_name: Key into ``STATISTICS_T_TEST``
                        selecting the group to display.
                """
                gr.Markdown(f"### {statistics_group_name}")
                group_results = STATISTICS_T_TEST[statistics_group_name]

                # (label, statistic key) pairs, in display order.
                labelled_results = (
                    ("Deletions number (rel to the initial msg length)", 'deletions_norm'),
                    ("Insertions number (rel to the result length)", 'insertions_norm'),
                    ("Changes number (rel to the initial msg length)", 'changes_norm'),
                    ("Deletions number", 'deletions'),
                    ("Insertions number", 'insertions'),
                    ("Changes number", 'changes'),
                )
                for label, stat_key in labelled_results:
                    gr.Number(label=label, interactive=False,
                              value=group_results[stat_key], precision=3, min_width=0)


            # One equally weighted column of summary numbers per statistics group.
            with gr.Row():
                for group_name in ("manual", "e2s", "s2e", "e2s_s2e", "synthetic", "all"):
                    with gr.Column(scale=1, min_width=100):
                        layout_for_statistics(group_name)

            # gr.Markdown(f"### Student t-test (p-value)")
            # with gr.Row():
            #     with gr.Column(scale=1, min_width=100):
            #         layout_for_statistics_t_test("manual")
            #     with gr.Column(scale=1, min_width=100):
            #         layout_for_statistics_t_test("e2s")
            #     with gr.Column(scale=1, min_width=100):
            #         layout_for_statistics_t_test("s2e")
            #     with gr.Column(scale=1, min_width=100):
            #         layout_for_statistics_t_test("e2s_s2e")
            #     with gr.Column(scale=1, min_width=100):
            #         layout_for_statistics_t_test("synthetic")
            #     with gr.Column(scale=1, min_width=100):
            #         layout_for_statistics_t_test("all")

            def _plot_statistic(stat_name):
                # Build the chart of one statistic across the four groups and
                # place it in the current layout container.
                figure = dataset_statistics.build_plotly_chart(
                    stat_name=stat_name,
                    stat_golden=STATISTICS['manual'][stat_name],
                    stat_e2s=STATISTICS['e2s'][stat_name],
                    stat_s2e=STATISTICS['s2e'][stat_name],
                    stat_e2s_s2e=STATISTICS['e2s_s2e'][stat_name],
                )
                gr.Plot(value=figure)

            # Left column: statistics whose name lacks "_norm"; right column:
            # the "_norm" ones.
            with gr.Row():
                with gr.Column(scale=1):
                    for stat_name in [name for name in STAT_NAMES if "_norm" not in name]:
                        _plot_statistic(stat_name)
                with gr.Column(scale=1):
                    with gr.Column(scale=1):
                        for stat_name in [name for name in STAT_NAMES if "_norm" in name]:
                            _plot_statistic(stat_name)

            # Correlation tables over the full synthetic frame, each rendered
            # as a Markdown heading followed by a Markdown table.
            for heading, right_side in (
                ("### Reference-only correlations", "ind"),
                ("### Aggregated correlations", "aggr"),
            ):
                gr.Markdown(heading)
                correlations = analysis_util.get_correlations_for_groups(df_synthetic, right_side=right_side)
                gr.Markdown(value=correlations.to_markdown())

        # On page load, fill both dataset views from each slider's current value.
        application.load(
            fn=update_dataset_view_manual,
            inputs=slider_manual,
            outputs=view_manual,
        )
        application.load(
            fn=update_dataset_view_synthetic,
            inputs=slider_synthetic,
            outputs=view_synthetic,
        )

    # Launch the app after the Blocks layout above has been fully defined.
    application.launch()