Spaces:

JetBrains-Research
/

commit-message-editing-visualization

Sleeping

File size: 2,858 Bytes

39950c9
 
 
a01d3ba
 
39950c9
 
9e1ff19
 
 
39950c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a01d3ba
 
 
 
 
 
 
39950c9
a01d3ba
 
39950c9
a01d3ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39950c9
aef1dbe
a01d3ba
 
 
a7bba68
a01d3ba
aef1dbe
a01d3ba
aef1dbe
a01d3ba
a7bba68
 
aef1dbe
 
a01d3ba
aef1dbe

import functools
import operator

import pandas as pd


def correlations_for_group(group):
    """Correlate every ``*_related`` column with the ``*_independent`` and ``*_aggr`` columns.

    A metric name is the part of a column name before its first underscore,
    so ``bleu_related`` yields the metric ``bleu``.

    Args:
        group: DataFrame whose columns follow the ``<metric>_related``,
            ``<metric>_independent`` and ``<metric>_aggr`` naming scheme.
            Columns with other suffixes are ignored.

    Returns:
        A ``pd.Series`` indexed by ``rel_<rel>_ind_<ind>_<method>`` and
        ``rel_<rel>_aggr_<aggr>_<method>`` labels, where ``<method>`` is
        ``pearson`` or ``spearman``.
    """

    def metrics_with_suffix(suffix):
        # Only the token before the first underscore is kept as the metric name.
        return [name.split("_")[0] for name in group.columns if name.endswith(suffix)]

    def both_coefficients(left_column, right_column):
        # Columns are looked up lazily so nothing is touched unless a pair exists.
        left = group[left_column]
        right = group[right_column]
        return left.corr(right, method="pearson"), left.corr(right, method="spearman")

    related = metrics_with_suffix("_related")
    independent = metrics_with_suffix("_independent")
    aggregated = metrics_with_suffix("_aggr")

    merged = {}
    for rel in related:
        for ind in independent:
            pearson, spearman = both_coefficients(f"{rel}_related", f"{ind}_independent")
            merged[f"rel_{rel}_ind_{ind}_pearson"] = pearson
            merged[f"rel_{rel}_ind_{ind}_spearman"] = spearman
        for aggr in aggregated:
            pearson, spearman = both_coefficients(f"{rel}_related", f"{aggr}_aggr")
            merged[f"rel_{rel}_aggr_{aggr}_pearson"] = pearson
            merged[f"rel_{rel}_aggr_{aggr}_spearman"] = spearman
    return pd.Series(merged)


def split_metrics_string(s):
    """Return the second and fourth underscore-separated tokens of ``s``.

    For a label such as ``rel_<rel>_ind_<ind>`` this yields ``(<rel>, <ind>)``.
    Raises ``IndexError`` when ``s`` has fewer than four tokens.
    """
    parts = s.split("_")
    left_metric = parts[1]
    right_metric = parts[3]
    return left_metric, right_metric


def get_correlations_df(df, right_side):
    """Tabulate the correlations of ``df`` whose label contains ``right_side``.

    Args:
        df: DataFrame passed unchanged to ``correlations_for_group``.
        right_side: Substring that a correlation label must contain to be
            kept (plain ``in`` test against the whole label).

    Returns:
        A DataFrame with ``spearman`` and ``pearson`` columns, indexed by a
        two-level MultiIndex named ``("relative", "independent")`` built from
        the metric names embedded in each label.
    """
    raw = correlations_for_group(df)

    # Dropping the trailing "_<method>" token makes the pearson and spearman
    # labels of one metric pair collapse into a single prefix.
    prefixes = {label.rpartition("_")[0] for label in raw.index if right_side in label}

    records = [
        {
            "metrics": prefix,
            "spearman": raw[f"{prefix}_spearman"],
            "pearson": raw[f"{prefix}_pearson"],
        }
        for prefix in prefixes
    ]

    # Set iteration order is arbitrary; sorting by label makes the row order stable.
    table = pd.DataFrame.from_records(data=records, index="metrics").sort_index()

    metric_pairs = [split_metrics_string(label) for label in table.index]
    table.index = pd.MultiIndex.from_tuples(metric_pairs).set_names(["relative", "independent"])

    return table


def get_correlations_for_groups(df, right_side):
    """Build correlation tables for all rows and for each flag-defined row group.

    The groups are named ``golden``, ``golden+s2e``, ``golden+e2s`` and
    ``golden+e2s+s2e``. Each group holds the rows whose ``end_to_start`` /
    ``start_to_end`` values equal the group's flags, plus the rows where both
    flags are ``False``.

    Args:
        df: DataFrame with ``end_to_start`` and ``start_to_end`` columns in
            addition to the metric columns read by ``get_correlations_df``.
        right_side: Forwarded to ``get_correlations_df``.

    Returns:
        The per-group tables concatenated column-wise, keyed by ``"all"`` and
        the group names.
    """
    tables = {"all": get_correlations_df(df, right_side=right_side)}

    e2s_values = df["end_to_start"]
    s2e_values = df["start_to_end"]
    # Rows with both flags unset are part of every group, so compute the mask once.
    golden_rows = (e2s_values == False) & (s2e_values == False)  # noqa: E712

    for use_e2s in (False, True):
        for use_s2e in (False, True):
            label = "golden" + ("+e2s" if use_e2s else "") + ("+s2e" if use_s2e else "")

            flagged_rows = (e2s_values == use_e2s) & (s2e_values == use_s2e)
            subset = df[flagged_rows | golden_rows]
            tables[label] = get_correlations_df(subset, right_side=right_side)

    return pd.concat(tables, axis=1)