File size: 1,867 Bytes
f7b117b
 
 
 
a6cfc29
 
 
f7b117b
 
 
 
 
 
 
 
 
 
 
 
 
 
a6cfc29
f7b117b
 
 
 
 
 
 
 
 
 
 
 
a6cfc29
 
 
f7b117b
 
 
 
 
 
 
 
a6cfc29
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
import wandb


def get_wandb_data(
    entity: str, project: str, api_key: str, job_type: str
) -> pd.DataFrame:
    """Download the runs of a W&B project into a single flat DataFrame.

    Args:
        entity: W&B entity that owns the project.
        project: Project name; the runs are queried at ``"<entity>/<project>"``.
        api_key: API key passed to ``wandb.Api``.
        job_type: Only runs whose ``jobType`` equals this value are requested.

    Returns:
        One row per run, indexed by the run's name. Columns are the
        flattened summary entries followed by the flattened config entries.
    """
    client = wandb.Api(api_key=api_key)
    project_path = f"{entity}/{project}"
    matching_runs = client.runs(project_path, filters={"jobType": job_type})

    # For every run collect, in this order:
    #   * the summary (output metrics); ``_json_dict`` is used instead of the
    #     summary object itself (the original note says this omits large files),
    #   * the config (hyperparameters), taken as-is -- no keys are filtered out,
    #   * the human-readable run name.
    records = [
        (run.summary._json_dict, run.config, run.name) for run in matching_runs
    ]
    summaries = [record[0] for record in records]
    configs = [record[1] for record in records]
    run_names = [record[2] for record in records]

    # Nested dicts are flattened one level deep for summaries and two levels
    # deep for configs, then the two tables are placed side by side.
    flat_summaries = pd.json_normalize(summaries, max_level=1)
    flat_configs = pd.json_normalize(configs, max_level=2)
    combined = pd.concat([flat_summaries, flat_configs], axis=1)
    combined.index = run_names
    return combined


def get_leaderboard(runs_df: pd.DataFrame, metrics: list[str]) -> pd.DataFrame:
    """Count, per model and metric, how many groups the model wins.

    Runs are grouped by their ``unique_id`` column. Within each group the run
    with the smallest value of a metric wins that metric (lower is better),
    and the win is credited to that run's ``model``. Ties go to the first run
    in the group.

    Args:
        runs_df: One row per run; must contain the columns ``model``,
            ``unique_id`` and every name listed in ``metrics``.
        metrics: Names of the metric columns to score.

    Returns:
        Integer DataFrame indexed by model name with one column per metric,
        holding the win counts, sorted in descending order by the metric
        columns (first metric is the primary sort key).
    """
    # Build the counters directly as integer zeros. An empty frame followed by
    # ``fillna(0)`` would leave the columns with object dtype.
    leaderboard = pd.DataFrame(0, index=runs_df["model"].unique(), columns=metrics)

    for _, group_df in runs_df.groupby("unique_id"):
        group_models = group_df["model"]
        for column in leaderboard.columns:
            # Work positionally: the index of ``runs_df`` may contain repeated
            # labels, in which case a label-based lookup would match several
            # rows and credit more than one model.
            scores = group_df[column].reset_index(drop=True)
            if not scores.notna().any():
                # No run in this group reported the metric, so nobody wins it.
                continue
            best_model = group_models.iloc[scores.idxmin()]
            leaderboard.loc[best_model, column] += 1

    return leaderboard.sort_values(by=list(leaderboard.columns), ascending=False)


def get_model_ranks(runs_df: pd.DataFrame, metric: str) -> pd.DataFrame:
    """Order the models by their median value of ``metric``.

    Args:
        runs_df: One row per run; must contain a ``model`` column and a
            numeric ``metric`` column.
        metric: Name of the column whose per-model median decides the order.

    Returns:
        DataFrame with the columns ``rank`` and ``model``. Rows are sorted by
        ascending median, and ``rank`` is the zero-based position in that
        order.
    """
    # Median of every numeric column per model; non-numeric columns are dropped.
    per_model_median = runs_df.groupby(["model"]).median(numeric_only=True)
    ordered = per_model_median.sort_values(by=metric)

    # Move ``model`` out of the index so that the fresh 0..n-1 index can be
    # labelled "rank" and then turned into a regular column as well.
    with_model_column = ordered.reset_index()
    with_rank_column = with_model_column.rename_axis("rank").reset_index()
    return with_rank_column[["rank", "model"]]