File size: 2,620 Bytes
4b2522c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65687fa
4b2522c
 
 
 
e5438c2
4b2522c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84b5dfa
4b2522c
 
84b5dfa
 
4b2522c
 
 
 
 
84b5dfa
4b2522c
 
 
 
 
 
 
84b5dfa
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from collections import defaultdict

from src.leaderboards.saved import leaderboard_to_tags
from src.static.env import API

def group_all_tags(input_tags: list[str]) -> dict:
    """Buckets raw tags under their category, following the division in the README.

    A tag is expected to look like ``category:value``. The bare tag
    ``arena`` is special-cased and recorded as ``humans`` under the
    ``judge`` category. Any other tag that does not contain exactly one
    colon is ignored.

    Args:
        input_tags (list[str]): raw tags to sort into categories

    Returns:
        dict: category name mapped to the list of its lower-cased values,
            in the order the tags were encountered
    """
    grouped = defaultdict(list)
    for raw_tag in input_tags:
        # Arena spaces carry no explicit judge tag: they are human-judged.
        if raw_tag == "arena":
            grouped["judge"].append("humans")
            continue

        pieces = raw_tag.split(":")
        # Only well-formed "category:value" tags are kept.
        if len(pieces) != 2:
            continue

        category, value = pieces
        grouped[category].append(value.lower())

    return grouped


def get_leaderboard_info() -> tuple[dict, dict]:
    """Looks up all spaces tagged as leaderboards or arenas on the hub,
    and homogenizes their tags.

    Hub results (``leaderboard`` filter, then ``arena`` filter) are combined
    with the manually saved ``leaderboard_to_tags`` entries. When a name is
    present in several sources only its first occurrence is processed, so the
    hub version wins over the saved one; the saved tags are still merged
    into it.

    Returns:
        tuple[dict, dict]:
            - leaderboard name -> sorted, de-duplicated list of
              ``"category:tag"`` strings
            - category -> tag -> sorted, de-duplicated list of leaderboard
              names, plus the special key ``"all"`` mapped directly to the
              sorted list of every leaderboard name seen
    """
    hub_leaderboards = [
        (space.id, space.tags) for space in API.list_spaces(filter=["leaderboard"])
    ]
    hub_arenas = [
        (space.id, space.tags) for space in API.list_spaces(filter=["arena"])
    ]
    saved_leaderboards = list(leaderboard_to_tags.items())

    # A set gives O(1) duplicate detection; it is sorted once at the end.
    seen_leaderboards = set()
    leaderboard_to_info = defaultdict(list)
    info_to_leaderboard = defaultdict(lambda: defaultdict(list))
    for name, tags in hub_leaderboards + hub_arenas + saved_leaderboards:
        # If we have a duplicate between the leaderboards from the hub
        # (leaderboards, arenas) and the ones we saved manually, we use the
        # version from the hub, which comes first in the iteration order.
        if name in seen_leaderboards:
            continue

        seen_leaderboards.add(name)

        # Work on a copy: extending `tags` in place would mutate the hub
        # object's tag list or, for saved-only entries, the shared
        # `leaderboard_to_tags` list itself (doubling it on every call).
        # NOTE(review): `or []` assumes the hub may return None for a space
        # without tags -- confirm against the API client.
        merged_tags = list(tags or [])

        # If the leaderboard has its own tags, plus the ones we saved, we
        # aggregate them. Duplicates are harmless: they are removed below.
        if name in leaderboard_to_tags:
            merged_tags.extend(leaderboard_to_tags[name])

        for category, values in group_all_tags(merged_tags).items():
            for value in values:
                info_to_leaderboard[category][value].append(name)
                leaderboard_to_info[name].append(f"{category}:{value}")

    # De-duplicate and sort every list. Only existing keys are reassigned,
    # so updating the dicts while iterating over them is safe.
    for leaderboard, info in leaderboard_to_info.items():
        leaderboard_to_info[leaderboard] = sorted(set(info))

    for category, category_dict in info_to_leaderboard.items():
        for tag, space_list in category_dict.items():
            category_dict[tag] = sorted(set(space_list))

    # NOTE: "all" maps to a flat list, unlike the other categories, and would
    # overwrite a real category named "all" if one ever existed.
    info_to_leaderboard["all"] = sorted(seen_leaderboards)
    return leaderboard_to_info, info_to_leaderboard