#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import re
from glob import glob

from titlecase import titlecase

os.makedirs('topics', exist_ok=True)
os.makedirs('papers', exist_ok=True)


def _parse_task(task):
    """Parse task name.

    Data modality is represented by a string of 4 or 5 parts like:
    - 2d_kpt_sview_rgb_img
    - gesture_sview_rgbd_vid
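
    Example:
        >>> _parse_task('2d_kpt_sview_rgb_img')
        ['2d', 'kpt', 'sview', 'rgb', 'img']
        >>> _parse_task('gesture_sview_rgbd_vid')
        ['', 'gesture', 'sview', 'rgbd', 'vid']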
    """

    parts = task.split('_')
    if len(parts) == 5:
        pass
    elif len(parts) == 4:
        # The first part "spatial dimension" is optional
        parts = [''] + parts
    else:
        raise ValueError(f'Invalid task name: {task}')

    return parts


# Step 1: get subtopics, each a combination of topic and task
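# Each entry of `minisections` below is a [topic, task] directory pair from
# configs/<topic>/<task>/. Example (hypothetical layout): with config dirs
# 'body/2d_kpt_sview_rgb_img' and 'body/3d_kpt_sview_rgb_img', only the
# spatial-dimension part differs, so the resulting subtopics would be
# 'Body(2D)' and 'Body(3D)'.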
minisections = [
    x.split(osp.sep)[-2:] for x in glob('../../configs/*/*')
    if '_base_' not in x
]
alltopics = sorted(set(x[0] for x in minisections))
subtopics = []
for topic in alltopics:
    tasks = [_parse_task(x[1]) for x in minisections if x[0] == topic]
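    # Keep only the indices of modality parts that vary across this topic's
    # tasks; these varying parts make up the subtopic name suffix.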
    valid_ids = []
    for i in range(len(tasks[0])):
        if len(set(x[i] for x in tasks)) > 1:
            valid_ids.append(i)
    if len(valid_ids) > 0:
        for task in tasks:
            appendix = ','.join(
                [task[i].title() for i in valid_ids if task[i]])
            subtopic = [
                f'{titlecase(topic)}({appendix})',
                topic,
                '_'.join(t for t in task if t),
            ]
            subtopics.append(subtopic)
    else:
        subtopics.append(
            [titlecase(topic), topic, '_'.join(t for t in tasks[0] if t)])

contents = {}
for subtopic, topic, task in sorted(subtopics):
    # Step 2: get all datasets
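    # Datasets are the 4th-level config directories, i.e. the <dataset> in
    # configs/<topic>/<task>/<algorithm>/<dataset>/ (per the glob below).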
    datasets = sorted(
        set(
            x.split(osp.sep)[-2]
            for x in glob(f'../../configs/{topic}/{task}/*/*/')))
    contents[subtopic] = {d: {} for d in datasets}
    for dataset in datasets:
        # Step 3: get all settings: algorithm + backbone + trick
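        # Each setting is keyed by the algorithm directory name plus the
        # underscore-separated parts of the filename; the final token
        # (which carries the '.md' extension) is dropped.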
        for file in glob(f'../../configs/{topic}/{task}/*/{dataset}/*.md'):
            keywords = (file.split(osp.sep)[-3],
                        *file.split(osp.sep)[-1].split('_')[:-1])
            with open(file, 'r', encoding='utf-8') as f:
                contents[subtopic][dataset][keywords] = f.read()

# Step 4: write files by topic
for subtopic, datasets in contents.items():
    lines = [f'# {subtopic}', '']
    for dataset, keywords in datasets.items():
        if len(keywords) == 0:
            continue
        lines += [
            '<hr/>', '<br/><br/>', '', f'## {titlecase(dataset)} Dataset', ''
        ]
        for keyword, info in keywords.items():
            keyword_strs = [titlecase(x.replace('_', ' ')) for x in keyword]
            lines += [
                '<br/>', '',
                (f'### {" + ".join(keyword_strs)}'
                 f' on {titlecase(dataset)}'), '', info, ''
            ]

    with open(f'topics/{subtopic.lower()}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))

# Step 5: write files by paper
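# A model-zoo entry belongs to a paper if the entry's markdown contains the
# paper's '<summary' line; matching entries are grouped by dataset below.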
allfiles = [x.split(osp.sep)[-2:] for x in glob('../en/papers/*/*.md')]
sections = sorted(set(x[0] for x in allfiles))
for section in sections:
    lines = [f'# {titlecase(section)}', '']
    files = [f for s, f in allfiles if s == section]
    for file in files:
        with open(
                f'../en/papers/{section}/{file}', 'r', encoding='utf-8') as f:
            # the first '<summary' line uniquely identifies the paper
            keyline = next(
                line for line in f if line.startswith('<summary'))
        # strip HTML tags to recover the plain paper title
        papername = re.sub(r'<.*?>', '', keyline).strip()
        paperlines = []
        for subtopic, datasets in contents.items():
            for dataset, keywords in datasets.items():
                keywords = {k: v for k, v in keywords.items() if keyline in v}
                if len(keywords) == 0:
                    continue
                for keyword, info in keywords.items():
                    keyword_strs = [
                        titlecase(x.replace('_', ' ')) for x in keyword
                    ]
                    paperlines += [
                        '<br/>', '',
                        (f'### {" + ".join(keyword_strs)}'
                         f' on {titlecase(dataset)}'), '', info, ''
                    ]
        if len(paperlines) > 0:
            lines += ['<hr/>', '<br/><br/>', '', f'## {papername}', '']
            lines += paperlines

    with open(f'papers/{section}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))