#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import re
from glob import glob

from titlecase import titlecase
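
# Output directories for the generated model-zoo pages, created relative to
# the current working directory.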
os.makedirs('topics', exist_ok=True)
os.makedirs('papers', exist_ok=True)


def _parse_task(task):
    """Parse task name.

    Data modality is represented by a string of 4 or 5 parts like:
    - 2d_kpt_sview_rgb_img
    - gesture_sview_rgbd_vid
    """
    parts = task.split('_')
    if len(parts) == 5:
        pass
    elif len(parts) == 4:
        # The first part "spatial dimension" is optional
        parts = [''] + parts
    else:
        raise ValueError(f'Invalid modality: {task}')
    return parts
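

# For illustration, using the docstring's own examples:
#   _parse_task('2d_kpt_sview_rgb_img')   -> ['2d', 'kpt', 'sview', 'rgb', 'img']
#   _parse_task('gesture_sview_rgbd_vid') -> ['', 'gesture', 'sview', 'rgbd', 'vid']
# (the 4-part form is padded with a leading empty string).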

# Step 1: get subtopics: a mix of topic and task
minisections = [
    x.split(osp.sep)[-2:] for x in glob('../../configs/*/*')
    if '_base_' not in x
]
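# minisections entries are [topic, task] directory pairs from the config
# tree, e.g. something like ['body', '2d_kpt_sview_rgb_img'] (the concrete
# directory names here are assumptions about the repo layout).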
alltopics = sorted(list(set(x[0] for x in minisections)))
subtopics = []
for topic in alltopics:
    tasks = [_parse_task(x[1]) for x in minisections if x[0] == topic]
    valid_ids = []
    for i in range(len(tasks[0])):
        if len(set(x[i] for x in tasks)) > 1:
            valid_ids.append(i)
    if len(valid_ids) > 0:
        for task in tasks:
            appendix = ','.join(
                [task[i].title() for i in valid_ids if task[i]])
            subtopic = [
                f'{titlecase(topic)}({appendix})',
                topic,
                '_'.join(t for t in task if t),
            ]
            subtopics.append(subtopic)
    else:
        # Drop the optional empty leading part, as in the branch above
        subtopics.append(
            [titlecase(topic), topic, '_'.join(t for t in tasks[0] if t)])
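
# Each subtopic is a [display name, topic dir, task dir] triple. When a topic
# spans several tasks, the display name carries the distinguishing parts,
# e.g. a hypothetical 'Body(2D,Kpt)'; otherwise it is just the topic title.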

contents = {}
for subtopic, topic, task in sorted(subtopics):
    # Step 2: get all datasets
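    # Result pages are assumed to live at
    # configs/<topic>/<task>/<algorithm>/<dataset>/, so the second-to-last
    # path component of each match is the dataset directory name.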
    datasets = sorted(
        list(
            set(
                x.split(osp.sep)[-2]
                for x in glob(f'../../configs/{topic}/{task}/*/*/'))))
    contents[subtopic] = {d: {} for d in datasets}
    for dataset in datasets:
        # Step 3: get all settings: algorithm + backbone + trick
        for file in glob(f'../../configs/{topic}/{task}/*/{dataset}/*.md'):
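            # The setting key combines the algorithm directory name ([-3] in
            # the path) with the filename's underscore-separated tokens,
            # minus the trailing '.md'-suffixed one.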
            keywords = (file.split(osp.sep)[-3],
                        *file.split(osp.sep)[-1].split('_')[:-1])
            with open(file, 'r', encoding='utf-8') as f:
                contents[subtopic][dataset][keywords] = f.read()

# Step 4: write files by topic
for subtopic, datasets in contents.items():
    lines = [f'# {subtopic}', '']
    for dataset, keywords in datasets.items():
        if len(keywords) == 0:
            continue
        lines += [
            '<hr/>', '<br/><br/>', '', f'## {titlecase(dataset)} Dataset', ''
        ]
        for keyword, info in keywords.items():
            keyword_strs = [titlecase(x.replace('_', ' ')) for x in keyword]
            lines += [
                '<br/>', '',
                (f'### {" + ".join(keyword_strs)}'
                 f' on {titlecase(dataset)}'), '', info, ''
            ]
    with open(f'topics/{subtopic.lower()}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
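
# At this point topics/<subtopic>.md holds every collected result page for
# that subtopic, grouped by dataset.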

# Step 5: write files by paper
allfiles = [x.split(osp.sep)[-2:] for x in glob('../en/papers/*/*.md')]
sections = sorted(list(set(x[0] for x in allfiles)))
for section in sections:
    lines = [f'# {titlecase(section)}', '']
    files = [f for s, f in allfiles if s == section]
    for file in files:
        with open(
                f'../en/papers/{section}/{file}', 'r', encoding='utf-8') as f:
            keyline = [
                line for line in f.readlines() if line.startswith('<summary')
            ][0]
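        # keyline is the '<summary ...>' line that wraps the paper title;
        # stripping the HTML tags below leaves the plain title for headings.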
        papername = re.sub(r'\<.*?\>', '', keyline).strip()
        paperlines = []
        for subtopic, datasets in contents.items():
            for dataset, keywords in datasets.items():
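                # Keep only the settings whose result page quotes this
                # paper's <summary> line, i.e. the results based on it.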
                keywords = {k: v for k, v in keywords.items() if keyline in v}
                if len(keywords) == 0:
                    continue
                for keyword, info in keywords.items():
                    keyword_strs = [
                        titlecase(x.replace('_', ' ')) for x in keyword
                    ]
                    paperlines += [
                        '<br/>', '',
                        (f'### {" + ".join(keyword_strs)}'
                         f' on {titlecase(dataset)}'), '', info, ''
                    ]
        if len(paperlines) > 0:
            lines += ['<hr/>', '<br/><br/>', '', f'## {papername}', '']
            lines += paperlines
    with open(f'papers/{section}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
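
# Note: the relative paths above ('../../configs', '../en/papers') assume the
# script runs from a docs subdirectory two levels below the repo root, e.g.
# docs/zh_cn/ (an assumption; adjust the working directory accordingly).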