import os
import os.path as osp
import re
from glob import glob

from titlecase import titlecase

# Output directories for the generated summary pages.
os.makedirs('topics', exist_ok=True)
os.makedirs('papers', exist_ok=True)


def _parse_task(task):
    """Parse a task name into its modality parts.

    Data modality is represented by a string of 4 or 5 parts, e.g.:

    - 2d_kpt_sview_rgb_img
    - gesture_sview_rgbd_vid
    """
    parts = task.split('_')
    if len(parts) == 5:
        pass
    elif len(parts) == 4:
        # The first field is missing; pad it so that every parsed task
        # has 5 aligned fields.
        parts = [''] + parts
    else:
        raise ValueError(f'Invalid task name: {task}')
    return parts
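
# For illustration, the two patterns documented above parse as:
#   _parse_task('2d_kpt_sview_rgb_img')   -> ['2d', 'kpt', 'sview', 'rgb', 'img']
#   _parse_task('gesture_sview_rgbd_vid') -> ['', 'gesture', 'sview', 'rgbd', 'vid']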

# Step 1: collect (topic, task) pairs from the config tree and derive a
# readable subtopic title for each; only the modality fields that actually
# differ between tasks of a topic are shown in the title.
minisections = [
    x.split(osp.sep)[-2:] for x in glob('../../configs/*/*')
    if '_base_' not in x
]
alltopics = sorted(list(set(x[0] for x in minisections)))
subtopics = []
for topic in alltopics:
    tasks = [_parse_task(x[1]) for x in minisections if x[0] == topic]
    # Indices of the modality fields that differ within this topic.
    valid_ids = []
    for i in range(len(tasks[0])):
        if len(set(x[i] for x in tasks)) > 1:
            valid_ids.append(i)
    if len(valid_ids) > 0:
        for task in tasks:
            appendix = ','.join(
                [task[i].title() for i in valid_ids if task[i]])
            subtopic = [
                f'{titlecase(topic)}({appendix})',
                topic,
                '_'.join(t for t in task if t),
            ]
            subtopics.append(subtopic)
    else:
        # Drop empty fields to avoid a leading underscore in the task name.
        subtopics.append(
            [titlecase(topic), topic, '_'.join(t for t in tasks[0] if t)])
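
# For illustration (hypothetical config layout): a topic 'hand' with tasks
# '2d_kpt_sview_rgb_img' and '3d_kpt_sview_rgb_img' differs only in the
# first field, so it would yield subtopics such as
#   ['Hand(2D)', 'hand', '2d_kpt_sview_rgb_img']
#   ['Hand(3D)', 'hand', '3d_kpt_sview_rgb_img']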

# Step 2: read the per-model Markdown files, grouped by subtopic and dataset.
contents = {}
for subtopic, topic, task in sorted(subtopics):
    datasets = sorted(
        list(
            set(
                x.split(osp.sep)[-2]
                for x in glob(f'../../configs/{topic}/{task}/*/*/'))))
    contents[subtopic] = {d: {} for d in datasets}
    for dataset in datasets:
        for file in glob(f'../../configs/{topic}/{task}/*/{dataset}/*.md'):
            # Key each entry by the algorithm directory plus the model-name
            # parts of the filename (its last '_'-separated field, which
            # includes the file extension, is dropped).
            keywords = (file.split(osp.sep)[-3],
                        *file.split(osp.sep)[-1].split('_')[:-1])
            with open(file, 'r', encoding='utf-8') as f:
                contents[subtopic][dataset][keywords] = f.read()
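
# At this point `contents` maps, e.g. (hypothetical names):
#   contents['Hand(2D)']['coco'][('topdown_heatmap', 'hrnet', 'w32')]
# to the raw Markdown text of that model's README.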

# Step 3: write one summary page per subtopic under topics/.
for subtopic, datasets in contents.items():
    lines = [f'# {subtopic}', '']
    for dataset, keywords in datasets.items():
        if len(keywords) == 0:
            continue
        lines += [
            '<hr/>', '<br/><br/>', '', f'## {titlecase(dataset)} Dataset', ''
        ]
        for keyword, info in keywords.items():
            keyword_strs = [titlecase(x.replace('_', ' ')) for x in keyword]
            lines += [
                '<br/>', '',
                (f'### {" + ".join(keyword_strs)}'
                 f' on {titlecase(dataset)}'), '', info, ''
            ]

    with open(f'topics/{subtopic.lower()}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))
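
# A generated topic page then reads roughly like (hypothetical names):
#   # Hand(2D)
#   ## Coco Dataset
#   ### Topdown Heatmap + Hrnet + W32 on Coco
#   <the model's Markdown content>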

# Step 4: write one page per paper category under papers/, collecting every
# model section whose Markdown cites the paper.
allfiles = [x.split(osp.sep)[-2:] for x in glob('../en/papers/*/*.md')]
sections = sorted(list(set(x[0] for x in allfiles)))
for section in sections:
    lines = [f'# {titlecase(section)}', '']
    files = [f for s, f in allfiles if s == section]
    for file in files:
        # The paper title sits in the first `<summary>` line of its page;
        # stripping the HTML tags recovers the plain name.
        with open(
                f'../en/papers/{section}/{file}', 'r', encoding='utf-8') as f:
            keyline = [
                line for line in f.readlines() if line.startswith('<summary')
            ][0]
        papername = re.sub(r'<.*?>', '', keyline).strip()
        paperlines = []
        for subtopic, datasets in contents.items():
            for dataset, keywords in datasets.items():
                # Keep only the entries that contain this paper's summary line.
                keywords = {k: v for k, v in keywords.items() if keyline in v}
                if len(keywords) == 0:
                    continue
                for keyword, info in keywords.items():
                    keyword_strs = [
                        titlecase(x.replace('_', ' ')) for x in keyword
                    ]
                    paperlines += [
                        '<br/>', '',
                        (f'### {" + ".join(keyword_strs)}'
                         f' on {titlecase(dataset)}'), '', info, ''
                    ]
        if len(paperlines) > 0:
            lines += ['<hr/>', '<br/><br/>', '', f'## {papername}', '']
            lines += paperlines

    with open(f'papers/{section}.md', 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))