# Spaces: Running on A10G
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os | |
import json | |
from tqdm import tqdm | |
def cal_metadata(cfg):
    """
    Dump metadata (singers.json, meta_info.json, utt2singer) for singer dataset or multi-datasets.

    For every dataset name in ``cfg.dataset`` (sorted in place), the directory
    ``<cfg.preprocess.processed_dir>/<dataset>`` must already exist and contain
    ``train.json`` and ``test.json``. Each of those is a list of utterance
    dicts; the keys read here are "Dataset", "Singer", "Uid" and "Duration".
    "Duration" is treated as seconds (it is divided by 60 for minutes and by
    3600 for hours).

    Per dataset, the function:
      * rewrites ``train.json`` / ``test.json`` sorted by ascending duration;
      * writes the utterance-to-singer table to ``cfg.preprocess.utt2spk``;
      * writes the singer-name-to-index lookup to ``cfg.preprocess.spk2id``;
      * writes size/duration statistics to ``meta_info.json``;
      * prints the training minutes of every singer.

    All files are written as UTF-8 because the JSON is dumped with
    ``ensure_ascii=False`` and may therefore contain non-ASCII characters.

    Args:
        cfg: Configuration object exposing ``dataset`` (list of dataset
            names) and ``preprocess.processed_dir`` / ``preprocess.spk2id`` /
            ``preprocess.utt2spk``.

    Raises:
        AssertionError: If a dataset's processed directory does not exist.
    """
    from collections import Counter

    def singer_key(utt):
        # Augmented copies of a dataset share singer identities with the
        # original dataset, so the augmentation tag is stripped from the name.
        return f"{replace_augment_name(utt['Dataset'])}_{utt['Singer']}"

    def rank_minutes(counter):
        # Longest total duration first; round only after sorting so the order
        # reflects the exact values.
        ranked = sorted(counter.items(), key=lambda x: x[1], reverse=True)
        return {k: round(v, 2) for k, v in ranked}

    datasets = cfg.dataset

    print("-" * 10)
    print("Preparing metadata...")
    print("Including: \n{}\n".format("\n".join(datasets)))

    datasets.sort()
    for dataset in tqdm(datasets):
        save_dir = os.path.join(cfg.preprocess.processed_dir, dataset)
        assert os.path.exists(save_dir), f"Processed directory not found: {save_dir}"

        # 'train.json' and 'test.json' of target dataset
        train_metadata = os.path.join(save_dir, "train.json")
        test_metadata = os.path.join(save_dir, "test.json")

        # Sort the metadata as the duration order
        with open(train_metadata, "r", encoding="utf-8") as f:
            train_utterances = json.load(f)
        with open(test_metadata, "r", encoding="utf-8") as f:
            test_utterances = json.load(f)

        train_utterances = sorted(train_utterances, key=lambda x: x["Duration"])
        test_utterances = sorted(test_utterances, key=lambda x: x["Duration"])
        all_utterances = train_utterances + test_utterances

        # Write back the sorted metadata
        with open(train_metadata, "w", encoding="utf-8") as f:
            json.dump(train_utterances, f, indent=4, ensure_ascii=False)
        with open(test_metadata, "w", encoding="utf-8") as f:
            json.dump(test_utterances, f, indent=4, ensure_ascii=False)

        # Paths of metadata needed to be generated
        singer_dict_file = os.path.join(save_dir, cfg.preprocess.spk2id)
        utt2singer_file = os.path.join(save_dir, cfg.preprocess.utt2spk)

        # Get the total duration for train and test utterances
        train_total_duration = sum(utt["Duration"] for utt in train_utterances)
        test_total_duration = sum(utt["Duration"] for utt in test_utterances)
        train_hours = round(train_total_duration / 3600, 4)
        test_hours = round(test_total_duration / 3600, 4)

        # Write the utt2singer file and collect the singer names
        singer_names = set()
        with open(utt2singer_file, "w", encoding="utf-8") as f:
            for utt in all_utterances:
                singer = singer_key(utt)
                singer_names.add(singer)
                f.write(f"{utt['Dataset']}_{utt['Uid']}\t{singer}\n")

        # Sorted names give every singer a stable index
        singer_lut = {name: i for i, name in enumerate(sorted(singer_names))}

        # dump singers.json
        with open(singer_dict_file, "w", encoding="utf-8") as f:
            json.dump(singer_lut, f, indent=4, ensure_ascii=False)

        meta_info = {
            "dataset": dataset,
            "statistics": {
                "size": len(all_utterances),
                # Re-round the sum to avoid float artifacts in the dumped JSON
                "hours": round(train_hours + test_hours, 4),
            },
            "train": {
                "size": len(train_utterances),
                "hours": train_hours,
            },
            "test": {
                "size": len(test_utterances),
                "hours": test_hours,
            },
            "singers": {"size": len(singer_lut)},
        }

        # Use Counter to count the minutes for each singer
        total_singer2mins = Counter()
        training_singer2mins = Counter()
        for utt in train_utterances:
            k = singer_key(utt)
            training_singer2mins[k] += utt["Duration"] / 60
            total_singer2mins[k] += utt["Duration"] / 60
        for utt in test_utterances:
            total_singer2mins[singer_key(utt)] += utt["Duration"] / 60

        training_singer2mins = rank_minutes(training_singer2mins)
        meta_info["singers"]["training_minutes"] = training_singer2mins

        total_singer2mins = rank_minutes(total_singer2mins)
        meta_info["singers"]["minutes"] = total_singer2mins

        with open(os.path.join(save_dir, "meta_info.json"), "w", encoding="utf-8") as f:
            json.dump(meta_info, f, indent=4, ensure_ascii=False)

        for singer, minutes in training_singer2mins.items():
            print(f"Singer {singer}: {minutes} mins for training")

    print("-" * 10, "\n")


def replace_augment_name(dataset: str) -> str:
    """Map an augmented dataset name back to the name of its source dataset.

    The augmentation tags are checked in a fixed order (equalizer,
    formant_shift, pitch_shift, time_stretch). Only the first tag found in
    the name is handled: every occurrence of "_<tag>" is removed and the
    result is returned. A name containing none of the tags is returned
    unchanged.

    >>> print(replace_augment_name("dataset_equalizer"))
    dataset
    """
    augment_tags = ("equalizer", "formant_shift", "pitch_shift", "time_stretch")
    for tag in augment_tags:
        if tag in dataset:
            # Stop at the first tag present, even if the underscore-prefixed
            # form is absent and the replacement is therefore a no-op.
            return dataset.replace("_" + tag, "")
    return dataset