Spaces:
Running
on
A10G
Running
on
A10G
# Copyright (c) 2023 Amphion. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
import os | |
import glob | |
import librosa | |
import json | |
from utils.util import has_existed | |
def main(output_path, dataset_path):
    """Build the metadata files for the ``svcceval`` dataset.

    Scans ``<dataset_path>/Data/<singer>/*.wav`` and writes the following
    files under ``<output_path>/svcceval``:

    * ``utt2singer``  - one ``<uid>\\t<singer>`` row per utterance.
    * ``singers.json`` - mapping of singer name to an integer id, assigned
      in sorted-name order.
    * ``train.json`` / ``test.json`` - utterance lists sorted by ``Uid``,
      each entry carrying an ``index`` field. Every utterance found is put
      into the test split, so the train list is always empty.

    Args:
        output_path: Root directory under which the ``svcceval`` folder is
            created.
        dataset_path: Root directory of the raw dataset; must contain a
            ``Data`` sub-directory with one folder per singer.
    """
    print("-" * 10)
    print("Preparing training dataset for svcceval...")

    data_dir = os.path.join(dataset_path, "Data")
    save_dir = os.path.join(output_path, "svcceval")
    os.makedirs(save_dir, exist_ok=True)

    singer_dict_file = os.path.join(save_dir, "singers.json")
    utt2singer_file = os.path.join(save_dir, "utt2singer")

    # Load utterances. Only the test split is ever populated here; the train
    # list is kept so that an (empty) train.json is still produced.
    train = []
    test = []
    singers = []

    # Sort the glob result so the row order of utt2singer does not depend on
    # filesystem enumeration order.
    wav_files = sorted(glob.glob(os.path.join(data_dir, "*/*.wav")))

    # The context manager guarantees the mapping file is flushed and closed
    # even if reading one of the audio files raises.
    with open(utt2singer_file, "w") as utt2singer:
        for wav_file in wav_files:
            # Use os.path rather than splitting on "/" so that paths using
            # the platform's native separator are handled as well.
            singer_dir, filename = os.path.split(wav_file)
            singer = os.path.basename(singer_dir)
            uid = filename.split(".")[0]

            utt = {
                "Dataset": "svcceval",
                "Singer": singer,
                "Uid": "{}_{}".format(singer, uid),
                "Path": wav_file,
            }

            # Duration
            # NOTE(review): newer librosa releases rename the `filename`
            # keyword to `path` - confirm against the pinned librosa version.
            utt["Duration"] = librosa.get_duration(filename=wav_file)

            test.append(utt)
            singers.append(singer)

            utt2singer.write("{}\t{}\n".format(utt["Uid"], utt["Singer"]))

    # Save singers.json
    unique_singers = sorted(set(singers))
    singer_lut = {name: i for i, name in enumerate(unique_singers)}
    with open(singer_dict_file, "w") as f:
        json.dump(singer_lut, f, indent=4, ensure_ascii=False)

    train_total_duration = sum(utt["Duration"] for utt in train)
    test_total_duration = sum(utt["Duration"] for utt in test)

    # Explicit name -> list mapping instead of eval() on the split name.
    splits = {"train": train, "test": test}
    for dataset_type, split_utts in splits.items():
        output_file = os.path.join(save_dir, "{}.json".format(dataset_type))
        # Skip this split when has_existed() reports the file as present
        # (exact semantics are defined by utils.util.has_existed).
        if has_existed(output_file):
            continue

        utterances = sorted(split_utts, key=lambda x: x["Uid"])

        # The index is the utterance's position in Uid-sorted order.
        for i, utt in enumerate(utterances):
            utt["index"] = i

        print("{}: Total size: {}\n".format(dataset_type, len(utterances)))

        # Save
        with open(output_file, "w") as f:
            json.dump(utterances, f, indent=4, ensure_ascii=False)

    print(
        "#Train hours= {}, #Test hours= {}".format(
            train_total_duration / 3600, test_total_duration / 3600
        )
    )