Spaces:
Running
on
A10G
Running
on
A10G
File size: 2,417 Bytes
0883aa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import glob
import librosa
import json
from utils.util import has_existed
def main(output_path, dataset_path):
    """Build the metadata files for the "svcceval" dataset.

    Every file matching ``<dataset_path>/Data/*/*.wav`` is treated as one
    utterance whose singer is the name of its parent directory. All utterances
    are placed in the test split; the train split is always written empty.

    Files written under ``<output_path>/svcceval``:
        * ``utt2singer``  - one ``<uid>\\t<singer>`` line per utterance.
        * ``singers.json`` - singer name -> integer id (ids follow sorted names).
        * ``train.json`` / ``test.json`` - utterance lists sorted by ``Uid``.
          A split file is skipped when ``has_existed`` reports a truthy value
          for it (presumably "file already exists" - confirm in utils.util).

    Args:
        output_path: Root directory that receives the ``svcceval`` folder.
        dataset_path: Root directory of the raw dataset (contains ``Data``).
    """
    print("-" * 10)
    print("Preparing training dataset for svcceval...")

    data_dir = os.path.join(dataset_path, "Data")

    save_dir = os.path.join(output_path, "svcceval")
    os.makedirs(save_dir, exist_ok=True)

    singer_dict_file = os.path.join(save_dir, "singers.json")
    utt2singer_file = os.path.join(save_dir, "utt2singer")

    # Load utterances. Nothing is ever added to `train`; it is kept so that an
    # (empty) train.json is still produced alongside test.json.
    train = []
    test = []
    singers = []

    # The context manager guarantees utt2singer is flushed and closed even if
    # reading one of the audio files raises.
    with open(utt2singer_file, "w") as utt2singer:
        for wav_file in glob.glob(os.path.join(data_dir, "*/*.wav")):
            # Use os.path helpers rather than splitting on "/" so the singer
            # directory is found regardless of the platform's path separator.
            singer = os.path.basename(os.path.dirname(wav_file))
            filename = os.path.basename(wav_file)
            # Everything before the first "." of the file name.
            uid = filename.split(".")[0]

            utt = {
                "Dataset": "svcceval",
                "Singer": singer,
                "Uid": "{}_{}".format(singer, uid),
                "Path": wav_file,
            }

            # Duration as reported by librosa for the file.
            utt["Duration"] = librosa.get_duration(filename=wav_file)

            test.append(utt)
            singers.append(singer)

            utt2singer.write("{}\t{}\n".format(utt["Uid"], utt["Singer"]))

    # Save singers.json
    unique_singers = sorted(set(singers))
    singer_lut = {name: i for i, name in enumerate(unique_singers)}
    with open(singer_dict_file, "w") as f:
        json.dump(singer_lut, f, indent=4, ensure_ascii=False)

    train_total_duration = sum(utt["Duration"] for utt in train)
    test_total_duration = sum(utt["Duration"] for utt in test)

    # Explicit name -> list mapping instead of eval() on the split name.
    splits = {"train": train, "test": test}
    for dataset_type, split_utterances in splits.items():
        output_file = os.path.join(save_dir, "{}.json".format(dataset_type))
        if has_existed(output_file):
            continue

        utterances = sorted(split_utterances, key=lambda x: x["Uid"])

        # "index" is the utterance's position in the Uid-sorted list.
        for i, utt in enumerate(utterances):
            utt["index"] = i

        print("{}: Total size: {}\n".format(dataset_type, len(utterances)))

        # Save
        with open(output_file, "w") as f:
            json.dump(utterances, f, indent=4, ensure_ascii=False)

    print(
        "#Train hours= {}, #Test hours= {}".format(
            train_total_duration / 3600, test_total_duration / 3600
        )
    )
|