Spaces:
Sleeping
Sleeping
File size: 4,447 Bytes
d5d7329 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
from __future__ import annotations
import warnings
from logging import getLogger
from pathlib import Path
from typing import Iterable
import librosa
import soundfile
from joblib import Parallel, delayed
from tqdm_joblib import tqdm_joblib
from .preprocess_utils import check_hubert_min_duration
LOG = getLogger(__name__)
# input_dir and output_dir exist.
# write code to convert input dir audio files to output dir audio files,
# without changing folder structure. Use joblib to parallelize.
# Converting audio files includes:
# - resampling to specified sampling rate
# - trim silence
# - adjust volume in a smart way
# - save as 16-bit wav file
def _get_unique_filename(path: Path, existing_paths: Iterable[Path]) -> Path:
    """Return ``path`` itself, or a numbered variant not in ``existing_paths``.

    When ``path`` is already taken, ``_1``, ``_2``, ... is inserted between
    the stem and the suffix until a name that is not in ``existing_paths``
    is found.
    """
    candidate = path
    counter = 0
    # Probe path, then path_1, path_2, ... and stop at the first free name.
    while candidate in existing_paths:
        counter += 1
        candidate = path.parent / f"{path.stem}_{counter}{path.suffix}"
    return candidate
def is_relative_to(path: Path, *other):
    """Tell whether ``path`` can be expressed relative to ``other``.

    Backport of ``Path.is_relative_to()`` (available from Python 3.9) so that
    Python 3.8 is still supported.

    Returns True when ``path.relative_to(*other)`` succeeds and False when it
    raises ``ValueError``.
    """
    try:
        path.relative_to(*other)
    except ValueError:
        return False
    else:
        return True
def _preprocess_one(
    input_path: Path,
    output_path: Path,
    sr: int,
    *,
    top_db: int,
    frame_seconds: float,
    hop_seconds: float,
) -> None:
    """Preprocess one audio file and save it as a 16-bit PCM file.

    The file is loaded as mono at the requested sampling rate, scaled so that
    its peak absolute amplitude is 1, trimmed with ``librosa.effects.trim``
    and written with ``soundfile.write``.

    Args:
        input_path: File to load with ``librosa.load``.
        output_path: Destination passed to ``soundfile.write``.
        sr: Target sampling rate passed to ``librosa.load``.
        top_db: Threshold passed to ``librosa.effects.trim``.
        frame_seconds: Trim frame length in seconds (converted to samples).
        hop_seconds: Trim hop length in seconds (converted to samples).

    Nothing is written when the file cannot be loaded, when the signal has no
    positive peak (completely silent), or when ``check_hubert_min_duration``
    rejects the audio either before or after trimming.
    """
    try:
        audio, sr = librosa.load(input_path, sr=sr, mono=True)
    # Audioread is the last backend it will attempt, so this is the exception thrown on failure
    except Exception as e:
        # Failure due to attempting to load a file that is not audio, so return early
        LOG.warning(f"Failed to load {input_path} due to {e}")
        return

    if not check_hubert_min_duration(audio, sr):
        LOG.info(f"Skip {input_path} because it is too short.")
        return

    # Adjust volume: scale so that the largest absolute sample becomes 1.
    peak = max(audio.max(), -audio.min())
    # A silent clip has peak 0; dividing by it would fill the buffer with NaN.
    # ``not peak > 0`` also rejects a NaN peak.
    if not peak > 0:
        LOG.info(f"Skip {input_path} because it is silent.")
        return
    audio /= peak

    # Trim silence
    audio, _ = librosa.effects.trim(
        audio,
        top_db=top_db,
        frame_length=int(frame_seconds * sr),
        hop_length=int(hop_seconds * sr),
    )

    # Trimming can shorten the clip, so the duration is checked again.
    if not check_hubert_min_duration(audio, sr):
        LOG.info(f"Skip {input_path} because it is too short.")
        return

    soundfile.write(output_path, audio, samplerate=sr, subtype="PCM_16")
def preprocess_resample(
    input_dir: Path | str,
    output_dir: Path | str,
    sampling_rate: int,
    n_jobs: int = -1,
    *,
    top_db: int = 30,
    frame_seconds: float = 0.1,
    hop_seconds: float = 0.05,
) -> None:
    """Preprocess audio files in input_dir and save them to output_dir.

    Every file found recursively under ``input_dir`` whose name contains a dot
    and that sits inside at least one sub-folder is mapped to
    ``output_dir / <first sub-folder> / <stem>.wav`` and processed by
    ``_preprocess_one`` in parallel through joblib. Name clashes in the
    output folder are resolved with ``_get_unique_filename``.

    Args:
        input_dir: Root folder that is searched recursively.
        output_dir: Root folder for the converted files.
        sampling_rate: Target sampling rate forwarded as ``sr``.
        n_jobs: Forwarded to ``joblib.Parallel``.
        top_db: Forwarded to ``_preprocess_one``.
        frame_seconds: Forwarded to ``_preprocess_one``.
        hop_seconds: Forwarded to ``_preprocess_one``.

    Raises:
        ValueError: If the recursive search of ``input_dir`` finds nothing.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    in_paths = list(input_dir.rglob("*.*"))
    if not in_paths:
        raise ValueError(f"No audio files found in {input_dir}")

    # Inputs and outputs are collected as pairs so that a skipped input can
    # never shift the pairing of the inputs that follow it.
    in_and_out_paths = []
    # Set of already reserved outputs: constant-time uniqueness checks.
    used_out_paths = set()
    for in_path in in_paths:
        # "*.*" also matches directories whose name contains a dot.
        if not in_path.is_file():
            continue
        in_path_relative = in_path.relative_to(input_dir)
        # Legacy layout dataset_raw/44k/<speaker>/...: drop the "44k" level.
        # The last condition keeps relative_to("44k") from raising when
        # input_dir itself already points at dataset_raw/44k.
        if (
            not in_path.is_absolute()
            and is_relative_to(in_path, Path("dataset_raw") / "44k")
            and in_path_relative.parts[:1] == ("44k",)
        ):
            new_in_path_relative = in_path_relative.relative_to("44k")
            warnings.warn(
                f"Recommended folder structure has changed since v1.0.0. "
                "Please move your dataset directly under dataset_raw folder. "
                f"Recoginzed {in_path_relative} as {new_in_path_relative}"
            )
            in_path_relative = new_in_path_relative

        # Files that are not inside a sub-folder are skipped.
        if len(in_path_relative.parts) < 2:
            continue
        speaker_name = in_path_relative.parts[0]
        file_name = in_path_relative.with_suffix(".wav").name
        out_path = output_dir / speaker_name / file_name
        out_path = _get_unique_filename(out_path, used_out_paths)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        used_out_paths.add(out_path)
        in_and_out_paths.append((in_path, out_path))

    with tqdm_joblib(desc="Preprocessing", total=len(in_and_out_paths)):
        Parallel(n_jobs=n_jobs)(
            delayed(_preprocess_one)(
                in_path,
                out_path,
                sr=sampling_rate,
                top_db=top_db,
                frame_seconds=frame_seconds,
                hop_seconds=hop_seconds,
            )
            for in_path, out_path in in_and_out_paths
        )
|