Spaces:
Runtime error
Runtime error
import numpy as np | |
import matplotlib.pyplot as plt | |
from numba import jit | |
import torch | |
def time_warp(costs): | |
dtw = np.zeros_like(costs) | |
dtw[0, 1:] = np.inf | |
dtw[1:, 0] = np.inf | |
eps = 1e-4 | |
for i in range(1, costs.shape[0]): | |
for j in range(1, costs.shape[1]): | |
dtw[i, j] = costs[i, j] + min(dtw[i - 1, j], dtw[i, j - 1], dtw[i - 1, j - 1]) | |
return dtw | |
def align_from_distances(distance_matrix, debug=False, return_mindist=False): | |
# for each position in spectrum 1, returns best match position in spectrum2 | |
# using monotonic alignment | |
dtw = time_warp(distance_matrix) | |
i = distance_matrix.shape[0] - 1 | |
j = distance_matrix.shape[1] - 1 | |
results = [0] * distance_matrix.shape[0] | |
while i > 0 and j > 0: | |
results[i] = j | |
i, j = min([(i - 1, j), (i, j - 1), (i - 1, j - 1)], key=lambda x: dtw[x[0], x[1]]) | |
if debug: | |
visual = np.zeros_like(dtw) | |
visual[range(len(results)), results] = 1 | |
plt.matshow(visual) | |
plt.show() | |
if return_mindist: | |
return results, dtw[-1, -1] | |
return results | |
def get_local_context(input_f, max_window=32, scale_factor=1.): | |
# input_f: [S, 1], support numpy array or torch tensor | |
# return hist: [S, max_window * 2], list of list | |
T = input_f.shape[0] | |
# max_window = int(max_window * scale_factor) | |
derivative = [[0 for _ in range(max_window * 2)] for _ in range(T)] | |
for t in range(T): # travel the time series | |
for feat_idx in range(-max_window, max_window): | |
if t + feat_idx < 0 or t + feat_idx >= T: | |
value = 0 | |
else: | |
value = input_f[t + feat_idx] | |
derivative[t][feat_idx + max_window] = value | |
return derivative | |
def cal_localnorm_dist(src, tgt, src_len, tgt_len): | |
local_src = torch.tensor(get_local_context(src)) | |
local_tgt = torch.tensor(get_local_context(tgt, scale_factor=tgt_len / src_len)) | |
local_norm_src = (local_src - local_src.mean(-1).unsqueeze(-1)) # / local_src.std(-1).unsqueeze(-1) # [T1, 32] | |
local_norm_tgt = (local_tgt - local_tgt.mean(-1).unsqueeze(-1)) # / local_tgt.std(-1).unsqueeze(-1) # [T2, 32] | |
dists = torch.cdist(local_norm_src[None, :, :], local_norm_tgt[None, :, :]) # [1, T1, T2] | |
return dists | |
## here is API for one sample | |
def LoNDTWDistance(src, tgt): | |
# src: [S] | |
# tgt: [T] | |
dists = cal_localnorm_dist(src, tgt, src.shape[0], tgt.shape[0]) # [1, S, T] | |
costs = dists.squeeze(0) # [S, T] | |
alignment, min_distance = align_from_distances(costs.T.cpu().detach().numpy(), return_mindist=True) # [T] | |
return alignment, min_distance | |
# if __name__ == '__main__': | |
# # utils from ns | |
# from utils.pitch_utils import denorm_f0 | |
# from tasks.singing.fsinging import FastSingingDataset | |
# from utils.hparams import hparams, set_hparams | |
# | |
# set_hparams() | |
# | |
# train_ds = FastSingingDataset('test') | |
# | |
# # Test One sample case | |
# sample = train_ds[0] | |
# amateur_f0 = sample['f0'] | |
# prof_f0 = sample['prof_f0'] | |
# | |
# amateur_uv = sample['uv'] | |
# amateur_padding = sample['mel2ph'] == 0 | |
# prof_uv = sample['prof_uv'] | |
# prof_padding = sample['prof_mel2ph'] == 0 | |
# amateur_f0_denorm = denorm_f0(amateur_f0, amateur_uv, hparams, pitch_padding=amateur_padding) | |
# prof_f0_denorm = denorm_f0(prof_f0, prof_uv, hparams, pitch_padding=prof_padding) | |
# alignment, min_distance = LoNDTWDistance(amateur_f0_denorm, prof_f0_denorm) | |
# print(min_distance) | |
# python utils/pitch_distance.py --config egs/datasets/audio/molar/svc_ppg.yaml | |