Spaces:
Runtime error
Runtime error
import numpy as np | |
import os, re, json, sys | |
import torch, torchaudio, pathlib | |
def load_and_process_audio(model, melody, sample_rate): | |
if melody is not None: | |
melody = torch.from_numpy(melody).to(model.device).float().t().unsqueeze(0) | |
if melody.dim() == 2: | |
melody = melody[None] | |
melody = melody[..., :int(sample_rate * model.lm.cfg.dataset.segment_duration)] | |
return melody | |
else: | |
return None | |
#From https://colab.research.google.com/drive/154CqogsdP-D_TfSF9S2z8-BY98GN_na4?usp=sharing#scrollTo=exKxNU_Z4i5I | |
#Thank you DragonForged for the link | |
def extend_audio(model, prompt_waveform, prompts, prompt_sr, segments=5, overlap=2): | |
# Calculate the number of samples corresponding to the overlap | |
overlap_samples = int(overlap * prompt_sr) | |
device = model.device | |
prompt_waveform = prompt_waveform.to(device) | |
for i in range(1, segments): | |
# Grab the end of the waveform | |
end_waveform = prompt_waveform[...,-overlap_samples:] | |
# Process the trimmed waveform using the model | |
new_audio = model.generate_continuation(end_waveform, descriptions=[prompts[i]], prompt_sample_rate=prompt_sr, progress=True) | |
# Cut the seed audio off the newly generated audio | |
new_audio = new_audio[...,overlap_samples:] | |
prompt_waveform = torch.cat([prompt_waveform, new_audio], dim=2) | |
return prompt_waveform | |
def predict(model, prompts, melody_parameters, extension_parameters): | |
melody = None #load_and_process_audio(MODEL, **melody_parameters) | |
if melody is not None: | |
output = MODEL.generate_with_chroma( | |
descriptions=[prompt[0]], | |
melody_wavs=melody, | |
melody_sample_rate=melody_parameters['sample_rate'], | |
progress=False | |
) | |
else: | |
output = model.generate(descriptions=[prompts[0]], progress=True) | |
sample_rate = model.sample_rate | |
if extension_parameters['segments'] > 1: | |
output_tensors = extend_audio(model, output, prompts, sample_rate, **extension_parameters).detach().cpu().float() | |
else: | |
output_tensors = output.detach().cpu().float() | |
return sample_rate, output_tensors |