Spaces:
Running
Running
import os | |
import fnmatch | |
import shutil | |
import numpy | |
import torchaudio | |
import gradio | |
from bark.hubert.pre_kmeans_hubert import CustomHubert | |
from bark.hubert.customtokenizer import auto_train | |
from tqdm.auto import tqdm | |
def training_prepare_files(path, model, progress=gradio.Progress(track_tqdm=True)):
    """Populate ``<path>/ready`` with paired HuBERT features and semantic tokens.

    For every ``*.wav`` file in ``./training/data/output_wav`` that has a
    same-named ``.npy`` file in ``./training/data/output``, two files are
    written to ``<path>/ready``:

    * ``{i}_semantic_features.npy`` - the output of the HuBERT model for the
      audio, saved as a NumPy array;
    * ``{i}_semantic.npy`` - a copy of the matching semantics file.

    ``i`` is the position of the wav file in the directory listing, so the
    indices of skipped files are simply left unused.

    If ``<path>/ready`` already contains at least one ``.npy`` file, the
    function returns without doing anything.

    Args:
        path: Base training directory; output goes to ``<path>/ready``.
        model: Checkpoint path handed to ``CustomHubert``.
        progress: Gradio progress tracker. It is not referenced in the body;
            declaring it with ``track_tqdm=True`` is what lets Gradio mirror
            the ``tqdm`` loop below in the UI.

    Returns:
        None.
    """
    semanticsfolder = "./training/data/output"
    wavfolder = "./training/data/output_wav"
    ready = os.path.join(path, 'ready')

    # os.listdir() raises FileNotFoundError on a missing directory, which made
    # the very first run against a fresh training path crash. Create it up
    # front; exist_ok keeps repeat runs harmless.
    os.makedirs(ready, exist_ok=True)

    if fnmatch.filter(os.listdir(ready), '*.npy'):
        # Prepared data is already present - do not regenerate it.
        return

    # prepare and copy for training
    hubert_model = CustomHubert(checkpoint_path=model)

    wavfiles = fnmatch.filter(os.listdir(wavfolder), '*.wav')
    for i, f in tqdm(enumerate(wavfiles), total=len(wavfiles)):
        # Cut off the extension (everything after the last dot) and look for
        # the semantics file that shares the same stem.
        stem = f.rsplit('.', 1)[0]
        semafilename = os.path.join(semanticsfolder, f'{stem}.npy')
        if not os.path.isfile(semafilename):
            print(f'Skipping {f} no semantics pair found!')
            continue

        print('Processing', f)
        wav, sr = torchaudio.load(os.path.join(wavfolder, f))
        # Down-mix to mono if needed. Any multi-channel layout is averaged,
        # not only stereo, so the model always sees a single channel.
        if wav.shape[0] > 1:
            wav = wav.mean(0, keepdim=True)

        output = hubert_model.forward(wav, input_sample_hz=sr)
        out_array = output.cpu().numpy()

        # Features and semantics share the index prefix so they stay paired.
        numpy.save(os.path.join(ready, f'{i}_semantic_features.npy'), out_array)
        shutil.copy(semafilename, os.path.join(ready, f'{i}_semantic.npy'))
def train(path, save_every, max_epochs):
    """Start tokenizer training on the data under *path*.

    Delegates to ``auto_train``, passing ``save_every`` as its
    ``save_epochs`` argument.

    Args:
        path: Training data directory, forwarded to ``auto_train`` unchanged.
        save_every: Value forwarded as ``save_epochs``.
        max_epochs: Accepted for interface compatibility but currently unused.

    Returns:
        None.
    """
    # NOTE(review): max_epochs is never forwarded to auto_train, so any epoch
    # limit chosen by the caller has no effect here - confirm whether
    # auto_train accepts such a parameter before wiring it through.
    checkpoint_interval = save_every
    auto_train(path, save_epochs=checkpoint_interval)