Bark-Voice-Cloning

Running

File size: 2,503 Bytes

a6aa664

import random
import uuid
import numpy
import os
import random
import fnmatch

from tqdm.auto import tqdm
from scipy.io import wavfile

from bark.generation import load_model, SAMPLE_RATE
from bark.api import semantic_to_waveform

from bark import text_to_semantic
from bark.generation import load_model

from training.data import load_books, random_split_chunk

# Folder that holds the generated semantic arrays (one .npy file per text chunk).
output = 'training/data/output'
# Folder that receives the .wav files rendered from those semantic arrays.
output_wav = f'{output}_wav'


def prepare_semantics_from_text(num_generations=None):
    """Generate semantic arrays from random text chunks and save them as .npy files.

    Loads the book data and the Bark 'text' model, then repeatedly picks a
    non-empty chunk of text, converts it with ``text_to_semantic`` and stores
    the result in the ``output`` folder under a random UUID-based file name.

    Args:
        num_generations: Number of semantic files to generate. ``None`` keeps
            generating until the process is interrupted (the behaviour this
            function had before the count was honoured).
    """
    loaded_data = load_books(True)

    print('Loading semantics model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='text')

    # makedirs (unlike mkdir) also creates missing parent folders and is a
    # no-op when the folder already exists.
    os.makedirs(output, exist_ok=True)

    loop = 1
    while num_generations is None or loop <= num_generations:
        file_name = os.path.join(output, uuid.uuid4().hex + '.npy')

        # Keep drawing until the chunk contains something besides whitespace.
        text = ''
        while not text:
            text = random_split_chunk(loaded_data).strip()  # Obtain a short chunk of text

        print(f'{loop} Generating semantics for text:', text)
        loop += 1

        # The temperature is randomised per sample, within [0.6, 0.8].
        semantics = text_to_semantic(text, temp=round(random.uniform(0.6, 0.8), ndigits=2))
        numpy.save(file_name, semantics)


def prepare_wavs_from_semantics():
    """Render every saved semantic array in ``output`` to a .wav file in ``output_wav``.

    Loads the Bark 'coarse' and 'fine' models, then converts each ``*.npy``
    file with ``semantic_to_waveform`` and writes the result at
    ``SAMPLE_RATE``. Files that already have a matching .wav are skipped, so
    an interrupted run can be resumed.

    Raises:
        FileNotFoundError: If the ``output`` folder does not exist.
    """
    if not os.path.isdir(output):
        raise FileNotFoundError('No \'output\' folder, make sure you run create_data.py first!')
    # makedirs (unlike mkdir) also creates missing parent folders and is a
    # no-op when the folder already exists.
    os.makedirs(output_wav, exist_ok=True)

    print('Loading coarse model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='coarse')
    print('Loading fine model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='fine')

    files = fnmatch.filter(os.listdir(output), '*.npy')
    total = len(files)

    for i, f in tqdm(enumerate(files), total=total):
        real_name = os.path.splitext(f)[0]  # Cut off the extension
        file_name = os.path.join(output, f)
        out_file = os.path.join(output_wav, f'{real_name}.wav')

        # Don't process files that have already been processed, to be able to
        # continue previous generations; also skip entries that are not files.
        if os.path.isfile(out_file) or not os.path.isfile(file_name):
            continue

        print(f'Processing ({i+1}/{total}) -> {f}')
        wav = semantic_to_waveform(numpy.load(file_name), temp=round(random.uniform(0.6, 0.8), ndigits=2))
        # The waveform is written exactly as returned (no PCM16 conversion).
        wavfile.write(out_file, SAMPLE_RATE, wav)

    print('Done!')