# Spaces: Runtime error  (page-status banner captured along with the source; not part of the program)
import os

import nltk  # we'll use this to split into sentences
import numpy as np
import soundfile as sf
from bark import generate_audio, SAMPLE_RATE
from bark.api import semantic_to_waveform
from bark.generation import (
    generate_text_semantic,
    preload_models,
)
# Restrict CUDA to the first GPU.
# NOTE(review): this runs after the bark import above; confirm that nothing
# initialises CUDA at import time, otherwise move this assignment above the imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Loads the model, should be run one time
preload_models()
class AudioBook:
    """Generate narrated WAV files from text, one sentence at a time.

    The text is split into sentences, each sentence is synthesised with
    ``generate_audio`` using a fixed voice preset, and the resulting clips are
    joined (with a short pause after each one) into a single WAV file inside
    ``output_folder``.
    """

    # Voice presets selectable through the ``speaker`` argument.
    _SPEAKER_PRESETS = {
        "male": "v2/en_speaker_6",
        "female": "v2/en_speaker_9",
    }

    # Length of the pause appended after every sentence, in seconds.
    _SENTENCE_PAUSE_SECONDS = 0.25

    def __init__(self, output_folder="output"):
        """Remember the output folder and create it if it does not exist.

        Args:
            output_folder: Directory that generated WAV files are written to.
        """
        self.output_folder = output_folder
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_folder, exist_ok=True)

    def generate_audio_from_text(self, text, speaker="male", filename="output_audio"):
        """Synthesise ``text`` and save it as ``<output_folder>/<filename>.wav``.

        Args:
            text: The text to narrate. Newlines are treated as spaces.
            speaker: ``"male"`` or ``"female"``; selects the voice preset.
            filename: Base name of the output file, without the ``.wav`` suffix.

        Returns:
            The path of the WAV file that was written.

        Raises:
            ValueError: If ``speaker`` is not ``"male"`` or ``"female"``, or if
                ``text`` contains nothing to narrate.
        """
        # Validate the cheap argument first so bad calls fail before any work.
        try:
            history_prompt = self._SPEAKER_PRESETS[speaker]
        except KeyError:
            raise ValueError(
                "Invalid speaker selection. Use 'male' or 'female'."
            ) from None

        # Preprocess text
        text = text.replace("\n", " ").strip()
        # NOTE(review): nltk.sent_tokenize needs the NLTK "punkt" tokenizer data;
        # confirm it is available in the deployment environment.
        sentences = nltk.sent_tokenize(text) if text else []
        if not sentences:
            # Without this guard np.concatenate([]) fails with an opaque message.
            raise ValueError("No text to narrate: 'text' is empty or whitespace-only.")

        # Quarter-second of silence, placed after every sentence (including the last).
        silence = np.zeros(int(self._SENTENCE_PAUSE_SECONDS * SAMPLE_RATE))

        pieces = []
        for sentence in sentences:
            audio_array = generate_audio(
                sentence,
                history_prompt=history_prompt,
                text_temp=0.7,
                waveform_temp=0.7,
            )
            # np.concatenate copies its inputs, so the same silence array can be reused.
            pieces.extend((audio_array, silence))
        audio_data = np.concatenate(pieces)

        # Save the audio to a WAV file in the output folder
        output_path = os.path.join(self.output_folder, f"{filename}.wav")
        sf.write(output_path, audio_data, SAMPLE_RATE)
        return output_path