"""Gradio text-to-speech app built on Bark.

The input text is split into sentences, each sentence is synthesised with
Bark using a voice preset chosen by language and gender, and the joined
audio is offered as a downloadable WAV file.
"""
import os
import tempfile

import gradio as gr
import nltk
import numpy as np
import scipy
from bark import generate_audio
from bark.generation import SAMPLE_RATE, preload_models
from IPython.display import Audio
from scipy.io import wavfile

# Tokenizer data used by nltk.sent_tokenize to split text into sentences.
nltk.download('punkt')

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

preload_models()

# Bark voice preset for every supported (language, gender) pair.
_VOICE_PRESETS = {
    "english": {"male": "v2/en_speaker_8", "female": "v2/en_speaker_9"},
    "french": {"male": "v2/fr_speaker_0", "female": "v2/fr_speaker_1"},
    "german": {"male": "v2/de_speaker_2", "female": "v2/de_speaker_3"},
    "hindi": {"male": "v2/hi_speaker_8", "female": "v2/hi_speaker_3"},
    "chinese": {"male": "v2/zh_speaker_1", "female": "v2/zh_speaker_4"},
    "italian": {"male": "v2/it_speaker_4", "female": "v2/it_speaker_7"},
    "japanese": {"male": "v2/ja_speaker_2", "female": "v2/ja_speaker_0"},
}


def generate_audio_from_text(
    text: str, language_prompt: str, gender_prompt: str
) -> str:
    """Synthesise *text* with Bark and return the path of the WAV file.

    Args:
        text: Text to convert to speech. It is split into sentences and each
            sentence is generated separately.
        language_prompt: One of the keys of ``_VOICE_PRESETS``.
        gender_prompt: ``"male"`` or ``"female"``.

    Returns:
        Path of a temporary ``.wav`` file holding the concatenated audio.
        The file is created with ``delete=False`` and is not removed here.

    Raises:
        ValueError: If the language/gender pair is not supported, or if
            *text* contains no sentence to synthesise.
    """
    try:
        history_prompt = _VOICE_PRESETS[language_prompt][gender_prompt]
    except (KeyError, TypeError):
        # Covers unknown values as well as None from an unselected dropdown.
        raise ValueError("Invalid language or gender selection") from None

    sentences = nltk.sent_tokenize(text) if text and text.strip() else []
    if not sentences:
        # np.concatenate rejects an empty list with an opaque message, so
        # fail early and clearly before doing any model work.
        raise ValueError("Text must contain at least one sentence")

    pieces = [
        generate_audio(sentence, history_prompt=history_prompt)
        for sentence in sentences
    ]
    final_audio = np.concatenate(pieces)

    # Reserve a unique path, close the handle, then write by path so the
    # file is never open twice at the same time.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        wav_path = temp_wav.name
    wavfile.write(wav_path, SAMPLE_RATE, final_audio)

    return wav_path


# Dropdown choices; languages come from the preset table so they cannot drift.
language_options = list(_VOICE_PRESETS)

gender_options = [
    "male",
    "female",
]

# Gradio interface: a text box plus dropdown menus for language and gender.
iface = gr.Interface(
    fn=generate_audio_from_text,
    inputs=[
        gr.Textbox(label="Enter text to convert to speech:"),
        gr.Dropdown(choices=language_options, label="Select language:"),
        gr.Dropdown(choices=gender_options, label="Select gender:"),
    ],
    outputs=gr.File(label="Download WAV File"),
    title="Text-to-Speech App Vertical Solution",
    # NOTE(review): `timeout` is not among the documented gr.Interface
    # parameters; confirm it has any effect on the installed Gradio version.
    timeout=300,
)

# Launch the Gradio app with sharing enabled.
# NOTE(review): newer Gradio releases configure queuing via `iface.queue()`;
# confirm `enable_queue` is still accepted by the installed version.
iface.launch(share=True, debug=True, enable_queue=True)