"""Gradio demo: text-to-speech with Suno's Bark model.

Loads the `suno/bark-small` checkpoint once at import time, then serves a
simple textbox -> audio interface. Synthesis runs on CUDA when available.
"""
import io

import gradio as gr
import numpy as np
import scipy.io.wavfile
import torch
from transformers import AutoProcessor, BarkModel

device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): processor comes from "suno/bark" while weights come from
# "suno/bark-small" — the small checkpoint shares the same processor, but
# confirm the repos are intentionally different.
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark-small")
# Bug fix: the model was never moved to the selected device, so generation
# crashed with a device mismatch whenever CUDA was available.
model = model.to(device)
# convert to bettertransformer (fused attention kernels for faster inference)
model = model.to_bettertransformer()


def text_to_audio(question: str) -> tuple[int, np.ndarray]:
    """Synthesize *question* to speech with the Bark model.

    Returns a ``(sample_rate, int16_samples)`` tuple — the format Gradio's
    ``Audio(type="numpy")`` component expects. (The original code serialized
    a WAV file to bytes, which that component cannot render.)
    """
    voice_preset = "v2/en_speaker_6"
    inputs = processor(question, voice_preset=voice_preset)
    # Bug fix: move processor tensors to the model's device. The hasattr
    # guard skips non-tensor entries (e.g. nested voice-preset data).
    inputs = {
        k: v.to(device) if hasattr(v, "to") else v for k, v in inputs.items()
    }
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        audio_array = model.generate(**inputs)
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    # Scale float waveform (assumed in [-1, 1]) to 16-bit PCM range.
    return sample_rate, np.int16(audio_array * 32767)


def gradio_interface(question: str) -> tuple[int, np.ndarray]:
    """Gradio callback: forward the textbox value to the synthesizer."""
    return text_to_audio(question)


# NOTE(review): live=True re-runs full Bark generation on every keystroke,
# which takes seconds per call — consider dropping it so synthesis only
# runs on submit.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.components.Textbox(label="Question"),
    outputs=gr.components.Audio(type="numpy"),
    live=True,
)

interface.launch()