|
import gradio as gr |
|
from transformers import BarkModel, AutoProcessor |
|
import torch |
|
import scipy.io.wavfile |
|
import io |
|
import numpy as np |
|
|
|
# Prefer GPU when available; Bark generation is very slow on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Bug fix: the processor checkpoint now matches the model checkpoint.
# The original loaded the processor from "suno/bark" while the model was
# "suno/bark-small", mixing assets from two different checkpoints.
processor = AutoProcessor.from_pretrained("suno/bark-small")

model = BarkModel.from_pretrained("suno/bark-small")

# Bug fix: `device` was computed but never used — the model stayed on CPU
# even when CUDA was available. Move it before enabling BetterTransformer.
model = model.to(device)

# Enable the BetterTransformer fast attention path for inference.
model = model.to_bettertransformer()
|
|
|
def text_to_audio(question):
    """Synthesize speech for *question* using the module-level Bark model.

    Parameters
    ----------
    question : str
        The text to turn into speech.

    Returns
    -------
    tuple[int, numpy.ndarray]
        ``(sample_rate, int16_samples)`` — the pair that Gradio's
        ``Audio(type="numpy")`` output component expects.  The original
        returned raw WAV bytes, which that component cannot render.
    """
    voice_preset = "v2/en_speaker_6"

    inputs = processor(question, voice_preset=voice_preset)
    # Bug fix: input tensors must live on the same device as the model;
    # without this the call crashes when the model is on CUDA.
    # (BatchFeature.to moves only the contained torch tensors.)
    inputs = inputs.to(device)

    # inference_mode: no autograd bookkeeping during generation.
    with torch.inference_mode():
        audio_array = model.generate(**inputs)
    audio_array = audio_array.cpu().numpy().squeeze()

    sample_rate = model.generation_config.sample_rate

    # Scale the float waveform (nominally in [-1, 1]) to 16-bit PCM.
    # clip prevents integer wrap-around on any out-of-range samples.
    pcm = np.int16(np.clip(audio_array, -1.0, 1.0) * 32767)

    return sample_rate, pcm
|
|
|
|
|
def gradio_interface(question):
    """Gradio callback: delegate synthesis of *question* to text_to_audio."""
    return text_to_audio(question)
|
|
|
|
|
# Build and launch the web UI.
# Bug fix: `live=True` was removed — it re-ran full Bark generation on
# every keystroke in the textbox, which is prohibitively expensive for a
# TTS model; the user now triggers synthesis with an explicit submit.
interface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.components.Textbox(label="Question"),
    # type="numpy" means the callback must return (sample_rate, ndarray).
    outputs=gr.components.Audio(type="numpy"),
)

interface.launch()
|
|