vocos-bark / app.py
ylacombe's picture
Update app.py
fc22820
raw
history blame
2.26 kB
import torch
from threading import Thread
from transformers import AutoProcessor
from transformers import set_seed
from utils.vocos_bark import BarkModel
from scipy.io.wavfile import write
from pydub import AudioSegment
import numpy as np
import os
import gradio as gr
import uuid
import io
# Seed the random number generators once at import time; presumably so that
# sampling-based generation is repeatable across restarts (see transformers.set_seed).
set_seed(0)
def _grab_best_device(use_gpu=True):
    """Return the name of the torch device to run inference on.

    Args:
        use_gpu: When false, CUDA is not probed at all and "cpu" is returned.

    Returns:
        "cuda" if ``use_gpu`` is true and torch reports at least one CUDA
        device, otherwise "cpu".
    """
    # Test the flag first so a CPU-only request never touches the CUDA runtime.
    if use_gpu and torch.cuda.device_count() > 0:
        return "cuda"
    return "cpu"
# Resolve the inference device once; the processor inputs and the model both use it.
device = _grab_best_device()

# Hub checkpoint shared by the processor and the model.
HUB_PATH = "suno/bark"
# Sample rate returned to Gradio together with every generated waveform.
SAMPLE_RATE = 24_000

processor = AutoProcessor.from_pretrained(HUB_PATH)
# Voice presets offered in the UI: every embedding key containing "speaker", sorted.
speaker_embeddings = sorted(
    preset for preset in processor.speaker_embeddings if "speaker" in preset
)

# Load the model, then move it to the accelerator when one was selected.
# NOTE(review): the flattened source does not show whether to_bettertransformer()
# was nested under the GPU branch; it is applied on the non-CPU path only here - confirm.
bark = BarkModel.from_pretrained(HUB_PATH)
if device != "cpu":
    bark = bark.to(device).to_bettertransformer()
# Inference: one blocking generate() call per request (the full waveform is returned at once)
def generate_audio(text, voice_preset=None, lag=0):
    """Synthesize speech for ``text`` with Bark and return it for a Gradio audio output.

    The whole utterance is produced by a single blocking ``bark.generate``
    call made directly from this function.

    Args:
        text: Prompt to be spoken.
        voice_preset: Name of a speaker preset. Any value not listed in the
            module-level ``speaker_embeddings`` (including ``None``) is
            treated as "no preset".
        lag: Not read by this function; kept so existing callers keep working.

    Returns:
        A ``(SAMPLE_RATE, samples)`` tuple, where ``samples`` is the squeezed
        waveform moved to the CPU and converted to a NumPy array.
    """
    preset = voice_preset if voice_preset in speaker_embeddings else None

    # The processor receives a one-element batch holding the prompt.
    encoded = processor([text], voice_preset=preset).to(device)
    audio = bark.generate(
        **encoded,
        semantic_temperature=0.5,
        coarse_temperature=0.8,
    )
    return SAMPLE_RATE, audio.squeeze().cpu().numpy()
# Gradio Blocks UI: prompt box and voice-preset dropdown feed generate_audio,
# whose result is shown in an autoplaying audio component.
# NOTE(review): the nesting of the rows inside the group is reconstructed from
# flattened source - confirm against the deployed layout.
with gr.Blocks() as demo_blocks:
    gr.Markdown(value="""<h1 align="center">🐶BARK with Vocos</h1>""")
    gr.HTML(value="""<h3 style="text-align:center;">📢Audio Streaming powered by Gradio 🦾! </h3>""")

    with gr.Group():
        with gr.Row():
            inp_text = gr.Textbox(
                info="Enter text here",
                label="What should Bark say?",
            )
            dd = gr.Dropdown(
                choices=speaker_embeddings,
                info="Defaults to no speaker embeddings!",
                label="Available voice presets",
                value=None,
            )

        with gr.Row():
            btn = gr.Button(value="Bark with Vocos TTS")

        with gr.Row():
            out_audio = gr.Audio(autoplay=True, type="numpy")

    # Event listeners must be registered while the Blocks context is still open.
    btn.click(fn=generate_audio, inputs=[inp_text, dd], outputs=out_audio)

# Enable the request queue, then start the server with debug output.
demo_blocks.queue().launch(debug=True)