File size: 2,008 Bytes
d359e5f 0869928 d359e5f 9325cfe e94f976 9325cfe 32fb87d 2c87986 32fb87d 2c87986 32fb87d 2c87986 32fb87d 2c87986 32fb87d 2c87986 32fb87d d359e5f 942b6ca d359e5f 8398686 d359e5f eb0534b 32fb87d d359e5f 942b6ca 32fb87d e7a7e70 32fb87d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import tts, get_model_names, voice_mapping
from io import BytesIO
import asyncio
async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Synthesize speech for *tts_text* and return it as a base64 WAV data URI.

    Args:
        model_name: Model identifier, forwarded unchanged to ``tts``.
        tts_text: Text to synthesize, forwarded unchanged to ``tts``.
        selected_voice: Key looked up in ``voice_mapping``; the mapped value
            is what gets passed to ``tts``.
        slang_rate: Forwarded unchanged to ``tts``.
        use_uploaded_voice: When truthy and ``voice_upload`` is not ``None``,
            the uploaded file is read and its bytes are passed to ``tts``.
        voice_upload: Uploaded file from the UI, or ``None``. Either an
            object exposing the file path as ``.name`` or a plain path.

    Returns:
        A ``(payload, audio_uri)`` pair:

        * ``({"error": ...}, None)`` when ``selected_voice`` has no (truthy)
          entry in ``voice_mapping``.
        * ``({"info": info}, None)`` when ``tts`` produced no audio.
        * ``({"info": info}, "data:audio/wav;base64,...")`` otherwise.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        # Accept both file-like wrappers (path in ``.name``) and bare path
        # strings, which have no ``.name`` attribute.
        upload_path = getattr(voice_upload, "name", voice_upload)
        with open(upload_path, "rb") as f:
            voice_upload_file = f.read()

    # Only ``info`` and the audio payload are used; the two path-like return
    # values are intentionally ignored.
    info, _edge_tts_output_path, tts_output_data, _edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # Without audio there is nothing to encode: report ``info`` instead of
    # failing with a TypeError on the unpack or in ``base64.b64encode``.
    if tts_output_data is None:
        return {"info": info}, None
    _, audio_output = tts_output_data
    if audio_output is None:
        return {"info": info}, None

    if isinstance(audio_output, np.ndarray):
        # NOTE(review): the first element of ``tts_output_data`` is discarded
        # and 40000 Hz is hard-coded here. If that element is the real sample
        # rate, it should be used instead -- confirm against ``tts``.
        with BytesIO() as wav_buffer:
            wavfile.write(wav_buffer, 40000, audio_output)
            audio_bytes = wav_buffer.getvalue()
    else:
        # Anything that is not an array is passed through to base64 as-is.
        audio_bytes = audio_output

    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
    return {"info": info}, audio_data_uri
def get_models():
    """Return the model names reported by ``get_model_names``."""
    model_names = get_model_names()
    return model_names
def get_voices():
    """Return the keys of ``voice_mapping`` as a new list."""
    voice_names = [*voice_mapping.keys()]
    return voice_names
# Input widgets, listed in the same order as convert_tts's parameters.
_tts_inputs = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]

# Output widgets: the info/error payload and the audio data URI string.
_tts_outputs = [
    gr.JSON(label="Info"),
    gr.Textbox(label="Audio URI"),
]

# Concurrency limit is set to 6; adjust it to suit your hardware.
iface = gr.Interface(
    fn=convert_tts,
    inputs=_tts_inputs,
    outputs=_tts_outputs,
    title="Text-to-Speech Conversion",
).queue(default_concurrency_limit=6)

iface.launch()