|
import gradio as gr |
|
import base64 |
|
import numpy as np |
|
from scipy.io import wavfile |
|
from voice_processing import tts_processor, get_model_names, voice_mapping |
|
from io import BytesIO |
|
import asyncio |
|
import tempfile |
|
import os |
|
|
|
async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Synthesize speech for ``tts_text`` and return a status dict and a WAV path.

    Args:
        model_name: Model identifier, forwarded unchanged to ``tts_processor.tts``.
        tts_text: Text to synthesize, forwarded unchanged.
        selected_voice: Key looked up in ``voice_mapping`` to obtain the voice
            passed to ``tts_processor.tts``.
        slang_rate: Forwarded unchanged to ``tts_processor.tts``.
        use_uploaded_voice: When truthy and ``voice_upload`` is not ``None``,
            the uploaded file's raw bytes are read and forwarded.
        voice_upload: The uploaded file, or ``None``. Either an object exposing
            its path as ``.name`` or the path itself is accepted.

    Returns:
        A 2-tuple ``(status, audio_path)``:
        ``({"info": info}, path_to_wav)`` when audio was produced,
        ``({"error": message}, None)`` when the voice key is unknown or the
        processor returned no audio data.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        # NOTE(review): depending on the Gradio version/configuration the upload
        # may arrive as a wrapper object with ``.name`` or as a bare path
        # string; fall back to the value itself when ``.name`` is missing.
        upload_path = getattr(voice_upload, "name", voice_upload)
        with open(upload_path, "rb") as upload_handle:
            voice_upload_file = upload_handle.read()

    # The middle element of the result is not used by this function.
    info, _edge_tts_output_path, tts_output_data = await tts_processor.tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    if not tts_output_data:
        # Without audio data, ``info`` is surfaced to the UI as the error.
        return {"error": info}, None

    tgt_sr, audio_output = tts_output_data

    # delete=False: the file must outlive this call so the returned path can
    # still be read by whoever consumes it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        if isinstance(audio_output, np.ndarray):
            # Write through the already-open handle rather than reopening the
            # same path by name while this handle is still open.
            wavfile.write(temp_file, tgt_sr, audio_output)
        else:
            # Non-array output is written to the file as-is.
            temp_file.write(audio_output)
        output_path = temp_file.name

    return {"info": info}, output_path
|
|
|
def get_models():
    """Return whatever ``get_model_names()`` reports as the available models."""
    model_names = get_model_names()
    return model_names
|
|
|
def get_voices():
    """Return the keys of ``voice_mapping`` as a new list."""
    # Iterating a mapping yields its keys, so no explicit ``.keys()`` is needed.
    return list(voice_mapping)
|
|
|
# Input widgets, in the same order as the positional parameters of
# ``convert_tts``: model, text, voice, slang rate, upload toggle, upload file.
_input_components = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]

# Output widgets, matching the ``(status_dict, audio_path)`` pair that
# ``convert_tts`` returns.
_output_components = [
    gr.JSON(label="Info"),
    gr.Audio(label="Generated Audio"),
]

# The Gradio interface wiring the widgets above to ``convert_tts``.
iface = gr.Interface(
    fn=convert_tts,
    inputs=_input_components,
    outputs=_output_components,
    title="Text-to-Speech Conversion",
)
|
|
|
if __name__ == "__main__":
    # Only start the interface when executed as a script, not on import.
    iface.launch(debug=True)
|
|