|
import gradio as gr |
|
import base64 |
|
import numpy as np |
|
from scipy.io import wavfile |
|
from voice_processing import parallel_tts, get_model_names |
|
from io import BytesIO |
|
import asyncio |
|
import os |
|
import tempfile |
|
from pathlib import Path |
|
from gradio.data_classes import FileData |
|
|
|
|
|
# Working directories, all located next to this module.
OUTPUTS_DIR = Path(__file__).parent / "outputs"  # convert_tts writes results here
TEMP_DIR = Path(__file__).parent / "temp"
UPLOAD_DIR = Path(__file__).parent / "uploaded"

# Create the directories at import time; exist_ok makes repeated imports
# and restarts harmless when the folders are already present.
for directory in (OUTPUTS_DIR, TEMP_DIR, UPLOAD_DIR):
    directory.mkdir(exist_ok=True)
|
|
|
async def convert_tts(model_name, audio_file, slang_rate):
    """Run voice conversion on an uploaded audio input and save the result.

    Builds a single task for ``parallel_tts``, runs it in the default
    executor so the event loop is not blocked while it works, and writes
    the produced audio into ``OUTPUTS_DIR`` under a random file name.

    Args:
        model_name: Model identifier, forwarded as the first task field.
        audio_file: Uploaded audio as delivered by the UI component;
            ``None`` when nothing was uploaded.
        slang_rate: Value forwarded as the fourth task field.

    Returns:
        A ``(payload, path)`` tuple. On success ``payload`` is
        ``{"info": ...}`` and ``path`` is the saved file path as a string.
        On any failure ``payload`` is ``{"error": message}`` and ``path``
        is ``None``; exceptions are reported this way rather than raised.
    """
    if audio_file is None:
        return {"error": "No audio file uploaded."}, None

    try:
        print(f"Processing audio file: {audio_file}")

        # Positional task layout consumed by parallel_tts; the meaning of
        # the None/True placeholders is defined there, not in this file.
        task = (model_name, None, None, slang_rate, True, audio_file)

        # Inside a coroutine the running loop is the one to use;
        # get_event_loop() is the legacy spelling for this.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, parallel_tts, [task])

        # Guard against an empty/None result before indexing into it.
        if not result or result[0] is None or len(result[0]) != 3:
            return {"error": "Processing failed"}, None

        info, _, (tgt_sr, audio_output) = result[0]
        if audio_output is None:
            return {"error": "No audio output generated"}, None

        # Random suffix keeps concurrent requests from overwriting each other.
        output_filename = f"output_{os.urandom(4).hex()}.wav"
        output_path = str(OUTPUTS_DIR / output_filename)

        try:
            if isinstance(audio_output, np.ndarray):
                # Sample array: encode as WAV at the returned sample rate.
                wavfile.write(output_path, tgt_sr, audio_output)
            else:
                # Anything else is written verbatim as raw bytes.
                with open(output_path, "wb") as f:
                    f.write(audio_output)
        except Exception as save_error:
            print(f"Error saving output: {save_error}")
            return {"error": f"Error saving output: {str(save_error)}"}, None

        print(f"Saved output to: {output_path}")
        return {"info": info}, output_path

    except Exception as e:
        # UI boundary: report every failure to the caller instead of raising.
        print(f"Error in convert_tts: {str(e)}")
        import traceback

        traceback.print_exc()
        return {"error": str(e)}, None
|
|
|
def get_models():
    """Return the model names reported by ``get_model_names``.

    Thin wrapper used to populate the model dropdown in the UI.
    """
    return get_model_names()
|
|
|
|
|
# Gradio UI: model picker, audio upload and slang-rate slider as inputs;
# an info JSON panel and the converted audio as outputs.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_models(), label="Model", interactive=True),
        # Gradio 4 (implied by this file's `gradio.data_classes.FileData`
        # import) takes a list through `sources`; the singular `source`
        # keyword was removed there.
        # NOTE(review): Audio defaults to type="numpy"; if parallel_tts
        # expects a file path, add type="filepath" -- confirm against it.
        gr.Audio(label="Upload Audio", sources=["upload"]),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
    title="Voice Conversion",
    analytics_enabled=False,
    cache_examples=False,
).queue()
|
|
|
|
|
if __name__ == "__main__":
    # Start the app when run as a script; share=False means no public
    # share link is requested.
    iface.launch(
        debug=True,
        show_error=True,
        max_threads=10,
        share=False,
    )