import asyncio
import base64
import logging
import os

import gradio as gr
import numpy as np
from scipy.io import wavfile

from voice_processing import parallel_tts, get_model_names


# Emit DEBUG-and-above records through the root logger so the request
# tracing done in convert_tts is visible on the console.
logging.basicConfig(level=logging.DEBUG)

# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)


def _write_output_audio(audio_output, tgt_sr):
    """Persist converted audio under ``outputs/`` and return the file path.

    NumPy arrays are encoded as WAV with ``scipy.io.wavfile.write`` at
    sample rate ``tgt_sr``; any other object is written verbatim in binary
    mode (presumably already-encoded audio bytes -- TODO confirm against
    ``parallel_tts``).

    If writing fails, the partially written file is removed before the
    exception is re-raised so failed requests do not leave empty or
    truncated files behind.
    """
    output_filename = f"output_{os.urandom(4).hex()}.wav"
    output_path = os.path.join("outputs", output_filename)
    os.makedirs("outputs", exist_ok=True)

    try:
        if isinstance(audio_output, np.ndarray):
            logger.debug(f"Saving numpy array with shape {audio_output.shape}")
            wavfile.write(output_path, tgt_sr, audio_output)
        else:
            logger.debug(f"Saving raw audio data of type {type(audio_output)}")
            with open(output_path, "wb") as f:
                f.write(audio_output)
    except Exception:
        # Best-effort cleanup; the original error is what the caller needs.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    return output_path


async def convert_tts(model_name, audio_file, slang_rate):
    """Convert an uploaded audio file with the selected voice model.

    Args:
        model_name: Model identifier chosen in the UI dropdown.
        audio_file: The uploaded audio as handed over by Gradio, or ``None``
            when nothing was uploaded.
        slang_rate: Value of the "Slang Rate" slider, forwarded unchanged to
            ``parallel_tts``.

    Returns:
        A ``(payload, path)`` tuple. On success ``payload`` is
        ``{"info": ...}`` and ``path`` is the saved output file; on any
        failure ``payload`` is ``{"error": <message>}`` and ``path`` is
        ``None``. Exceptions are logged and reported this way, never raised.
    """
    try:
        logger.debug(f"Received request - model: {model_name}, audio: {type(audio_file)}, slang: {slang_rate}")

        if audio_file is None:
            logger.error("No audio file provided")
            return {"error": "No audio file uploaded."}, None

        # Extra diagnostics for file-like uploads (plain path strings have
        # no ``name`` attribute and skip this).
        if hasattr(audio_file, 'name'):
            logger.debug(f"Audio file name: {audio_file.name}")
            logger.debug(f"Audio file type: {type(audio_file)}")

        # Positional task tuple in the layout expected by parallel_tts.
        # NOTE(review): meaning of the None/True slots is defined in
        # voice_processing -- confirm there before changing.
        task = (model_name, None, None, slang_rate, True, audio_file)

        logger.debug("Starting audio processing")
        # parallel_tts is a blocking call; running it directly inside this
        # coroutine would stall the event loop (and every other request)
        # until conversion finishes, so hand it to the default executor.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, parallel_tts, [task])
        logger.debug(f"Processing result: {type(result)}")

        if not result or result[0] is None:
            logger.error("Processing failed - no result")
            return {"error": "Processing failed"}, None

        info, _, (tgt_sr, audio_output) = result[0]
        logger.debug(f"Processing complete - info: {info}")

        if audio_output is None:
            logger.error("No audio output generated")
            return {"error": "No audio output generated"}, None

        try:
            output_path = _write_output_audio(audio_output, tgt_sr)
        except Exception as save_error:
            logger.error(f"Error saving output: {save_error}", exc_info=True)
            return {"error": f"Error saving output: {str(save_error)}"}, None

        logger.debug(f"Successfully saved to {output_path}")
        return {"info": info}, output_path

    except Exception as e:
        # Top-level boundary: surface the failure in the JSON output
        # instead of letting the request crash.
        logger.error(f"Error in convert_tts: {str(e)}", exc_info=True)
        return {"error": str(e)}, None


# Gradio UI: three inputs (model, audio upload, slang rate) are passed
# positionally to convert_tts; its (dict, path) return value feeds the JSON
# and audio outputs in the same order. The queue is enabled on the
# interface before it is bound to ``iface``.
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        # Choices are fetched from voice_processing when this module loads.
        gr.Dropdown(choices=get_model_names(), label="Model", interactive=True),
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
    title="Voice Conversion",
).queue()


# Start the web server only when this file is executed as a script, so
# importing the module (e.g. to reuse ``iface``) has no launch side effect.
if __name__ == "__main__":
    iface.launch(
        debug=True,
        show_error=True,
        max_threads=10,
    )