import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import parallel_tts, get_model_names
import os
import logging

# Set up logging
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


async def convert_tts(model_name, audio_file, slang_rate):
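    """Convert an uploaded audio file with the selected voice model.

    Returns a (info, output_path) tuple; on failure the first element
    carries an "error" key and the path is None.
    """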
    try:
        logger.debug(f"Received request - model: {model_name}, audio: {type(audio_file)}, slang: {slang_rate}")

        if audio_file is None:
            logger.error("No audio file provided")
            return {"error": "No audio file uploaded."}, None

        # Log the audio file details
        if hasattr(audio_file, 'name'):
            logger.debug(f"Audio file name: {audio_file.name}")
        logger.debug(f"Audio file type: {type(audio_file)}")

        # Create task for parallel processing
        task = (model_name, None, None, slang_rate, True, audio_file)

        # Process the audio
        logger.debug("Starting audio processing")
        result = parallel_tts([task])
        logger.debug(f"Processing result: {type(result)}")

        if not result or result[0] is None:
            logger.error("Processing failed - no result")
            return {"error": "Processing failed"}, None

        info, _, (tgt_sr, audio_output) = result[0]
        logger.debug(f"Processing complete - info: {info}")

        if audio_output is None:
            logger.error("No audio output generated")
            return {"error": "No audio output generated"}, None

        # Save the output
        try:
            output_filename = f"output_{os.urandom(4).hex()}.wav"
            output_path = os.path.join("outputs", output_filename)
            os.makedirs("outputs", exist_ok=True)

            if isinstance(audio_output, np.ndarray):
                logger.debug(f"Saving numpy array with shape {audio_output.shape}")
                wavfile.write(output_path, tgt_sr, audio_output)
            else:
                logger.debug(f"Saving raw audio data of type {type(audio_output)}")
                with open(output_path, "wb") as f:
                    f.write(audio_output)

            logger.debug(f"Successfully saved to {output_path}")
            return {"info": info}, output_path
        except Exception as save_error:
            logger.error(f"Error saving output: {save_error}", exc_info=True)
            return {"error": f"Error saving output: {str(save_error)}"}, None
    except Exception as e:
        logger.error(f"Error in convert_tts: {str(e)}", exc_info=True)
        return {"error": str(e)}, None


# Interface definition
iface = gr.Interface(
    fn=convert_tts,
    inputs=[
        gr.Dropdown(choices=get_model_names(), label="Model", interactive=True),
        gr.Audio(label="Upload Audio", type="filepath"),
        gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    ],
    outputs=[
        gr.JSON(label="Info"),
        gr.Audio(label="Converted Audio"),
    ],
    title="Voice Conversion",
).queue()

if __name__ == "__main__":
    iface.launch(
        debug=True,
        show_error=True,
        max_threads=10,
    )
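
# Example client call (a sketch, not part of the app): assuming the running
# server exposes the default "/predict" endpoint and gradio_client >= 1.0 is
# installed, the interface can be exercised programmatically like this:
#
#   from gradio_client import Client, handle_file
#
#   client = Client("http://localhost:7860")
#   info, converted_audio_path = client.predict(
#       "my_model",                       # a model name from the dropdown (hypothetical)
#       handle_file("sample_input.wav"),  # path to the audio to convert (hypothetical)
#       0.5,                              # slang rate
#       api_name="/predict",
#   )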