File size: 2,030 Bytes
0787439
 
 
 
fe5a693
0787439
 
fe5a693
0787439
 
 
 
 
 
fe5a693
0787439
fe5a693
 
0787439
 
 
a6d28e0
0787439
 
 
cf866b5
0787439
 
 
 
 
 
 
 
 
 
 
 
 
 
fe5a693
0787439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fe5a693
0787439
a6d28e0
fe5a693
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import tts, get_model_names, voice_mapping
from io import BytesIO

def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Synthesize speech for ``tts_text`` and return it as a WAV data URI.

    Args:
        model_name: Model identifier, forwarded unchanged to ``tts``.
        tts_text: Text to synthesize, forwarded unchanged to ``tts``.
        selected_voice: Key looked up in ``voice_mapping`` to obtain the voice
            name that is passed to ``tts``.
        slang_rate: Rate value, forwarded unchanged to ``tts``.
        use_uploaded_voice: When truthy and ``voice_upload`` is not ``None``,
            the uploaded file's bytes are read and passed to ``tts``.
        voice_upload: The uploaded voice sample. Either a file-like object
            exposing ``read()`` or a filesystem path string; may be ``None``.

    Returns:
        A ``(payload, audio_data_uri)`` tuple. ``payload`` is
        ``{"error": ...}`` when ``selected_voice`` is unknown, otherwise
        ``{"info": info}`` with the info value returned by ``tts``.
        ``audio_data_uri`` is a ``data:audio/wav;base64,...`` string, or
        ``None`` when the voice is unknown or ``tts`` produced no audio.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        if isinstance(voice_upload, str):
            # A path string has no read(); open the file it points to.
            with open(voice_upload, "rb") as upload:
                voice_upload_file = upload.read()
        else:
            voice_upload_file = voice_upload.read()

    # Call the tts processing function
    info, _, audio = tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # NOTE(review): presumably tts signals failure by returning no audio --
    # confirm against voice_processing. Either way, surface the info payload
    # instead of raising TypeError while unpacking or base64-encoding None.
    tgt_sr, audio_output = audio if audio is not None else (None, None)
    if audio_output is None:
        return {"info": info}, None

    # Process audio output to bytes
    if isinstance(audio_output, np.ndarray):
        byte_io = BytesIO()
        wavfile.write(byte_io, tgt_sr, audio_output)
        audio_bytes = byte_io.getvalue()
    else:
        # Anything else is passed straight to base64, so it must be bytes-like.
        audio_bytes = audio_output

    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
    return {"info": info}, audio_data_uri

def get_models():
    """Return the available model names, as reported by ``get_model_names``."""
    model_names = get_model_names()
    return model_names

def get_voices():
    """Return the selectable voice names (the keys of ``voice_mapping``) as a list."""
    return [*voice_mapping.keys()]

# Input widgets, listed in the same order as convert_tts's parameters.
_tts_inputs = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]

# Output widgets: convert_tts returns a dict payload and a data-URI string.
_tts_outputs = [
    gr.JSON(label="Info"),
    gr.Textbox(label="Audio URI"),
]

# Build the Gradio interface around convert_tts.
iface = gr.Interface(
    fn=convert_tts,
    inputs=_tts_inputs,
    outputs=_tts_outputs,
    title="Text-to-Speech Conversion",
)

# Enable queuing; set the concurrency limit based on your server capacity.
iface = iface.queue(concurrency_count=10)

# Launch the interface
iface.launch(debug=True)