File size: 2,008 Bytes
d359e5f
 
 
 
0869928
d359e5f
9325cfe
e94f976
9325cfe
32fb87d
 
 
2c87986
32fb87d
 
 
 
2c87986
32fb87d
 
 
2c87986
32fb87d
2c87986
32fb87d
 
 
 
 
 
 
 
2c87986
32fb87d
 
d359e5f
 
 
 
 
 
 
 
942b6ca
d359e5f
8398686
 
 
 
 
 
d359e5f
 
eb0534b
32fb87d
d359e5f
942b6ca
32fb87d
e7a7e70
32fb87d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
import base64
import numpy as np
from scipy.io import wavfile
from voice_processing import tts, get_model_names, voice_mapping
from io import BytesIO
import asyncio

async def convert_tts(model_name, tts_text, selected_voice, slang_rate, use_uploaded_voice, voice_upload):
    """Synthesize speech for ``tts_text`` and return it as a base64 WAV data URI.

    Args:
        model_name: Forwarded unchanged to ``tts``.
        tts_text: Forwarded unchanged to ``tts``.
        selected_voice: Key looked up in ``voice_mapping``; the mapped value
            is what gets passed to ``tts``.
        slang_rate: Forwarded unchanged to ``tts``.
        use_uploaded_voice: Forwarded to ``tts``; when truthy and
            ``voice_upload`` is given, the uploaded file is read and its
            bytes are forwarded as well.
        voice_upload: Uploaded file from the UI, either an object exposing a
            ``.name`` path attribute or a plain path string; may be ``None``.

    Returns:
        A ``(payload, audio_data_uri)`` pair. On success ``payload`` is
        ``{"info": info}`` and ``audio_data_uri`` is a
        ``data:audio/wav;base64,...`` string. When the voice is unknown or
        ``tts`` yields no audio, ``payload`` is ``{"error": ...}`` and
        ``audio_data_uri`` is ``None``.
    """
    fallback_sample_rate = 40000

    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        return {"error": f"Invalid voice '{selected_voice}'."}, None

    voice_upload_file = None
    if use_uploaded_voice and voice_upload is not None:
        # Accept both file-like upload objects (with ``.name``) and plain
        # path strings, so either form of Gradio file payload works.
        upload_path = getattr(voice_upload, "name", voice_upload)
        with open(upload_path, 'rb') as f:
            voice_upload_file = f.read()

    info, edge_tts_output_path, tts_output_data, edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # Report a missing result through the same error channel as the invalid
    # voice case instead of crashing on unpacking / base64-encoding ``None``.
    if tts_output_data is None:
        return {"error": info or "TTS conversion produced no audio."}, None

    reported_rate, audio_output = tts_output_data
    if audio_output is None:
        return {"error": info or "TTS conversion produced no audio."}, None

    if isinstance(audio_output, np.ndarray):
        # NOTE(review): the first tuple element is presumed to be the sample
        # rate of ``audio_output`` (Gradio's (rate, data) audio convention) --
        # confirm against voice_processing.tts. Anything that is not a
        # positive integer falls back to the previous fixed rate.
        rate_is_usable = (
            isinstance(reported_rate, (int, np.integer))
            and not isinstance(reported_rate, bool)
            and reported_rate > 0
        )
        sample_rate = int(reported_rate) if rate_is_usable else fallback_sample_rate

        byte_io = BytesIO()
        wavfile.write(byte_io, sample_rate, audio_output)
        audio_bytes = byte_io.getvalue()
    else:
        # Non-array output is passed through as-is and must be bytes-like
        # for the base64 encoding below.
        audio_bytes = audio_output

    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
    return {"info": info}, audio_data_uri

def get_models():
    """Return whatever ``get_model_names()`` reports as the available models."""
    model_names = get_model_names()
    return model_names

def get_voices():
    """Return the keys of ``voice_mapping`` as a new list."""
    # Iterating a mapping yields its keys, in the mapping's own order.
    return [voice_name for voice_name in voice_mapping]

# Input widgets, listed in the positional order of convert_tts's parameters.
_input_components = [
    gr.Dropdown(choices=get_models(), label="Model", interactive=True),
    gr.Textbox(label="Text", placeholder="Enter text here"),
    gr.Dropdown(choices=get_voices(), label="Voice", interactive=True),
    gr.Slider(minimum=0, maximum=1, step=0.01, label="Slang Rate"),
    gr.Checkbox(label="Use Uploaded Voice"),
    gr.File(label="Voice File"),
]

# Output widgets, matching the (payload, audio_data_uri) pair convert_tts returns.
_output_components = [
    gr.JSON(label="Info"),
    gr.Textbox(label="Audio URI"),
]

iface = gr.Interface(
    fn=convert_tts,
    inputs=_input_components,
    outputs=_output_components,
    title="Text-to-Speech Conversion",
)

# Set concurrency limit to 6 based on your hardware
iface = iface.queue(default_concurrency_limit=6)

iface.launch()