File size: 2,018 Bytes
4d9475f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
import tempfile
import os
from TTS.config import load_config
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
from TTS.utils.download import download_url

# Define constants
# Longest input, in characters, handed to the synthesizer; longer text is
# truncated before synthesis.
MAX_TXT_LEN = 800

# Registry of downloadable models. Each row is
#     [model name, checkpoint file, config file, base URL]
# The file name is appended to the base URL to build the download URL, so the
# base URL must be the raw-file endpoint ("resolve/<revision>/", not the HTML
# "tree/<revision>" browse page) and should end with "/".
MODEL_INFO = [
    # Add other models in the same format
    [
        "vits-espeak-57000",
        "checkpoint_57000.pth",
        "config.json",
        "https://huggingface.co/mhrahmani/persian-tts-vits-0/resolve/main/",
    ],
]

# Download models
def download_models():
    """Download the checkpoint and config of every model in ``MODEL_INFO``.

    Each model gets a directory named after it (created if missing). The
    checkpoint keeps its remote file name; the config is always stored as
    ``config.json``. Files that are already on disk are not fetched again.
    """
    for model_name, model_file, config_file, url in MODEL_INFO:
        directory = model_name
        os.makedirs(directory, exist_ok=True)
        # Join base URL and file name with exactly one "/", whether or not the
        # configured URL carries a trailing slash.
        base_url = url.rstrip("/")
        wanted = (
            (model_file, str(model_file)),
            (config_file, "config.json"),
        )
        for remote_name, local_name in wanted:
            target = os.path.join(directory, local_name)
            if os.path.exists(target):
                # This runs on every script execution (Streamlit reruns the
                # script on each widget interaction), so avoid re-downloading.
                continue
            # Download under a temporary name and rename afterwards so an
            # interrupted transfer is never mistaken for a complete file.
            partial_name = f"{local_name}.part"
            download_url(f"{base_url}/{remote_name}", directory, partial_name)
            os.replace(os.path.join(directory, partial_name), target)

# Load a model and perform TTS
def synthesize_speech(text, model_name):
    """Synthesize ``text`` with the named model and return a WAV file path.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped, with a warning shown in the app.
        model_name: Name of the model directory, normally the first field of
            a ``MODEL_INFO`` row.

    Returns:
        Path of a temporary ``.wav`` file (created with ``delete=False``, so
        it is not removed automatically), or ``None`` when the text is blank
        or the model files are missing.
    """
    if not text or not text.strip():
        st.warning("Please enter some text to synthesize.")
        return None

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        st.warning(f"Input text was truncated to {MAX_TXT_LEN} characters.")

    # Use the checkpoint file name registered for this model; names that are
    # not in MODEL_INFO fall back to the conventional "best_model.pth".
    checkpoint_names = {name: model_file for name, model_file, *_ in MODEL_INFO}
    checkpoint_path = os.path.join(
        model_name, checkpoint_names.get(model_name, "best_model.pth")
    )
    config_path = os.path.join(model_name, "config.json")

    # Check up front: constructing a Synthesizer never yields None, so a
    # post-construction None test could not detect missing model files.
    if not (os.path.isfile(checkpoint_path) and os.path.isfile(config_path)):
        st.error("Model not found!")
        return None

    synthesizer = Synthesizer(checkpoint_path, config_path)
    wavs = synthesizer.tts(text)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name

# Streamlit app
def main():
    """Render the Streamlit page: text box, model picker and Synthesize button.

    When the button is pressed, the entered text is synthesized with the
    selected model and the resulting WAV file is played back in the page.
    """
    st.title('persian tts playground')
    st.markdown("""
    Persian TTS Demo)
    """)

    text_input = st.text_area("Enter Text to Synthesize:", "زین همرهان سست عناصر، دلم گرفت.")
    # Default to the first model: index=1 is out of range while MODEL_INFO has
    # a single entry, which makes st.selectbox raise when the page is built.
    model_name = st.selectbox("Pick a TTS Model", [info[0] for info in MODEL_INFO], index=0)

    if st.button('Synthesize'):
        audio_file = synthesize_speech(text_input, model_name)
        # synthesize_speech returns None on failure; only play real output.
        if audio_file:
            st.audio(audio_file, format='audio/wav')

# Download models and run the Streamlit app
if __name__ == "__main__":
    # Startup order matters: fetch the model files first, then build the UI.
    for startup_step in (download_models, main):
        startup_step()