File size: 2,908 Bytes
f2a547a
 
 
 
 
 
 
c8c664f
f2a547a
 
 
 
1bce0f9
 
 
f2a547a
 
60761c3
 
 
f2a547a
 
 
 
 
 
 
 
 
 
 
 
60761c3
 
f2a547a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1bce0f9
 
 
 
 
f2a547a
5202de8
f2a547a
 
 
 
 
5202de8
 
f2a547a
 
 
 
c8c664f
 
 
 
f2a547a
 
 
 
5202de8
 
f2a547a
 
 
 
 
5202de8
f2a547a
c8c664f
f2a547a
 
 
 
c8c664f
f2a547a
 
 
 
 
 
c8c664f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from TTS.api import TTS
from bs4 import BeautifulSoup
import requests
import streamlit as st
import tempfile
import os
import json
import datetime

# Load the application settings from config.json in the current working directory.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so the result does
# not depend on the platform's default locale encoding.
with open('config.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# Required settings; a missing key raises KeyError at startup.
APP_NAME = config['APP_NAME']
APP_LOGO = config['APP_LOGO']
APP_DESCRIPTION = config['APP_DESCRIPTION']
LANGUAGES_URL = config['LANGUAGES_URL']

def contains_only_ascii(input_string):
    """Return True when every character's code point is below 128.

    An empty string yields True.
    """
    for character in input_string:
        if ord(character) >= 128:
            return False
    return True

def get_iso_languages():
    """Download the language listing at ``LANGUAGES_URL`` and map names to ISO codes.

    Every ``<p>`` element after the first one is split on whitespace. Elements
    that yield exactly two tokens are read as ``<iso_code> <language_name>``;
    anything else is ignored. Names containing non-ASCII characters are skipped.

    Returns:
        dict: language name -> ISO code. When a name occurs more than once,
        the last occurrence wins.

    Raises:
        requests.RequestException: if the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    # Without a timeout a stalled server would block the app forever.
    response = requests.get(LANGUAGES_URL, timeout=10)
    # Fail loudly instead of parsing an error page into an empty mapping.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    iso_language_dict = {}

    # The first <p> holds the header, not a language entry.
    for p_tag in soup.find_all('p')[1:]:
        parts = p_tag.get_text().split()
        if len(parts) != 2:
            continue
        iso_code, language_name = parts
        if contains_only_ascii(language_name):
            iso_language_dict[language_name] = iso_code

    return iso_language_dict

def create_temp_file(input_wav):
    """Copy a readable binary file-like object into a named temporary file.

    The file is created with ``delete=False`` so it survives ``close()``; the
    caller is responsible for removing it.

    Args:
        input_wav: object whose ``read()`` returns the bytes to store.

    Returns:
        The still-open temporary file object; its ``name`` attribute is the
        path on disk.
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        temp_file.write(input_wav.read())
        # Consumers open the file by path, so push the buffered bytes to disk;
        # otherwise a small upload may look empty or truncated to them.
        temp_file.flush()
    except Exception:
        # Do not leave an orphaned file behind when the copy fails.
        temp_file.close()
        os.remove(temp_file.name)
        raise
    return temp_file

def remove_temp_file(temp_file):
    """Close *temp_file* and delete the file it names from disk."""
    temp_file.close()
    path_on_disk = temp_file.name
    os.unlink(path_on_disk)

def update_progress(percent, text):
    """Set the progress bar to *percent* and show *text* as the status line.

    Uses the module-level ``progress_bar`` and ``status_text`` objects, which
    must exist before this function is called.
    """
    bar, status = progress_bar, status_text
    bar.progress(percent)
    status.text(text)

# Language names (keys) mapped to ISO codes, fetched from LANGUAGES_URL.
iso_languages = get_iso_languages()
languages = list(iso_languages)

# Page header.
st.set_page_config(page_title=APP_NAME)
st.title(APP_NAME)
st.image(APP_LOGO, use_column_width=True)
st.markdown(APP_DESCRIPTION)

# User inputs.
language = st.selectbox('Select a language', languages)
prompt = st.text_input('Enter your prompt')
input_wav = st.file_uploader("Upload a WAV file", type=["wav"])

# Generation starts only once a file has been uploaded. The former
# "please upload" check lived inside this branch and could never fire,
# so it was removed.
if input_wav:
    if not prompt:
        st.error('Please write prompt')
    else:
        # Read by update_progress() as module-level globals.
        progress_bar = st.progress(0)
        status_text = st.empty()

        current_datetime = datetime.datetime.now()
        formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H%M%S")
        output_filename = f"recording_{formatted_datetime}.wav"

        temp_file = create_temp_file(input_wav)
        try:
            # TTS receives the upload as a path, so make sure the bytes are
            # on disk before handing the name over.
            temp_file.flush()

            iso_code = iso_languages[language]

            print(f'Language: {language}, prompt: {prompt}')

            update_progress(0, 'Loading TTS model...')
            api = TTS(f"tts_models/{iso_code}/fairseq/vits")

            update_progress(50, 'Generating audio...')
            api.tts_with_vc_to_file(
                prompt,
                speaker_wav=temp_file.name,
                file_path=output_filename
            )
        finally:
            # Remove the uploaded copy even when model loading or synthesis fails.
            remove_temp_file(temp_file)

        # Read the result into memory and release the file handle right away.
        with open(output_filename, 'rb') as audio_file:
            audio_bytes = audio_file.read()

        update_progress(100, 'Audio generated successfully!')

        st.audio(audio_bytes, format='audio/wav')

        st.download_button('Download WAV', data=audio_bytes, file_name='output.wav')