Spaces:
Runtime error
Runtime error
add system info
Browse files
app.py
CHANGED
@@ -8,8 +8,7 @@ import time
|
|
8 |
import psutil
|
9 |
from mtranslate import translate
|
10 |
|
11 |
-
|
12 |
-
MODEL_NAME = "cahya/whisper-medium-id" #this always needs to stay in line 8 :D sorry for the hackiness
|
13 |
lang = "id"
|
14 |
title = "Indonesian Whisperer"
|
15 |
description = "Cross Language Speech to Speech (Indonesian/English to 25 other languages) using OpenAI Whisper and Coqui TTS"
|
@@ -43,7 +42,6 @@ languages = {
|
|
43 |
'Maltese': 'mt'
|
44 |
}
|
45 |
|
46 |
-
|
47 |
device = 0 if torch.cuda.is_available() else "cpu"
|
48 |
|
49 |
pipe = pipeline(
|
@@ -82,6 +80,7 @@ coquiTTS = CoquiTTS()
|
|
82 |
|
83 |
def tts(language: str, audio_microphone: str, audio_file: str):
|
84 |
language = languages[language]
|
|
|
85 |
print(f"### {datetime.now()} TTS", language, audio_file)
|
86 |
transcription = transcribe(audio_microphone, audio_file)
|
87 |
print(f"### {datetime.now()} transcribed:", transcription)
|
@@ -89,8 +88,15 @@ def tts(language: str, audio_microphone: str, audio_file: str):
|
|
89 |
# return output
|
90 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
91 |
coquiTTS.get_tts(translation, fp, speaker={"language": language})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
print(f"### {datetime.now()} fp.name:", fp.name)
|
93 |
-
return transcription, translation, fp.name
|
94 |
|
95 |
|
96 |
with gr.Blocks() as blocks:
|
@@ -98,12 +104,12 @@ with gr.Blocks() as blocks:
|
|
98 |
+ title
|
99 |
+ "</h1>")
|
100 |
gr.Markdown(description)
|
101 |
-
with gr.Row()
|
102 |
-
with gr.Column()
|
103 |
audio_microphone = gr.Audio(label="Microphone", source="microphone", type="filepath", optional=True)
|
104 |
audio_upload = gr.Audio(label="Upload", source="upload", type="filepath", optional=True)
|
105 |
language = gr.Dropdown([lang for lang in languages.keys()], label="Target Language", value="English")
|
106 |
-
with gr.Row():
|
107 |
submit = gr.Button("Submit", variant="primary")
|
108 |
examples = gr.Examples(examples=["data/Jokowi - 2022.mp3", "data/Soekarno - 1963.mp3", "data/JFK.mp3"],
|
109 |
label="Examples", inputs=[audio_upload])
|
@@ -111,23 +117,19 @@ with gr.Blocks() as blocks:
|
|
111 |
text_source = gr.Textbox(label="Source Language")
|
112 |
text_target = gr.Textbox(label="Target Language")
|
113 |
audio = gr.Audio(label="Target Audio", interactive=False)
|
|
|
|
|
114 |
|
115 |
-
memory = psutil.virtual_memory()
|
116 |
gr.Markdown(info)
|
117 |
-
system_status = info = f"""
|
118 |
-
*Memory: {memory.total/(1024*1024*1024):.2f}GB, used: {memory.percent}%, available: {memory.available/(1024*1024*1024):.2f}GB*
|
119 |
-
"""
|
120 |
-
gr.Markdown(system_status)
|
121 |
gr.Markdown("<center>"
|
122 |
-
+f'<a href="https://github.com/cahya-wirawan/indonesian-whisperer"><img src={badge} alt="visitors badge"/></a>'
|
123 |
-
+"</center>")
|
124 |
|
125 |
# actions
|
126 |
submit.click(
|
127 |
tts,
|
128 |
[language, audio_microphone, audio_upload],
|
129 |
-
[text_source, text_target, audio],
|
130 |
)
|
131 |
|
132 |
blocks.launch()
|
133 |
-
|
|
|
8 |
import psutil
|
9 |
from mtranslate import translate
|
10 |
|
11 |
+
MODEL_NAME = "cahya/whisper-medium-id" # this always needs to stay in line 8 :D sorry for the hackiness
|
|
|
12 |
lang = "id"
|
13 |
title = "Indonesian Whisperer"
|
14 |
description = "Cross Language Speech to Speech (Indonesian/English to 25 other languages) using OpenAI Whisper and Coqui TTS"
|
|
|
42 |
'Maltese': 'mt'
|
43 |
}
|
44 |
|
|
|
45 |
device = 0 if torch.cuda.is_available() else "cpu"
|
46 |
|
47 |
pipe = pipeline(
|
|
|
80 |
|
81 |
def tts(language: str, audio_microphone: str, audio_file: str):
|
82 |
language = languages[language]
|
83 |
+
time_start = time.time()
|
84 |
print(f"### {datetime.now()} TTS", language, audio_file)
|
85 |
transcription = transcribe(audio_microphone, audio_file)
|
86 |
print(f"### {datetime.now()} transcribed:", transcription)
|
|
|
88 |
# return output
|
89 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
90 |
coquiTTS.get_tts(translation, fp, speaker={"language": language})
|
91 |
+
time_end = time.time()
|
92 |
+
time_diff = time_end - time_start
|
93 |
+
memory = psutil.virtual_memory()
|
94 |
+
system_info = f"""
|
95 |
+
*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*
|
96 |
+
*Processing time: {time_diff:.5} seconds*
|
97 |
+
"""
|
98 |
print(f"### {datetime.now()} fp.name:", fp.name)
|
99 |
+
return transcription, translation, fp.name, system_info
|
100 |
|
101 |
|
102 |
with gr.Blocks() as blocks:
|
|
|
104 |
+ title
|
105 |
+ "</h1>")
|
106 |
gr.Markdown(description)
|
107 |
+
with gr.Row(): # equal_height=False
|
108 |
+
with gr.Column(): # variant="panel"
|
109 |
audio_microphone = gr.Audio(label="Microphone", source="microphone", type="filepath", optional=True)
|
110 |
audio_upload = gr.Audio(label="Upload", source="upload", type="filepath", optional=True)
|
111 |
language = gr.Dropdown([lang for lang in languages.keys()], label="Target Language", value="English")
|
112 |
+
with gr.Row(): # mobile_collapse=False
|
113 |
submit = gr.Button("Submit", variant="primary")
|
114 |
examples = gr.Examples(examples=["data/Jokowi - 2022.mp3", "data/Soekarno - 1963.mp3", "data/JFK.mp3"],
|
115 |
label="Examples", inputs=[audio_upload])
|
|
|
117 |
text_source = gr.Textbox(label="Source Language")
|
118 |
text_target = gr.Textbox(label="Target Language")
|
119 |
audio = gr.Audio(label="Target Audio", interactive=False)
|
120 |
+
memory = psutil.virtual_memory()
|
121 |
+
system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
|
122 |
|
|
|
123 |
gr.Markdown(info)
|
|
|
|
|
|
|
|
|
124 |
gr.Markdown("<center>"
|
125 |
+
+ f'<a href="https://github.com/cahya-wirawan/indonesian-whisperer"><img src={badge} alt="visitors badge"/></a>'
|
126 |
+
+ "</center>")
|
127 |
|
128 |
# actions
|
129 |
submit.click(
|
130 |
tts,
|
131 |
[language, audio_microphone, audio_upload],
|
132 |
+
[text_source, text_target, audio, system_info],
|
133 |
)
|
134 |
|
135 |
blocks.launch()
|
|