Spaces:
Runtime error
Runtime error
Update for Russian speech app.py
Browse files
app.py
CHANGED
@@ -13,31 +13,25 @@ model_wav2vec = 'openai/whisper-small' #'voidful/wav2vec2-xlsr-multilingual-56'
|
|
13 |
|
14 |
asr_pipe = pipeline("automatic-speech-recognition", model=model_wav2vec, device=device)
|
15 |
|
16 |
-
#
|
17 |
def translate_audio(audio):
|
18 |
-
outputs = asr_pipe(audio, max_new_tokens=256,
|
19 |
-
generate_kwargs={"task": "translate"})
|
20 |
return outputs["text"]
|
21 |
|
22 |
-
# translation
|
23 |
-
def translate_text(text
|
24 |
-
|
25 |
-
|
26 |
-
model =
|
27 |
-
|
28 |
-
|
29 |
-
outputs = model.generate(inputs, num_beams=4, max_length=50, early_stopping=True)
|
30 |
-
translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
31 |
-
return translated_text
|
32 |
|
33 |
# load text-to-speech checkpoint
|
34 |
-
#model = pipeline("text-to-speech", model="voxxer/speecht5_finetuned_commonvoice_ru_translit")
|
35 |
model = VitsModel.from_pretrained("voxxer/speecht5_finetuned_commonvoice_ru_translit")
|
36 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-rus")
|
37 |
|
38 |
def synthesise(text):
|
39 |
-
translated_text = translate_text(text
|
40 |
-
translated_text = translate_text(translate_text, 'en', 'ru')
|
41 |
inputs = tokenizer(translated_text, return_tensors="pt")
|
42 |
input_ids = inputs["input_ids"]
|
43 |
with torch.no_grad():
|
@@ -55,7 +49,7 @@ def speech_to_speech_translation(audio):
|
|
55 |
|
56 |
title = "Cascaded STST"
|
57 |
description = """
|
58 |
-
* В начале происходит распознавание речи с помощью модели
|
59 |
* Затем полученный текст переводится сначала на английский с помощью Helsinki-NLP/opus-mt-mul-en, а потом на русский с помощью Helsinki-NLP/opus-mt-en-ru
|
60 |
* На последнем шаге полученный текст озвучивается с помощью fine-tune-говой версии microsoft/speecht5_tts - voxxer/speecht5_finetuned_commonvoice_ru_translit
|
61 |
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Russian. Demo uses facebook/mms-tts-rus model for text-to-speech:
|
|
|
13 |
|
14 |
asr_pipe = pipeline("automatic-speech-recognition", model=model_wav2vec, device=device)
|
15 |
|
16 |
+
# load speech-to-text checkpoint
|
17 |
def translate_audio(audio):
    """Run the module-level ASR pipeline on *audio* and return the recognised text.

    The pipeline is invoked with ``task="translate"`` and a cap of 256 newly
    generated tokens; only the ``"text"`` field of its output is returned.
    """
    # NOTE(review): with a Whisper checkpoint, "translate" is expected to yield
    # English text rather than a same-language transcript - confirm.
    generation_options = {"task": "translate"}
    result = asr_pipe(audio, max_new_tokens=256, generate_kwargs=generation_options)
    return result["text"]
|
20 |
|
21 |
+
# translation to Russian
|
22 |
+
# Translation pipelines keyed by checkpoint name, filled lazily so that each
# model is loaded once per process instead of once per request.
_TRANSLATION_PIPELINES = {}


def _translation_pipeline(model_name):
    """Return the translation pipeline for *model_name*, loading it on first use."""
    if model_name not in _TRANSLATION_PIPELINES:
        _TRANSLATION_PIPELINES[model_name] = pipeline("translation", model=model_name)
    return _TRANSLATION_PIPELINES[model_name]


def translate_text(text):
    """Translate *text* into Russian, pivoting through English.

    The text is first passed through ``Helsinki-NLP/opus-mt-mul-en`` and the
    result is then passed through ``Helsinki-NLP/opus-mt-en-ru``.

    Args:
        text: Source text to translate.

    Returns:
        The ``"translation_text"`` field of the first result produced by the
        English-to-Russian pipeline.
    """
    to_english = _translation_pipeline("Helsinki-NLP/opus-mt-mul-en")
    to_russian = _translation_pipeline("Helsinki-NLP/opus-mt-en-ru")

    english_text = to_english(text)[0]['translation_text']
    russian_result = to_russian(english_text)
    return russian_result[0]['translation_text']
|
|
|
|
|
|
|
28 |
|
29 |
# load text-to-speech checkpoint
|
|
|
30 |
model = VitsModel.from_pretrained("voxxer/speecht5_finetuned_commonvoice_ru_translit")
|
31 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-rus")
|
32 |
|
33 |
def synthesise(text):
|
34 |
+
translated_text = translate_text(text)
|
|
|
35 |
inputs = tokenizer(translated_text, return_tensors="pt")
|
36 |
input_ids = inputs["input_ids"]
|
37 |
with torch.no_grad():
|
|
|
49 |
|
50 |
title = "Cascaded STST"
|
51 |
description = """
|
52 |
+
* В начале происходит распознавание речи с помощью модели openai/whisper-small.
|
53 |
* Затем полученный текст переводится сначала на английский с помощью Helsinki-NLP/opus-mt-mul-en, а потом на русский с помощью Helsinki-NLP/opus-mt-en-ru
|
54 |
* На последнем шаге полученный текст озвучивается с помощью fine-tune-говой версии microsoft/speecht5_tts - voxxer/speecht5_finetuned_commonvoice_ru_translit
|
55 |
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Russian. Demo uses facebook/mms-tts-rus model for text-to-speech:
|