Spaces:
Runtime error
Runtime error
Upload app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,3 @@
|
|
1 |
-
# -*- coding: utf-8 -*-
|
2 |
-
"""HW3_ml.ipynb
|
3 |
-
|
4 |
-
Automatically generated by Colaboratory.
|
5 |
-
|
6 |
-
Original file is located at
|
7 |
-
https://colab.research.google.com/drive/1z4ht7K9pttbgWmDDnrQhqoZ6SYAiaeUe
|
8 |
-
"""
|
9 |
|
10 |
# !pip -q uninstall gradio -y
|
11 |
# !pip -q install gradio==3.50.2
|
@@ -24,9 +16,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
24 |
# load speech translation checkpoint
|
25 |
asr_pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-xls-r-300m", device=device)
|
26 |
|
27 |
-
|
28 |
-
# load text-to-speech checkpoint and speaker embeddings
|
29 |
-
# processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
30 |
processor = WhisperProcessor.from_pretrained(
|
31 |
"openai/whisper-small")
|
32 |
|
@@ -35,7 +25,6 @@ translator2 = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ru")
|
|
35 |
|
36 |
from transformers import VitsModel, VitsTokenizer
|
37 |
|
38 |
-
# model = pipeline("text-to-speech", model="suno/bark-small")
|
39 |
|
40 |
model = VitsModel.from_pretrained("facebook/mms-tts-rus")
|
41 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-rus")
|
@@ -70,11 +59,16 @@ def speech_to_speech_translation(audio):
|
|
70 |
|
71 |
title = "Cascaded STST"
|
72 |
description = """
|
73 |
-
*
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
|
75 |
|
76 |
-
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Russian. Demo uses facebook/mms-tts-rus model for text-to-speech:
|
77 |
-
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
78 |
"""
|
79 |
|
80 |
demo = gr.Blocks()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
# !pip -q uninstall gradio -y
|
3 |
# !pip -q install gradio==3.50.2
|
|
|
16 |
# load speech translation checkpoint
|
17 |
asr_pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-xls-r-300m", device=device)
|
18 |
|
19 |
+
|
|
|
|
|
20 |
processor = WhisperProcessor.from_pretrained(
|
21 |
"openai/whisper-small")
|
22 |
|
|
|
25 |
|
26 |
from transformers import VitsModel, VitsTokenizer
|
27 |
|
|
|
28 |
|
29 |
model = VitsModel.from_pretrained("facebook/mms-tts-rus")
|
30 |
tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-rus")
|
|
|
59 |
|
60 |
title = "Cascaded STST"
|
61 |
description = """
|
62 |
+
* В качестве ASR-модели была выбрана https://huggingface.co/voidful/wav2vec2-xlsr-multilingual-56: если поставить фильтры multilingual и wav2vec, то эта модель самая популярная после фейсбуковских:
|
63 |
+
https://imgur.com/UNH5ym1
|
64 |
+
* Далее идет перевод с языка, на котором была запись, на английский, и после этого на русский
|
65 |
+
* Потом переведенный текст воспроизводится на русском языке
|
66 |
+
|
67 |
+
|
68 |
+
|
69 |
+
|
70 |
|
71 |
|
|
|
|
|
72 |
"""
|
73 |
|
74 |
demo = gr.Blocks()
|