yujiro666 committed on
Commit
1622d47
·
1 Parent(s): 9ff1388

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -15
app.py CHANGED
@@ -1,11 +1,3 @@
1
- # -*- coding: utf-8 -*-
2
- """HW3_ml.ipynb
3
-
4
- Automatically generated by Colaboratory.
5
-
6
- Original file is located at
7
- https://colab.research.google.com/drive/1z4ht7K9pttbgWmDDnrQhqoZ6SYAiaeUe
8
- """
9
 
10
  # !pip -q uninstall gradio -y
11
  # !pip -q install gradio==3.50.2
@@ -24,9 +16,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
24
  # load speech translation checkpoint
25
  asr_pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-xls-r-300m", device=device)
26
 
27
- # !pip -q install sentencepiece
28
- # load text-to-speech checkpoint and speaker embeddings
29
- # processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
30
  processor = WhisperProcessor.from_pretrained(
31
  "openai/whisper-small")
32
 
@@ -35,7 +25,6 @@ translator2 = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ru")
35
 
36
  from transformers import VitsModel, VitsTokenizer
37
 
38
- # model = pipeline("text-to-speech", model="suno/bark-small")
39
 
40
  model = VitsModel.from_pretrained("facebook/mms-tts-rus")
41
  tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-rus")
@@ -70,11 +59,16 @@ def speech_to_speech_translation(audio):
70
 
71
  title = "Cascaded STST"
72
  description = """
73
- * Данная модель распознает текст на 56 языках
 
 
 
 
 
 
 
74
 
75
 
76
- Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Russian. Demo uses facebook/mms-tts-rus model for text-to-speech:
77
- ![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
78
  """
79
 
80
  demo = gr.Blocks()
 
 
 
 
 
 
 
 
 
1
 
2
  # !pip -q uninstall gradio -y
3
  # !pip -q install gradio==3.50.2
 
16
  # load speech translation checkpoint
17
  asr_pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-xls-r-300m", device=device)
18
 
19
+
 
 
20
  processor = WhisperProcessor.from_pretrained(
21
  "openai/whisper-small")
22
 
 
25
 
26
  from transformers import VitsModel, VitsTokenizer
27
 
 
28
 
29
  model = VitsModel.from_pretrained("facebook/mms-tts-rus")
30
  tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-rus")
 
59
 
60
  title = "Cascaded STST"
61
  description = """
62
+ * В качестве ASR модели была выбрана - https://huggingface.co/voidful/wav2vec2-xlsr-multilingual-56, если поставить фильтры multilingual и wav2vec, то эта модель самая популярная после фейсбуковских -
63
+ https://imgur.com/UNH5ym1
64
+ * Далее идет перевод с языка, на котором была запись, на английский, и после этого на русский
65
+ * Потом переведенный текст воспроизводится на русском языке
66
+
67
+
68
+
69
+
70
 
71
 
 
 
72
  """
73
 
74
  demo = gr.Blocks()