Ester Molinari
committed on
Commit
•
f285728
1
Parent(s):
94a1a39
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
|
11 |
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
|
12 |
|
13 |
# load text-to-speech checkpoint and speaker embeddings
|
14 |
-
model_id = "burraco135/
|
15 |
# pipe = pipeline("automatic-speech-recognition", model=model_id)
|
16 |
model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
|
17 |
processor = SpeechT5Processor.from_pretrained(model_id)
|
@@ -57,7 +57,7 @@ def speech_to_speech_translation(audio):
|
|
57 |
|
58 |
title = "Cascaded STST"
|
59 |
description = """
|
60 |
-
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in German. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and [burraco135/
|
61 |
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech, fine-tuned on an Italian audio dataset:
|
62 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
63 |
"""
|
|
|
11 |
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
|
12 |
|
13 |
# load text-to-speech checkpoint and speaker embeddings
|
14 |
+
model_id = "burraco135/speecht5_finetuned_voxpopuli_it" # update with your model id
|
15 |
# pipe = pipeline("automatic-speech-recognition", model=model_id)
|
16 |
model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
|
17 |
processor = SpeechT5Processor.from_pretrained(model_id)
|
|
|
57 |
|
58 |
title = "Cascaded STST"
|
59 |
description = """
|
60 |
+
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Italian. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and [burraco135/speecht5_finetuned_voxpopuli_it](https://huggingface.co/burraco135/speecht5_finetuned_voxpopuli_it) checkpoint for text-to-speech, which is based on Microsoft's
|
61 |
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech, fine-tuned on an Italian audio dataset:
|
62 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
63 |
"""
|