Update app.py
app.py CHANGED
@@ -3,10 +3,22 @@ import numpy as np
 import torch
 from transformers import pipeline, VitsModel, AutoTokenizer, AutoTokenizer
 from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Processor
+from transformers import WhisperTokenizer, GenerationConfig
+
 
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
-
+
+tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-base")
+generation_config = GenerationConfig.from_pretrained("openai/whisper-base")
+
+generation_config.forced_decoder_ids
+
+tokenizer.decode(generation_config.forced_decoder_ids[1][1])
+
+
+
+asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device)
 
 #vist_model = VitsModel.from_pretrained("facebook/mms-tts-spa")
 #vist_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-spa")
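For readers following the change, here is a minimal sketch (not part of the commit) of how the objects this diff introduces are typically used together. Only the model names and the forced_decoder_ids inspection come from the diff above; the placeholder audio path "sample.wav" and the generate_kwargs forwarding are illustrative assumptions.

# Sketch only -- not part of the commit: how the objects added above are commonly wired together.
import torch
from transformers import pipeline, WhisperTokenizer, GenerationConfig

device = "cuda:0" if torch.cuda.is_available() else "cpu"

tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-base")
generation_config = GenerationConfig.from_pretrained("openai/whisper-base")

# forced_decoder_ids is a list of [position, token_id] pairs; decoding the ids shows
# which special tokens (language, task, timestamps) Whisper is forced to start with.
if generation_config.forced_decoder_ids:
    for position, token_id in generation_config.forced_decoder_ids:
        print(position, tokenizer.decode(token_id))

asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device)

# Assumed usage: "sample.wav" is a placeholder path, and forwarding forced_decoder_ids
# via generate_kwargs is one way to pin the decoding language/task.
result = asr_pipe(
    "sample.wav",
    generate_kwargs={"forced_decoder_ids": generation_config.forced_decoder_ids},
)
print(result["text"])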