Everton Aleixo committed
Commit: 9578405
Parent: b9f4b9a

Change asr

Files changed (1): app.py +3 -21
app.py CHANGED
@@ -8,25 +8,7 @@ from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Proce
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
 # load speech translation checkpoint
-# asr_pipe = pipeline("automatic-speech-recognition", model="jonatasgrosman/whisper-large-pt-cv11", device=device)
-# asr_pipe.model.config.forced_decoder_ids = (
-#     asr_pipe.tokenizer.get_decoder_prompt_ids(
-#         language="pt",
-#         task="transcribe"
-#     )
-# )
-
-asr_pipe = pipeline(
-    "automatic-speech-recognition",
-    model="jonatasgrosman/whisper-large-pt-cv11"
-)
-
-asr_pipe.model.config.forced_decoder_ids = (
-    asr_pipe.tokenizer.get_decoder_prompt_ids(
-        language="pt",
-        task="transcribe"
-    )
-)
+asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device)
 
 # load text-to-speech checkpoint and speaker embeddings
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
@@ -39,8 +21,8 @@ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze
 
 
 def translate(audio):
-    outputs = asr_pipe(audio)
-    print('translate', outputs)
+    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language":"portuguese"})
+    print('outputs', outputs)
     return outputs["text"]
 
 
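
For quick reference, the pipeline call introduced by this commit can be exercised on its own roughly as follows. This is a minimal sketch, not part of the commit; the audio file name "sample.wav" is a placeholder.

import torch
from transformers import pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Same checkpoint and generation options as in the updated app.py
asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-medium", device=device)

# "sample.wav" is a hypothetical input file, not taken from the commit
outputs = asr_pipe(
    "sample.wav",
    max_new_tokens=256,
    generate_kwargs={"task": "transcribe", "language": "portuguese"},
)
print(outputs["text"])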