ykirpichev committed on
Commit
3bfb858
1 Parent(s): 1025309

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -18,8 +18,8 @@ model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(dev
18
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
19
 
20
 
21
- model = VitsModel.from_pretrained("Matthijs/mms-tts-deu").to(device)
22
- tokenizer = VitsTokenizer.from_pretrained("Matthijs/mms-tts-deu")
23
 
24
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
25
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
@@ -31,16 +31,16 @@ def translate(audio):
31
 
32
 
33
  def synthesise(text):
34
- inputs = tokenizer(text, return_tensors="pt")
35
  input_ids = inputs["input_ids"]
36
 
37
 
38
  with torch.no_grad():
39
- outputs = model(input_ids)
40
- print(outputs)
41
  inputs = processor(text=text, return_tensors="pt")
42
  speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
43
- print(speech)
44
  return speech.cpu()
45
 
46
 
 
18
  vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
19
 
20
 
21
+ model_mms = VitsModel.from_pretrained("Matthijs/mms-tts-deu").to(device)
22
+ tokenizer_mms = VitsTokenizer.from_pretrained("Matthijs/mms-tts-deu")
23
 
24
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
25
  speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 
31
 
32
 
33
  def synthesise(text):
34
+ inputs = tokenizer_mms(text, return_tensors="pt")
35
  input_ids = inputs["input_ids"]
36
 
37
 
38
  with torch.no_grad():
39
+ outputs = model_mms(input_ids)
40
+ print("mms model", outputs)
41
  inputs = processor(text=text, return_tensors="pt")
42
  speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
43
+ print("speecht5 model", speech)
44
  return speech.cpu()
45
 
46