ykirpichev committed on
Commit
4f64cb9
1 Parent(s): e5cefdc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -10
app.py CHANGED
@@ -38,7 +38,7 @@ def synthesise(text):
38
  with torch.no_grad():
39
  outputs = model_mms(input_ids)
40
  print("mms model", outputs)
41
- print(outputs.audio[0])
42
  return outputs.audio[0].cpu()
43
  inputs = processor(text=text, return_tensors="pt")
44
  speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
@@ -49,15 +49,7 @@ def synthesise(text):
49
  def speech_to_speech_translation(audio):
50
  translated_text = translate(audio)
51
  synthesised_speech = synthesise(translated_text)
52
- # (((speech["audio"].cpu().numpy()) + 1) / 2.)* 32767
53
- print(synthesised_speech)
54
- synthesised_speech_numpy = synthesised_speech.numpy()
55
- synthesised_speech_numpy += np.min(synthesised_speech_numpy)
56
- synthesised_speech_numpy /= np.max(synthesised_speech_numpy)
57
- # synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
58
- synthesised_speech = np.clip((synthesised_speech_numpy*32767) .astype(np.int16), 0, 32767)
59
- print(synthesised_speech)
60
- # synthesised_speech = (((synthesised_speech.numpy() + 1) / 2.0) * 32767).astype(np.int16)
61
  return 16000, synthesised_speech
62
 
63
 
 
38
  with torch.no_grad():
39
  outputs = model_mms(input_ids)
40
  print("mms model", outputs)
41
+ # print(outputs.audio[0])
42
  return outputs.audio[0].cpu()
43
  inputs = processor(text=text, return_tensors="pt")
44
  speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
 
49
  def speech_to_speech_translation(audio):
50
  translated_text = translate(audio)
51
  synthesised_speech = synthesise(translated_text)
52
+ synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
 
 
 
 
 
 
 
 
53
  return 16000, synthesised_speech
54
 
55