fargerm committed on
Commit
f4e544b
·
verified ·
1 Parent(s): 0010eeb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import streamlit as st
2
- from transformers import MarianMTModel, MarianTokenizer, pipeline
 
 
3
  from io import BytesIO
4
 
5
  # Define the language options
@@ -35,14 +37,20 @@ def translate_text(text, target_lang):
35
  return translated_text
36
 
37
  def synthesize_speech(text, lang):
 
 
 
 
 
 
 
 
38
  # Synthesize speech
39
- tts_pipeline = pipeline("text-to-speech", model="microsoft/speecht5_tts")
40
- try:
41
- audio = tts_pipeline(text)
42
- audio_bytes = BytesIO(audio["audio"])
43
- return audio_bytes, None
44
- except Exception as e:
45
- return None, str(e)
46
 
47
  if st.button("Translate and Synthesize Speech"):
48
  # Perform translation
@@ -58,3 +66,4 @@ if st.button("Translate and Synthesize Speech"):
58
 
59
 
60
 
 
 
1
  import streamlit as st
2
+ from transformers import MarianMTModel, MarianTokenizer, SpeechT5Processor, SpeechT5ForTextToSpeech
3
+ from datasets import load_dataset
4
+ import torch
5
  from io import BytesIO
6
 
7
  # Define the language options
 
37
  return translated_text
38
 
39
  def synthesize_speech(text, lang):
40
+ # Load the TTS model and processor
41
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
42
+ model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
43
+
44
+ # Load speaker embeddings
45
+ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
46
+ speaker_embeddings = torch.tensor(embeddings_dataset[0]["xvector"]).unsqueeze(0)
47
+
48
  # Synthesize speech
49
+ inputs = processor(text=text, return_tensors="pt")
50
+ speech = model.generate_speech(inputs["input_ids"], speaker_embeddings)
51
+
52
+ audio_bytes = BytesIO(speech.numpy())
53
+ return audio_bytes, None
 
 
54
 
55
  if st.button("Translate and Synthesize Speech"):
56
  # Perform translation
 
66
 
67
 
68
 
69
+