unijoh committed on
Commit
3e1ff6b
1 Parent(s): 51e6dce

Update tts.py

Browse files
Files changed (1) hide show
  1. tts.py +9 -24
tts.py CHANGED
@@ -1,7 +1,5 @@
1
- import torch
2
- from transformers import AutoTokenizer, AutoModelForTextToWaveform
3
  import logging
4
- import numpy as np
5
  import soundfile as sf
6
 
7
  # Set up logging
@@ -9,13 +7,12 @@ logging.basicConfig(level=logging.DEBUG)
9
 
10
  MODEL_ID = "facebook/mms-tts-fao"
11
 
12
- # Try to load the model and tokenizer
13
  try:
14
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
15
- model = AutoModelForTextToWaveform.from_pretrained(MODEL_ID)
16
- logging.info("Model and tokenizer loaded successfully.")
17
  except Exception as e:
18
- logging.error(f"Error loading model or tokenizer: {e}")
19
  raise
20
 
21
  def synthesize_speech(text):
@@ -25,24 +22,12 @@ def synthesize_speech(text):
25
  logging.error("Text input is empty.")
26
  return None
27
 
28
- inputs = tokenizer(text, return_tensors="pt")
29
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
30
- model.to(device)
31
- inputs = inputs.to(device)
32
 
33
- with torch.no_grad():
34
- outputs = model.generate(**inputs)
35
-
36
- logging.info("Speech generated successfully.")
37
-
38
- # Convert outputs to waveform
39
- waveform = outputs.cpu().numpy().flatten()
40
- # Normalize waveform to the range [-1, 1]
41
- waveform = np.clip(waveform, -1.0, 1.0)
42
-
43
- # Convert waveform to audio format that Gradio can handle
44
  audio_path = "output.wav"
45
- sf.write(audio_path, waveform, 16000)
46
  return audio_path
47
  except Exception as e:
48
  logging.error(f"Error during speech synthesis: {e}")
 
1
+ from transformers import pipeline
 
2
  import logging
 
3
  import soundfile as sf
4
 
5
  # Set up logging
 
7
 
8
  MODEL_ID = "facebook/mms-tts-fao"
9
 
10
+ # Try to create the pipeline
11
  try:
12
+ pipe = pipeline("text-to-speech", model=MODEL_ID)
13
+ logging.info("Pipeline created successfully.")
 
14
  except Exception as e:
15
+ logging.error(f"Error creating pipeline: {e}")
16
  raise
17
 
18
  def synthesize_speech(text):
 
22
  logging.error("Text input is empty.")
23
  return None
24
 
25
+ result = pipe(text)
26
+ waveform = result["waveform"]
 
 
27
 
28
+ # Save waveform to an audio file
 
 
 
 
 
 
 
 
 
 
29
  audio_path = "output.wav"
30
+ sf.write(audio_path, waveform, 16000) # Write the waveform using soundfile
31
  return audio_path
32
  except Exception as e:
33
  logging.error(f"Error during speech synthesis: {e}")