unijoh committed on
Commit
ba0da04
1 Parent(s): 0455ecb

Update tts.py

Browse files
Files changed (1) hide show
  1. tts.py +29 -12
tts.py CHANGED
@@ -1,22 +1,39 @@
1
- import numpy as np
2
- import torchaudio
3
  import logging
 
 
4
 
# Set up logging
logging.basicConfig(level=logging.DEBUG)


def synthesize_speech(text):
    """Write a one-second 440 Hz test tone to a WAV file and return its path.

    This is a placeholder for real text-to-speech: ``text`` is accepted so the
    call signature matches the eventual implementation, but it is not used.

    Args:
        text: Text to synthesize (currently ignored).

    Returns:
        The path of the written WAV file, or ``None`` if generating or saving
        the audio failed (the error is logged).
    """
    try:
        # torch is not among this module's top-level imports, so the original
        # ``torch.tensor`` call raised NameError on every invocation. Import
        # it here, inside the try, so a missing install still yields None.
        import torch

        # Generate a simple sine wave for testing
        sr = 16000
        # endpoint=False keeps the sample spacing at exactly 1/sr, so the
        # tone is exactly 440 Hz and the last sample does not repeat t=0.
        t = np.linspace(0, 1, sr, endpoint=False)
        waveform = 0.5 * np.sin(2 * np.pi * 440 * t).astype(np.float32)

        # Save the sine wave to a file; unsqueeze adds the leading channel
        # dimension so the tensor is 2-D (1, samples).
        file_path = "/tmp/output.wav"
        torchaudio.save(file_path, torch.from_numpy(waveform).unsqueeze(0), sr)
        logging.info(f"Test audio file saved successfully at {file_path}.")
        return file_path
    except Exception as e:
        logging.error(f"Error during test audio generation: {e}")
        return None
 
1
+ import torch
2
+ from transformers import SpeechT5ForTextToSpeech, SpeechT5Processor
3
  import logging
4
+ import numpy as np
5
+ import soundfile as sf
6
 
# Set up logging
logging.basicConfig(level=logging.DEBUG)

MODEL_ID = "microsoft/speecht5_tts"
VOCODER_ID = "microsoft/speecht5_hifigan"
SAMPLE_RATE = 16000  # sample rate passed to soundfile when writing the WAV
OUTPUT_PATH = "output.wav"

# Bind every name up front so that a failed load leaves them as None instead
# of undefined; synthesize_speech() checks for None and reports it clearly.
processor = None
model = None
vocoder = None
device = None

# Try to load the model and processor
try:
    # Imported locally: SpeechT5HifiGan ships with transformers but is not
    # part of this file's top-level import line.
    from transformers import SpeechT5HifiGan

    processor = SpeechT5Processor.from_pretrained(MODEL_ID)
    model = SpeechT5ForTextToSpeech.from_pretrained(MODEL_ID)
    vocoder = SpeechT5HifiGan.from_pretrained(VOCODER_ID)

    # Pick the device and move the weights once, not on every request.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    vocoder.to(device)
    logging.info("Model and processor loaded successfully.")
except Exception as e:
    # Reset everything so a partial load is never used.
    processor = model = vocoder = device = None
    logging.error(f"Error loading model or processor: {e}")


def synthesize_speech(text, speaker_embeddings=None):
    """Synthesize ``text`` with SpeechT5 and write the audio to a WAV file.

    Args:
        text: Non-empty string to speak.
        speaker_embeddings: Optional x-vector tensor of shape
            ``(1, speaker_embedding_dim)`` selecting the voice. When omitted,
            an all-zero placeholder vector is used.

    Returns:
        The path of the written WAV file (``"output.wav"``), or ``None`` if
        the model is not loaded, the text is empty, or synthesis fails. The
        reason is logged in every failure case.
    """
    if processor is None or model is None or vocoder is None:
        logging.error("Error during speech synthesis: model is not loaded.")
        return None
    if not isinstance(text, str) or not text.strip():
        logging.error("Error during speech synthesis: no text provided.")
        return None

    try:
        # SpeechT5Processor's documented usage passes the text via ``text=``.
        inputs = processor(text=text, return_tensors="pt").to(device)

        if speaker_embeddings is None:
            # NOTE(review): an all-zero x-vector is only a placeholder; voice
            # quality with it is unverified here. Pass a real x-vector for
            # natural-sounding output.
            dim = getattr(model.config, "speaker_embedding_dim", 512)
            speaker_embeddings = torch.zeros((1, dim))
        speaker_embeddings = speaker_embeddings.to(device)

        with torch.no_grad():
            # Per the SpeechT5 docs, generate() returns a spectrogram unless
            # a vocoder is passed; with one, it returns the waveform.
            speech = model.generate(
                **inputs,
                speaker_embeddings=speaker_embeddings,
                vocoder=vocoder,
            )

        logging.info("Speech generated successfully.")

        # Move to CPU and flatten to a 1-D sample array for soundfile.
        waveform = speech.cpu().numpy().flatten()
        # Write a WAV file whose path can be handed back to the caller.
        sf.write(OUTPUT_PATH, waveform, SAMPLE_RATE)
        return OUTPUT_PATH
    except Exception as e:
        logging.error(f"Error during speech synthesis: {e}")
        return None