Spaces:

unijoh
/

metaambod

Running

unijoh committed on Jun 15

Commit

0083046

•

1 Parent(s): 514fc2c

Update tts.py

Files changed (1) hide show

tts.py CHANGED Viewed

@@ -16,9 +16,15 @@ try:
     logging.info("Model and processor loaded successfully.")
 except Exception as e:
     logging.error(f"Error loading model or processor: {e}")
 def synthesize_speech(text):
     try:
         inputs = processor(text, return_tensors="pt")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         model.to(device)
@@ -26,14 +32,18 @@ def synthesize_speech(text):
         with torch.no_grad():
             speech = model.generate(**inputs)
         logging.info("Speech generated successfully.")
         # Decode the generated speech and save to an audio file
         waveform = speech.cpu().numpy().flatten()
         # Convert waveform to audio format that Gradio can handle
-        sf.write("output.wav", waveform, 16000)
-        return "output.wav"
     except Exception as e:
         logging.error(f"Error during speech synthesis: {e}")
         return None

     logging.info("Model and processor loaded successfully.")
 except Exception as e:
     logging.error(f"Error loading model or processor: {e}")
+    raise
 def synthesize_speech(text):
     try:
+        # Ensure text is not empty
+        if not text.strip():
+            logging.error("Text input is empty.")
+            return None
         inputs = processor(text, return_tensors="pt")
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         model.to(device)
         with torch.no_grad():
             speech = model.generate(**inputs)
         logging.info("Speech generated successfully.")
         # Decode the generated speech and save to an audio file
         waveform = speech.cpu().numpy().flatten()
+        # Normalize waveform to the range [-1, 1]
+        waveform = np.clip(waveform, -1.0, 1.0)
         # Convert waveform to audio format that Gradio can handle
+        audio_path = "output.wav"
+        sf.write(audio_path, waveform, 16000)
+        return audio_path
     except Exception as e:
         logging.error(f"Error during speech synthesis: {e}")
         return None