openpecha
/

speecht5-tts-01

Inference Endpoints

Model card Files Files and versions Community

TenzinGayche committed on Oct 23, 2023

Commit

a5deb0e

•

1 Parent(s): c3fc48f

Update handler.py

Files changed (1) hide show

handler.py +1 -26

handler.py CHANGED Viewed

@@ -13,23 +13,7 @@ import re
 import requests
 import os
 from pydub import AudioSegment
-def increase_volume_without_distortion(audio_data, sample_rate, target_dBFS):
-    # Create an AudioSegment from raw audio data
-    audio_segment = AudioSegment(
-        audio_data.tobytes(),
-        frame_rate=sample_rate,
-        sample_width=audio_data.dtype.itemsize,
-        channels=1  # or 2 for stereo
-    )
-    # Normalize the audio level
-    change_in_dBFS = target_dBFS - audio_segment.dBFS
-    normalized_audio = audio_segment.apply_gain(change_in_dBFS)
-    # Convert the AudioSegment back to a numpy array
-    normalized_audio_data = np.array(normalized_audio.get_array_of_samples()).astype(np.int16)
-    return normalized_audio_data
 converter = pyewts.pyewts()
 def download_file(url, destination):
     response = requests.get(url)
@@ -102,15 +86,6 @@ class EndpointHandler():
         speaker_embedding = torch.tensor(speaker_embedding)
         speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
         speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
-        if isinstance(speech, torch.Tensor):
-            speech = speech.numpy()
-    # Increase volume without distortion
-        target_dBFS = -20.0  # Adjust the value according to your requirement
-        speech = increase_volume_without_distortion(speech, 16000, target_dBFS)
         # Create a unique temporary WAV file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
             temp_wav_path = temp_wav_file.name

 import requests
 import os
 from pydub import AudioSegment
 converter = pyewts.pyewts()
 def download_file(url, destination):
     response = requests.get(url)
         speaker_embedding = torch.tensor(speaker_embedding)
         speech = self.model.generate_speech(input_ids.to('cuda'), speaker_embedding.to('cuda'), vocoder=self.vocoder.to('cuda'))
         speech = nr.reduce_noise(y=speech.to('cpu'), sr=16000)
         # Create a unique temporary WAV file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
             temp_wav_path = temp_wav_file.name