Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -351,18 +351,24 @@ def audio_float_to_int16(
|
|
351 |
audio_norm = audio_norm.astype("int16")
|
352 |
return audio_norm
|
353 |
|
354 |
-
def inferencing(model, config,
|
355 |
audios = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
356 |
if config["phoneme_type"] == "PhonemeType.ESPEAK":
|
357 |
config["phoneme_type"] = "espeak"
|
358 |
-
|
|
|
359 |
for phonemes in text:
|
360 |
-
phoneme_ids = phonemes_to_ids(config, phonemes)
|
361 |
num_speakers = config["num_speakers"]
|
362 |
-
|
363 |
-
speaker_id = None # for now
|
364 |
-
else:
|
365 |
-
speaker_id = sid
|
366 |
text = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
|
367 |
text_lengths = np.array([text.shape[1]], dtype=np.int64)
|
368 |
scales = np.array(
|
@@ -371,8 +377,8 @@ def inferencing(model, config, sid, line, length_scale, noise_scale, noise_scale
|
|
371 |
)
|
372 |
sid = None
|
373 |
if speaker_id is not None:
|
374 |
-
|
375 |
-
|
376 |
audio = model.run(
|
377 |
None,
|
378 |
{
|
@@ -382,18 +388,15 @@ def inferencing(model, config, sid, line, length_scale, noise_scale, noise_scale
|
|
382 |
"sid": sid,
|
383 |
},
|
384 |
)[0].squeeze((0, 1))
|
385 |
-
audio = audio_float_to_int16(audio.squeeze())
|
386 |
audios.append(audio)
|
|
|
387 |
merged_audio = np.concatenate(audios)
|
388 |
sample_rate = config["audio"]["sample_rate"]
|
389 |
temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
|
390 |
sf.write(temp_audio_path, merged_audio, config["audio"]["sample_rate"])
|
391 |
audio = AudioSegment.from_mp3(temp_audio_path)
|
392 |
return audio
|
393 |
-
# return FileResponse(temp_audio_path)
|
394 |
-
# Return the audio file as a FastAPI response
|
395 |
-
# display(Markdown(f"{line}"))
|
396 |
-
# display(Audio(merged_audio, rate=sample_rate, autoplay=auto_play))
|
397 |
|
398 |
def denoise(
|
399 |
audio: np.ndarray, bias_spec: np.ndarray, denoiser_strength: float
|
|
|
351 |
audio_norm = audio_norm.astype("int16")
|
352 |
return audio_norm
|
353 |
|
354 |
+
def inferencing(model, config, selected_speaker_id, line, length_scale, noise_scale, noise_scale_w, auto_play=True):
    """Synthesize speech for ``line`` with a piper-style ONNX voice model.

    Parameters
    ----------
    model:
        ONNX session-like object exposing ``run(output_names, inputs)``.
    config : dict
        Piper voice config; this function reads ``phoneme_type``,
        ``num_speakers`` and ``audio.sample_rate``.
    selected_speaker_id:
        Speaker index from the UI. Anything not convertible to ``int``
        (including ``None``) falls back to single-speaker inference.
    line : str
        Text to synthesize.
    length_scale, noise_scale, noise_scale_w : float
        Piper synthesis scales.
    auto_play : bool
        Kept for interface compatibility; not used in this function.

    Returns
    -------
    pydub.AudioSegment
        The synthesized audio, decoded from a temporary WAV file.
    """
    audios = []

    # Coerce the UI-provided speaker id. int(None) raises TypeError and
    # int("abc") raises ValueError, so catch both and degrade gracefully to
    # single-speaker mode instead of crashing on bad UI input.
    try:
        speaker_id = int(selected_speaker_id)
    except (TypeError, ValueError):
        speaker_id = None

    # Some exported configs store the repr of the enum member rather than
    # its value; normalize it so downstream phonemization works.
    if config["phoneme_type"] == "PhonemeType.ESPEAK":
        config["phoneme_type"] = "espeak"

    # One phoneme sequence per sentence. Use a distinct name instead of
    # rebinding `text` inside the loop that iterates it (the original
    # reused `text` for both the sentence list and the id array).
    sentences = phonemize(config, line)
    for phonemes in sentences:
        phoneme_ids = phonemes_to_ids(config, phonemes)

        # Shape (1, T) int64 ids plus their length, as the ONNX graph expects.
        input_ids = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
        input_lengths = np.array([input_ids.shape[1]], dtype=np.int64)
        # NOTE(review): the scale ordering below is reconstructed from the
        # piper convention — the diff hid these lines; confirm against the
        # full app.py.
        scales = np.array(
            [noise_scale, length_scale, noise_scale_w],
            dtype=np.float32,
        )

        sid = None
        if speaker_id is not None:
            # The model expects the speaker id as a 1-D int64 array.
            sid = np.array([speaker_id], dtype=np.int64)

        # NOTE(review): the first three input names are reconstructed from
        # the piper convention — the diff hid these dict entries; confirm
        # against the full app.py. Only "sid" was visible.
        audio = model.run(
            None,
            {
                "input": input_ids,
                "input_lengths": input_lengths,
                "scales": scales,
                "sid": sid,
            },
        )[0].squeeze((0, 1))
        audio = audio_float_to_int16(audio.squeeze())
        audios.append(audio)

    merged_audio = np.concatenate(audios)
    sample_rate = config["audio"]["sample_rate"]
    temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
    sf.write(temp_audio_path, merged_audio, sample_rate)
    # BUG FIX: soundfile writes a RIFF/WAV file here, but the original code
    # decoded it with AudioSegment.from_mp3(), which forces ffmpeg to parse
    # the WAV bytes as MP3. Read it with the WAV loader instead.
    audio = AudioSegment.from_wav(temp_audio_path)
    return audio
|
|
|
|
|
|
|
|
|
400 |
|
401 |
def denoise(
|
402 |
audio: np.ndarray, bias_spec: np.ndarray, denoiser_strength: float
|