Update app.py
app.py CHANGED
@@ -351,24 +351,19 @@ def audio_float_to_int16(
     audio_norm = audio_norm.astype("int16")
     return audio_norm
 
-def inferencing(model, config, selected_speaker_id, line, length_scale, noise_scale, noise_scale_w, auto_play=True):
+
+def inferencing(model, config, sid, line, length_scale, noise_scale, noise_scale_w, auto_play=True):
     audios = []
-
-    # Ensure selected_speaker_id is a valid integer or handle it gracefully
-    try:
-        speaker_id = int(selected_speaker_id)
-    except ValueError:
-        # Handle the case where selected_speaker_id is not a valid integer
-        speaker_id = None  # Use a default value or handle it differently
-
     if config["phoneme_type"] == "PhonemeType.ESPEAK":
         config["phoneme_type"] = "espeak"
-
-    text = phonemize(config, line)  # Make sure phonemize function is defined
+    text = phonemize(config, line)
     for phonemes in text:
         phoneme_ids = phonemes_to_ids(config, phonemes)
         num_speakers = config["num_speakers"]
-
+        if num_speakers == 1:
+            speaker_id = None  # for now
+        else:
+            speaker_id = sid
         text = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
         text_lengths = np.array([text.shape[1]], dtype=np.int64)
         scales = np.array(
@@ -377,8 +372,7 @@ def inferencing(model, config, selected_speaker_id, line, length_scale, noise_sc
         )
         sid = None
         if speaker_id is not None:
-            sid = np.
-
+            sid = np.asarray([int(speaker_id)], dtype=np.int64)  # Convert to 1D array
         audio = model.run(
             None,
             {
@@ -388,9 +382,8 @@ def inferencing(model, config, selected_speaker_id, line, length_scale, noise_sc
                 "sid": sid,
             },
         )[0].squeeze((0, 1))
         audio = audio_float_to_int16(audio.squeeze())
         audios.append(audio)
-
     merged_audio = np.concatenate(audios)
     sample_rate = config["audio"]["sample_rate"]
     temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
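For review context, here is a minimal, self-contained sketch of what the rewritten loop body does for each phoneme sequence. Only the `"sid"` key is visible in the hunks above; the `"input"`, `"input_lengths"`, and `"scales"` input names and the scale ordering follow the usual Piper ONNX convention and are assumptions, as is the helper name `synthesize`. The sketch also feeds `"sid"` only when a speaker id is set, whereas the diff passes it unconditionally (a single-speaker graph has no `"sid"` input and would reject it).

```python
import numpy as np
import onnxruntime

def synthesize(model: onnxruntime.InferenceSession, phoneme_ids, num_speakers,
               sid=None, length_scale=1.0, noise_scale=0.667, noise_scale_w=0.8):
    # Single-speaker voices take no speaker id (the new `if num_speakers == 1` branch).
    speaker_id = None if num_speakers == 1 else sid

    text = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
    text_lengths = np.array([text.shape[1]], dtype=np.int64)
    # Ordering is an assumption; the diff truncates the np.array(...) call.
    scales = np.array([noise_scale, length_scale, noise_scale_w], dtype=np.float32)

    inputs = {"input": text, "input_lengths": text_lengths, "scales": scales}
    if speaker_id is not None:
        # The second hunk's fix: the ONNX graph expects a 1-D int64 tensor,
        # not a bare Python int (the old line was left dangling at `sid = np.`).
        inputs["sid"] = np.asarray([int(speaker_id)], dtype=np.int64)

    # run(None, ...) returns every model output; [0] is the audio tensor.
    return model.run(None, inputs)[0].squeeze((0, 1))
```

One caveat worth flagging: the patched loop reassigns `sid` inside the loop body (`sid = None`, then `sid = np.asarray(...)`), so from the second iteration onward `speaker_id = sid` picks up the 1-D array rather than the id passed to the function. It happens to work because `int()` still accepts a one-element array, but keeping the two names separate, as in the sketch, avoids the fragility.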
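The visible context ends at the temp-file path, so the write itself is not part of this change. For completeness, a hypothetical continuation using the stdlib `wave` module, assuming mono int16 output at the config's sample rate (the function name `write_wav` is illustrative, not from app.py):

```python
import wave

import numpy as np

def write_wav(path: str, audio_int16: np.ndarray, sample_rate: int) -> None:
    # Persist the merged int16 buffer as a mono WAV file.
    with wave.open(path, "wb") as wav_file:
        wav_file.setnchannels(1)    # mono
        wav_file.setsampwidth(2)    # int16 -> 2 bytes per sample
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(audio_int16.tobytes())

# e.g. write_wav(temp_audio_path, merged_audio, sample_rate)
```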