Gregniuki committed on
Commit
e23953c
·
1 Parent(s): c7b8201

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -17
app.py CHANGED
@@ -351,24 +351,19 @@ def audio_float_to_int16(
351
  audio_norm = audio_norm.astype("int16")
352
  return audio_norm
353
 
354
- def inferencing(model, config, selected_speaker_id, line, length_scale, noise_scale, noise_scale_w, auto_play=True):
 
355
  audios = []
356
-
357
- # Ensure selected_speaker_id is a valid integer or handle it gracefully
358
- try:
359
- speaker_id = int(selected_speaker_id)
360
- except ValueError:
361
- # Handle the case where selected_speaker_id is not a valid integer
362
- speaker_id = None # Use a default value or handle it differently
363
-
364
  if config["phoneme_type"] == "PhonemeType.ESPEAK":
365
  config["phoneme_type"] = "espeak"
366
-
367
- text = phonemize(config, line) # Make sure phonemize function is defined
368
  for phonemes in text:
369
- phoneme_ids = phonemes_to_ids(config, phonemes) # Make sure phonemes_to_ids function is defined
370
  num_speakers = config["num_speakers"]
371
-
 
 
 
372
  text = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
373
  text_lengths = np.array([text.shape[1]], dtype=np.int64)
374
  scales = np.array(
@@ -377,8 +372,7 @@ def inferencing(model, config, selected_speaker_id, line, length_scale, noise_sc
377
  )
378
  sid = None
379
  if speaker_id is not None:
380
- sid = np.array([speaker_id], dtype=np.int64) # Ensure sid is a 1D array
381
-
382
  audio = model.run(
383
  None,
384
  {
@@ -388,9 +382,8 @@ def inferencing(model, config, selected_speaker_id, line, length_scale, noise_sc
388
  "sid": sid,
389
  },
390
  )[0].squeeze((0, 1))
391
- audio = audio_float_to_int16(audio.squeeze()) # Make sure audio_float_to_int16 is defined
392
  audios.append(audio)
393
-
394
  merged_audio = np.concatenate(audios)
395
  sample_rate = config["audio"]["sample_rate"]
396
  temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
 
351
  audio_norm = audio_norm.astype("int16")
352
  return audio_norm
353
 
354
+
355
+ def inferencing(model, config, sid, line, length_scale, noise_scale, noise_scale_w, auto_play=True):
356
  audios = []
 
 
 
 
 
 
 
 
357
  if config["phoneme_type"] == "PhonemeType.ESPEAK":
358
  config["phoneme_type"] = "espeak"
359
+ text = phonemize(config, line)
 
360
  for phonemes in text:
361
+ phoneme_ids = phonemes_to_ids(config, phonemes)
362
  num_speakers = config["num_speakers"]
363
+ if num_speakers == 1:
364
+ speaker_id = None # for now
365
+ else:
366
+ speaker_id = sid
367
  text = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
368
  text_lengths = np.array([text.shape[1]], dtype=np.int64)
369
  scales = np.array(
 
372
  )
373
  sid = None
374
  if speaker_id is not None:
375
+ sid = np.asarray([int(speaker_id)], dtype=np.int64) # Convert to 1D array
 
376
  audio = model.run(
377
  None,
378
  {
 
382
  "sid": sid,
383
  },
384
  )[0].squeeze((0, 1))
385
+ audio = audio_float_to_int16(audio.squeeze())
386
  audios.append(audio)
 
387
  merged_audio = np.concatenate(audios)
388
  sample_rate = config["audio"]["sample_rate"]
389
  temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")