Spaces:

coqui
/

voice-chat-with-mistral

Paused

App Files Files Community

ggoknar committed on Oct 17, 2023

Commit

3f2e1a8

•

1 Parent(s): bd470e7

limit speech to 250 characters for now

Browse files

Files changed (1) hide show

app.py +49 -36

app.py CHANGED Viewed

@@ -399,7 +399,13 @@ def generate_speech(history):
     for sentence, history in get_sentence(history):
         print(sentence)
         # Sometimes the prompt's </s> shows up in the output; remove it
         sentence = sentence.replace("</s>", "")
         # A quick fix for the last character: it may produce weird sounds if it is attached to the text
         if sentence[-1] in ["!", "?", ".", ","]:
             # just add a space
@@ -410,49 +416,56 @@ def generate_speech(history):
             # generate speech using precomputed latents
             # This is not streaming but it will be fast
             # wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
-            audio_stream = get_voice_streaming(
-                sentence, language, latent_map["Female_Voice"]
-            )
-            wav_chunks = wave_header_chunk()
-            frame_length = 0
-            for chunk in audio_stream:
-                try:
-                    wav_bytestream += chunk
-                    if DIRECT_STREAM:
-                        yield (
-                            gr.Audio.update(
-                                value=wave_header_chunk() + chunk, autoplay=True
-                            ),
-                            history,
-                        )
-                        wait_time = len(chunk) / 2 / 24000
-                        wait_time = AUDIO_WAIT_MODIFIER * wait_time
-                        print("Sleeping till chunk end")
-                        time.sleep(wait_time)
-                    else:
-                        wav_chunks += chunk
-                        frame_length += len(chunk)
-                except:
-                    # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
-                    continue
             if not DIRECT_STREAM:
                 yield (
                     gr.Audio.update(value=None, autoplay=True),
                     history,
                 )  # hack to switch autoplay
-                yield (gr.Audio.update(value=wav_chunks, autoplay=True), history)
-                # Streaming wait time calculation
-                # audio_length = frame_length / sample_width/ frame_rate
-                wait_time = frame_length / 2 / 24000
-                # for non streaming
-                # wait_time= librosa.get_duration(path=wav)
-                wait_time = AUDIO_WAIT_MODIFIER * wait_time
-                print("Sleeping till audio end")
-                time.sleep(wait_time)
         except RuntimeError as e:
             if "device-side assert" in str(e):
@@ -480,7 +493,7 @@ def generate_speech(history):
     # yield (combined_file_name, history
     wav_bytestream = wave_header_chunk() + wav_bytestream
-    time.sleep(0.5)
     yield (gr.Audio.update(value=None, autoplay=False), history)
     yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)

     for sentence, history in get_sentence(history):
         print(sentence)
         # Sometimes the prompt's </s> shows up in the output; remove it
+        # Some post process for speech only
         sentence = sentence.replace("</s>", "")
+        sentence = sentence.replace("```", "")
+        sentence = sentence.replace("```", "")
+        sentence = sentence.replace("(", " ")
+        sentence = sentence.replace(")", " ")
         # A quick fix for the last character: it may produce weird sounds if it is attached to the text
         if sentence[-1] in ["!", "?", ".", ","]:
             # just add a space
             # generate speech using precomputed latents
             # This is not streaming but it will be fast
             # wav = get_voice(sentence,language, latent_map["Female_Voice"], suffix=len(wav_list))
+            if len(sentence) > 250:
+                # Do not generate voice for this sentence: it would hit the token limit,
+                # so no audio is produced for it
+                audio_stream = None
+            else:
+                audio_stream = get_voice_streaming(
+                    sentence, language, latent_map["Female_Voice"]
+                )
+            if audio_stream is not None:
+                wav_chunks = wave_header_chunk()
+                frame_length = 0
+                for chunk in audio_stream:
+                    try:
+                        wav_bytestream += chunk
+                        if DIRECT_STREAM:
+                            yield (
+                                gr.Audio.update(
+                                    value=wave_header_chunk() + chunk, autoplay=True
+                                ),
+                                history,
+                            )
+                            wait_time = len(chunk) / 2 / 24000
+                            wait_time = AUDIO_WAIT_MODIFIER * wait_time
+                            print("Sleeping till chunk end")
+                            time.sleep(wait_time)
+                        else:
+                            wav_chunks += chunk
+                            frame_length += len(chunk)
+                    except:
+                        # hack to continue on playing. sometimes last chunk is empty , will be fixed on next TTS
+                        continue
             if not DIRECT_STREAM:
                 yield (
                     gr.Audio.update(value=None, autoplay=True),
                     history,
                 )  # hack to switch autoplay
+                if audio_stream is not None:
+                    yield (gr.Audio.update(value=wav_chunks, autoplay=True), history)
+                    # Streaming wait time calculation
+                    # audio_length = frame_length / sample_width/ frame_rate
+                    wait_time = frame_length / 2 / 24000
+                    # for non streaming
+                    # wait_time= librosa.get_duration(path=wav)
+                    wait_time = AUDIO_WAIT_MODIFIER * wait_time
+                    print("Sleeping till audio end")
+                    time.sleep(wait_time)
         except RuntimeError as e:
             if "device-side assert" in str(e):
     # yield (combined_file_name, history
     wav_bytestream = wave_header_chunk() + wav_bytestream
+    time.sleep(0.7)
     yield (gr.Audio.update(value=None, autoplay=False), history)
     yield (gr.Audio.update(value=wav_bytestream, autoplay=False), history)