Spaces:

JohnInizio
/

persona-chat-demo

Runtime error

App Files Files Community

John Langley committed on Aug 12, 2024

Commit

bc0e3c7

1 Parent(s): 4404242

trying things with cpu

Browse files

Files changed (1) hide show

app.py +46 -46

app.py CHANGED Viewed

@@ -32,13 +32,13 @@ from faster_whisper import WhisperModel
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
-from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
-from TTS.utils.generic_utils import get_user_data_dir
-from TTS.utils.manage import ModelManager
 # Local imports
-from utils import get_sentence, generate_speech_for_sentence, wave_header_chunk
 # Load Whisper ASR model
 print("Loading Whisper ASR")
@@ -52,22 +52,22 @@ mistral_llm = Llama(model_path=mistral_model_path,n_gpu_layers=35,max_new_tokens
 # Load XTTS Model
-print("Loading XTTS model")
-os.environ["COQUI_TOS_AGREED"] = "1"
-tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
-ModelManager().download_model(tts_model_name)
-tts_model_path = os.path.join(get_user_data_dir("tts"), tts_model_name.replace("/", "--"))
-config = XttsConfig()
-config.load_json(os.path.join(tts_model_path, "config.json"))
-xtts_model = Xtts.init_from_config(config)
-xtts_model.to("cpu")
-xtts_model.load_checkpoint(
-    config,
-    checkpoint_path=os.path.join(tts_model_path, "model.pth"),
-    vocab_path=os.path.join(tts_model_path, "vocab.json"),
-    eval=True,
-    use_deepspeed=True,
-)
 #xtts_model.cuda()
 #print("UN-Loading XTTS model")
@@ -114,8 +114,7 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
             value=None,
             label="Generated audio response",
             streaming=True,
-            autoplay=True,
-            interactive=False,
             show_label=True,
         )
@@ -137,36 +136,37 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
     def generate_speech(chatbot_history, chatbot_voice, initial_greeting=False):
         # Start by yielding an initial empty audio to set up autoplay
-        yield ("", chatbot_history, wave_header_chunk())
         # Helper function to handle the speech generation and yielding process
-        def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
-            if sentence != "":
-                print("Processing sentence")
-                generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
-                if generated_speech is not None:
-                    _, audio_dict = generated_speech
-                    yield (sentence, chatbot_history, audio_dict["value"])
-        if initial_greeting:
-            # Process only the initial greeting if specified
-            for _, sentence in chatbot_history:
-                yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
-        else:
-            # Continuously get and process sentences from a generator function
-            for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
-                print("Inserting sentence to queue")
-                yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
     txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
-                             ).then(fn=generate_speech,  inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
     txt_msg.then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=[txt_box], queue=False)
-    audio_msg = audio_record.stop_recording(fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, txt_box], queue=False
-                                            ).then(fn=generate_speech,  inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
-    audio_msg.then(fn=lambda: (gr.update(interactive=True),gr.update(interactive=True,value=None)), inputs=None, outputs=[txt_box, audio_record], queue=False)
     FOOTNOTE = """
             This Space demonstrates how to speak to an llm chatbot, based solely on open accessible models.
@@ -179,5 +179,5 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
             - Responses generated by chat model should not be assumed correct or taken serious, as this is a demonstration example only
             - iOS (Iphone/Ipad) devices may not experience voice due to autoplay being disabled on these devices by Vendor"""
     gr.Markdown(FOOTNOTE)
-    demo.load(fn=generate_speech, inputs=[chatbot,chatbot_voice, gr.State(value=True)], outputs=[sentence, chatbot, audio_playback])
 demo.queue().launch(debug=True,share=True)

 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+#from TTS.tts.configs.xtts_config import XttsConfig
+#from TTS.tts.models.xtts import Xtts
+#from TTS.utils.generic_utils import get_user_data_dir
+#from TTS.utils.manage import ModelManager
 # Local imports
+from utils import get_sentence #, generate_speech_for_sentence, wave_header_chunk
 # Load Whisper ASR model
 print("Loading Whisper ASR")
 # Load XTTS Model
+#print("Loading XTTS model")
+#os.environ["COQUI_TOS_AGREED"] = "1"
+#tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
+#ModelManager().download_model(tts_model_name)
+#tts_model_path = os.path.join(get_user_data_dir("tts"), tts_model_name.replace("/", "--"))
+#config = XttsConfig()
+#config.load_json(os.path.join(tts_model_path, "config.json"))
+#xtts_model = Xtts.init_from_config(config)
+#xtts_model.to("cpu")
+#xtts_model.load_checkpoint(
+#    config,
+#    checkpoint_path=os.path.join(tts_model_path, "model.pth"),
+#    vocab_path=os.path.join(tts_model_path, "vocab.json"),
+#    eval=True,
+#    use_deepspeed=True,
+#)
 #xtts_model.cuda()
 #print("UN-Loading XTTS model")
             value=None,
             label="Generated audio response",
             streaming=True,
+            autoplay=True,interactive=False,
             show_label=True,
         )
     def generate_speech(chatbot_history, chatbot_voice, initial_greeting=False):
         # Start by yielding an initial empty audio to set up autoplay
+        #yield ("", chatbot_history, wave_header_chunk())
+        yield ("", chatbot_history)
         # Helper function to handle the speech generation and yielding process
+    #    def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
+    #        if sentence != "":
+    #            print("Processing sentence")
+    #            generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
+    #            if generated_speech is not None:
+    #                _, audio_dict = generated_speech
+    #                yield (sentence, chatbot_history, audio_dict["value"])
+    #    if initial_greeting:
+    #        # Process only the initial greeting if specified
+    #        for _, sentence in chatbot_history:
+    #            yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
+    #    else:
+    #        # Continuously get and process sentences from a generator function
+    #        for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
+    #            print("Inserting sentence to queue")
+    #            yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
     txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
+                             )#.then(fn=generate_speech,  inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
     txt_msg.then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=[txt_box], queue=False)
+    #audio_msg = audio_record.stop_recording(fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, txt_box], queue=False
+    #                                        ).then(fn=generate_speech,  inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
+    #audio_msg.then(fn=lambda: (gr.update(interactive=True),gr.update(interactive=True,value=None)), inputs=None, outputs=[txt_box, audio_record], queue=False)
     FOOTNOTE = """
             This Space demonstrates how to speak to an llm chatbot, based solely on open accessible models.
             - Responses generated by chat model should not be assumed correct or taken serious, as this is a demonstration example only
             - iOS (Iphone/Ipad) devices may not experience voice due to autoplay being disabled on these devices by Vendor"""
     gr.Markdown(FOOTNOTE)
+    demo.load(fn=generate_speech, inputs=[chatbot,chatbot_voice, gr.State(value=True)], outputs=[sentence, chatbot])  #outputs=[sentence, chatbot, audio_playback])
 demo.queue().launch(debug=True,share=True)