John Langley committed on
Commit
bc0e3c7
·
1 Parent(s): 4404242

trying things with cpu

Browse files
Files changed (1) hide show
  1. app.py +46 -46
app.py CHANGED
@@ -32,13 +32,13 @@ from faster_whisper import WhisperModel
32
  import gradio as gr
33
  from huggingface_hub import hf_hub_download
34
  from llama_cpp import Llama
35
- from TTS.tts.configs.xtts_config import XttsConfig
36
- from TTS.tts.models.xtts import Xtts
37
- from TTS.utils.generic_utils import get_user_data_dir
38
- from TTS.utils.manage import ModelManager
39
 
40
  # Local imports
41
- from utils import get_sentence, generate_speech_for_sentence, wave_header_chunk
42
 
43
  # Load Whisper ASR model
44
  print("Loading Whisper ASR")
@@ -52,22 +52,22 @@ mistral_llm = Llama(model_path=mistral_model_path,n_gpu_layers=35,max_new_tokens
52
 
53
 
54
  # Load XTTS Model
55
- print("Loading XTTS model")
56
- os.environ["COQUI_TOS_AGREED"] = "1"
57
- tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
58
- ModelManager().download_model(tts_model_name)
59
- tts_model_path = os.path.join(get_user_data_dir("tts"), tts_model_name.replace("/", "--"))
60
- config = XttsConfig()
61
- config.load_json(os.path.join(tts_model_path, "config.json"))
62
- xtts_model = Xtts.init_from_config(config)
63
- xtts_model.to("cpu")
64
- xtts_model.load_checkpoint(
65
- config,
66
- checkpoint_path=os.path.join(tts_model_path, "model.pth"),
67
- vocab_path=os.path.join(tts_model_path, "vocab.json"),
68
- eval=True,
69
- use_deepspeed=True,
70
- )
71
  #xtts_model.cuda()
72
  #print("UN-Loading XTTS model")
73
 
@@ -114,8 +114,7 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
114
  value=None,
115
  label="Generated audio response",
116
  streaming=True,
117
- autoplay=True,
118
- interactive=False,
119
  show_label=True,
120
  )
121
 
@@ -137,36 +136,37 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
137
 
138
  def generate_speech(chatbot_history, chatbot_voice, initial_greeting=False):
139
  # Start by yielding an initial empty audio to set up autoplay
140
- yield ("", chatbot_history, wave_header_chunk())
 
141
 
142
  # Helper function to handle the speech generation and yielding process
143
- def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
144
- if sentence != "":
145
- print("Processing sentence")
146
- generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
147
- if generated_speech is not None:
148
- _, audio_dict = generated_speech
149
- yield (sentence, chatbot_history, audio_dict["value"])
150
-
151
- if initial_greeting:
152
- # Process only the initial greeting if specified
153
- for _, sentence in chatbot_history:
154
- yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
155
- else:
156
- # Continuously get and process sentences from a generator function
157
- for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
158
- print("Inserting sentence to queue")
159
- yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
160
 
161
  txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
162
- ).then(fn=generate_speech, inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
163
 
164
  txt_msg.then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=[txt_box], queue=False)
165
 
166
- audio_msg = audio_record.stop_recording(fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, txt_box], queue=False
167
- ).then(fn=generate_speech, inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
168
 
169
- audio_msg.then(fn=lambda: (gr.update(interactive=True),gr.update(interactive=True,value=None)), inputs=None, outputs=[txt_box, audio_record], queue=False)
170
 
171
  FOOTNOTE = """
172
  This Space demonstrates how to speak to an llm chatbot, based solely on open accessible models.
@@ -179,5 +179,5 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
179
  - Responses generated by chat model should not be assumed correct or taken serious, as this is a demonstration example only
180
  - iOS (Iphone/Ipad) devices may not experience voice due to autoplay being disabled on these devices by Vendor"""
181
  gr.Markdown(FOOTNOTE)
182
- demo.load(fn=generate_speech, inputs=[chatbot,chatbot_voice, gr.State(value=True)], outputs=[sentence, chatbot, audio_playback])
183
  demo.queue().launch(debug=True,share=True)
 
32
  import gradio as gr
33
  from huggingface_hub import hf_hub_download
34
  from llama_cpp import Llama
35
+ #from TTS.tts.configs.xtts_config import XttsConfig
36
+ #from TTS.tts.models.xtts import Xtts
37
+ #from TTS.utils.generic_utils import get_user_data_dir
38
+ #from TTS.utils.manage import ModelManager
39
 
40
  # Local imports
41
+ from utils import get_sentence #, generate_speech_for_sentence, wave_header_chunk
42
 
43
  # Load Whisper ASR model
44
  print("Loading Whisper ASR")
 
52
 
53
 
54
  # Load XTTS Model
55
+ #print("Loading XTTS model")
56
+ #os.environ["COQUI_TOS_AGREED"] = "1"
57
+ #tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
58
+ #ModelManager().download_model(tts_model_name)
59
+ #tts_model_path = os.path.join(get_user_data_dir("tts"), tts_model_name.replace("/", "--"))
60
+ #config = XttsConfig()
61
+ #config.load_json(os.path.join(tts_model_path, "config.json"))
62
+ #xtts_model = Xtts.init_from_config(config)
63
+ #xtts_model.to("cpu")
64
+ #xtts_model.load_checkpoint(
65
+ # config,
66
+ # checkpoint_path=os.path.join(tts_model_path, "model.pth"),
67
+ # vocab_path=os.path.join(tts_model_path, "vocab.json"),
68
+ # eval=True,
69
+ # use_deepspeed=True,
70
+ #)
71
  #xtts_model.cuda()
72
  #print("UN-Loading XTTS model")
73
 
 
114
  value=None,
115
  label="Generated audio response",
116
  streaming=True,
117
+ autoplay=True,interactive=False,
 
118
  show_label=True,
119
  )
120
 
 
136
 
137
  def generate_speech(chatbot_history, chatbot_voice, initial_greeting=False):
138
  # Start by yielding an initial empty audio to set up autoplay
139
+ #yield ("", chatbot_history, wave_header_chunk())
140
+ yield ("", chatbot_history)
141
 
142
  # Helper function to handle the speech generation and yielding process
143
+ # def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
144
+ # if sentence != "":
145
+ # print("Processing sentence")
146
+ # generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, xtts_supported_languages=config.languages, return_as_byte=True)
147
+ # if generated_speech is not None:
148
+ # _, audio_dict = generated_speech
149
+ # yield (sentence, chatbot_history, audio_dict["value"])
150
+
151
+ # if initial_greeting:
152
+ # # Process only the initial greeting if specified
153
+ # for _, sentence in chatbot_history:
154
+ # yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
155
+ # else:
156
+ # # Continuously get and process sentences from a generator function
157
+ # for sentence, chatbot_history in get_sentence(chatbot_history, mistral_llm):
158
+ # print("Inserting sentence to queue")
159
+ # yield from handle_speech_generation(sentence, chatbot_history, chatbot_voice)
160
 
161
  txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
162
+ )#.then(fn=generate_speech, inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
163
 
164
  txt_msg.then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=[txt_box], queue=False)
165
 
166
+ #audio_msg = audio_record.stop_recording(fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, txt_box], queue=False
167
+ # ).then(fn=generate_speech, inputs=[chatbot,chatbot_voice], outputs=[sentence, chatbot, audio_playback])
168
 
169
+ #audio_msg.then(fn=lambda: (gr.update(interactive=True),gr.update(interactive=True,value=None)), inputs=None, outputs=[txt_box, audio_record], queue=False)
170
 
171
  FOOTNOTE = """
172
  This Space demonstrates how to speak to an llm chatbot, based solely on open accessible models.
 
179
  - Responses generated by chat model should not be assumed correct or taken serious, as this is a demonstration example only
180
  - iOS (Iphone/Ipad) devices may not experience voice due to autoplay being disabled on these devices by Vendor"""
181
  gr.Markdown(FOOTNOTE)
182
+ demo.load(fn=generate_speech, inputs=[chatbot,chatbot_voice, gr.State(value=True)], outputs=[sentence, chatbot]) #outputs=[sentence, chatbot, audio_playback])
183
  demo.queue().launch(debug=True,share=True)