John Langley committed on
Commit
4979540
1 Parent(s): 281ff2d

change to cpu

Browse files
Files changed (1) hide show
  1. app.py +30 -18
app.py CHANGED
@@ -61,24 +61,28 @@ os.environ["COQUI_TOS_AGREED"] = "1"
61
  ##print(m)
62
  #m = model_name
63
 
 
 
 
 
64
  #xtts_model = TTS(model_name, gpu=False)
65
  #xtts_model.to("cpu") # no GPU or Amd
66
  #tts.to("cuda") # cuda only
67
 
68
- tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
69
- ModelManager().download_model(tts_model_name)
70
- tts_model_path = os.path.join(get_user_data_dir("tts"), tts_model_name.replace("/", "--"))
71
- config = XttsConfig()
72
- config.load_json(os.path.join(tts_model_path, "config.json"))
73
- xtts_model = Xtts.init_from_config(config)
74
- xtts_model.to("cpu")
75
- xtts_model.load_checkpoint(
76
- config,
77
- checkpoint_path=os.path.join(tts_model_path, "model.pth"),
78
- vocab_path=os.path.join(tts_model_path, "vocab.json"),
79
- eval=True,
80
- use_deepspeed=True,
81
- )
82
  #xtts_model.cuda()
83
  print("Loaded XTTS model")
84
 
@@ -154,10 +158,18 @@ with gr.Blocks(title="Voice chat with LLM") as demo:
154
  def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
155
  if sentence != "":
156
  print("Processing sentence")
157
- generated_speech = generate_speech_for_sentence(chatbot_history, chatbot_voice, sentence, xtts_model, None, return_as_byte=True)
158
- if generated_speech is not None:
159
- _, audio_dict = generated_speech
160
- yield (sentence, chatbot_history, audio_dict["value"])
 
 
 
 
 
 
 
 
161
 
162
  if initial_greeting:
163
  # Process only the initial greeting if specified
 
##print(m)
#m = model_name

# Load the XTTS v2 multilingual model on CPU (commit intent: "change to cpu"
# — no GPU in this deployment environment).
# NOTE(review): TTS() downloads the model weights on first run, so this is a
# blocking, network-dependent call executed at module import time.
xtts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)

# The manual ModelManager/XttsConfig/load_checkpoint loading path that used
# to live here (commented out) has been removed as dead code; restore from
# git history if low-level checkpoint control is needed again.
print("Loaded XTTS model")
 
 
158
def handle_speech_generation(sentence, chatbot_history, chatbot_voice):
    """Synthesize speech for one sentence of the chatbot reply.

    Clones the voice sample ``examples/{chatbot_voice}.wav`` and yields a
    ``(sentence, chatbot_history, audio_bytes)`` tuple for the Gradio
    streaming outputs. Empty sentences yield nothing.
    """
    if sentence != "":
        print("Processing sentence")
        # Fix: the previous version synthesized a hardcoded demo string
        # ("It took me quite a long time to develop a voice, ...") and
        # ignored the `sentence` argument it had just checked for emptiness.
        xtts_model.tts_to_file(
            text=sentence,
            file_path="output.wav",
            speaker_wav=[f"examples/{chatbot_voice}.wav"],
            language="en",  # NOTE(review): hardcoded to English — confirm intended
            split_sentences=True,
        )
        # TODO(review): only a bare WAV header is yielded here — the audio
        # written to output.wav is never streamed back to the client. Confirm
        # whether the file's bytes should follow the header (the pre-change
        # code yielded audio_dict["value"] from generate_speech_for_sentence).
        yield (sentence, chatbot_history, wave_header_chunk())
173
 
174
  if initial_greeting:
175
  # Process only the initial greeting if specified