jbilcke-hf HF staff committed on
Commit
3678d66
1 Parent(s): 29a572d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -2
app.py CHANGED
@@ -88,6 +88,8 @@ api = HfApi(token=HF_TOKEN)
88
 
89
  repo_id = "jbilcke-hf/zephyr-xtts"
90
 
 
 
91
  default_system_message = f"""
92
  You're the storyteller, crafting a short tale for young listeners. Please abide by these guidelines:
93
  - Keep your sentences short, concise and easy to understand.
@@ -143,6 +145,26 @@ LLAMA_VERBOSE=False
143
  print("Running LLM Zephyr")
144
  llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-10,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  # <|system|>
147
  # You are a friendly chatbot who always responds in the style of a pirate.</s>
148
  # <|user|>
@@ -507,7 +529,6 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
507
  print("Sentence for speech:", sentence)
508
 
509
  try:
510
- SENTENCE_SPLIT_LENGTH=350
511
  if len(sentence)<SENTENCE_SPLIT_LENGTH:
512
  # no problem continue on
513
  sentence_list = [sentence]
@@ -515,7 +536,8 @@ def generate_speech_for_sentence(history, chatbot_role, sentence, return_as_byte
515
  # Until now nltk likely split sentences properly but we need additional
516
  # check for longer sentence and split at last possible position
517
  # Do whatever necessary, first break at hyphens, then spaces and then even split very long words
518
- sentence_list=textwrap.wrap(sentence,SENTENCE_SPLIT_LENGTH)
 
519
  print("SPLITTED LONG SENTENCE:",sentence_list)
520
 
521
  for sentence in sentence_list:
 
88
 
89
  repo_id = "jbilcke-hf/zephyr-xtts"
90
 
91
+ SENTENCE_SPLIT_LENGTH=250
92
+
93
  default_system_message = f"""
94
  You're the storyteller, crafting a short tale for young listeners. Please abide by these guidelines:
95
  - Keep your sentences short, concise and easy to understand.
 
145
  print("Running LLM Zephyr")
146
  llm_zephyr = Llama(model_path=zephyr_model_path,n_gpu_layers=GPU_LAYERS-10,max_new_tokens=256, context_window=4096, n_ctx=4096,n_batch=128,verbose=LLAMA_VERBOSE)
147
 
148
def split_sentences(text, max_len):
    """Split *text* into a list of chunks no longer than *max_len* characters.

    The text is first normalised so that doubled punctuation ('..' or '!!')
    is followed by a single space, then handed to ``nltk.sent_tokenize``.
    Any resulting sentence that is still longer than *max_len* is broken up
    with ``textwrap.wrap`` (which may split very long words).

    Args:
        text: The text to split.
        max_len: Maximum length, in characters, of each returned chunk.
            Passed to ``textwrap.wrap`` as the width.

    Returns:
        A list of strings, in their original order.
    """
    # Apply custom rules to enforce sentence breaks with double punctuation.
    # The lookarounds restrict each rule to *exactly* two marks: without them
    # the pattern also fires on the first two characters of a longer run, so
    # "Wait... what" became "Wait... . what" (a stray, punctuation-only
    # fragment). Runs of three or more marks are now left untouched.
    text = re.sub(r"(\s*(?<!\.)\.{2}(?!\.))\s*", r".\1 ", text)  # for '..'
    text = re.sub(r"(\s*(?<!!)!{2}(?!!))\s*", r"!\1 ", text)  # for '!!'

    # Use NLTK to split into sentences, then re-wrap only the ones that are
    # still longer than max_len.
    sentence_list = []
    for sent in nltk.sent_tokenize(text):
        if len(sent) > max_len:
            sentence_list.extend(
                textwrap.wrap(sent, max_len, break_long_words=True)
            )
        else:
            sentence_list.append(sent)

    return sentence_list
166
+
167
+
168
  # <|system|>
169
  # You are a friendly chatbot who always responds in the style of a pirate.</s>
170
  # <|user|>
 
529
  print("Sentence for speech:", sentence)
530
 
531
  try:
 
532
  if len(sentence)<SENTENCE_SPLIT_LENGTH:
533
  # no problem continue on
534
  sentence_list = [sentence]
 
536
  # Until now nltk likely split sentences properly but we need additional
537
  # check for longer sentence and split at last possible position
538
  # Do whatever necessary, first break at hyphens, then spaces and then even split very long words
539
+ # sentence_list=textwrap.wrap(sentence,SENTENCE_SPLIT_LENGTH)
540
+ sentence_list = split_sentences(sentence, SENTENCE_SPLIT_LENGTH)
541
  print("SPLITTED LONG SENTENCE:",sentence_list)
542
 
543
  for sentence in sentence_list: