jpc committed on
Commit
f7b03d4
1 Parent(s): 4743f10

Improve the multilingual parser

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -32,9 +32,14 @@ text_examples = [
32
  ]
33
 
34
  def parse_multilingual_text(input_text):
35
- pattern = r"<(\w+)>\s(.*?)\s(?=<\w+>|$)"
36
- segments = re.findall(pattern, input_text)
37
- return [(lang, text.strip()) for lang, text in segments if lang in LANGUAGES.keys()]
 
 
 
 
 
38
 
39
  @spaces.GPU(enable_queue=True)
40
  def generate_segment_audio(text, lang, speaker_audio, pipe):
 
32
  ]
33
 
34
  def parse_multilingual_text(input_text):
35
+ pattern = r"(?:<(\w+)>)|([^<]+)"
36
+ cur_lang = 'en'
37
+ segments = []
38
+ for i, (lang, txt) in enumerate(re.findall(pattern, input_text)):
39
+ if lang: cur_lang = lang
40
+ else: segments.append((cur_lang, f" {txt} ")) # add spaces to give it some time to switch languages
41
+ if not segments: return [("en", "")]
42
+ return segments
43
 
44
  @spaces.GPU(enable_queue=True)
45
  def generate_segment_audio(text, lang, speaker_audio, pipe):