avans06 committed on
Commit
a760511
1 Parent(s): 19045f6

Fixed the issue that occurred after enabling the

Browse files

"Word Timestamps - Highlight Words" feature.

Files changed (1) hide show
  1. src/utils.py +10 -12
src/utils.py CHANGED
@@ -189,7 +189,7 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
189
  if highlight_words:
190
  last = subtitle_start
191
 
192
- for i, this_word in enumerate(words):
193
  start = this_word['start']
194
  end = this_word['end']
195
 
@@ -207,15 +207,10 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
207
  'end' : end,
208
  'text' : __join_words(
209
  [
210
- {
211
- "word": re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word)
212
- if j == i
213
- else word,
214
- # The HTML tags <u> and </u> are not displayed,
215
- # # so they should not be counted in the word length
216
- "length": len(word)
217
- } for j, word in enumerate(text_words)
218
- ], maxLineWidth)
219
  }
220
  last = end
221
 
@@ -238,9 +233,9 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
238
  result.update({'original': process_text(original_text, maxLineWidth)})
239
  yield result
240
 
241
- def __join_words(words: Iterator[Union[str, dict]], maxLineWidth: int = None):
242
  result = "".join(words)
243
-
244
  if maxLineWidth is None or maxLineWidth < 0:
245
  return result
246
 
@@ -273,6 +268,9 @@ def process_text(text: str, maxLineWidth=None):
273
  if currentLine:
274
  currentLine += " "
275
  wordWidth += 1
 
 
 
276
  for wordIdx, char in enumerate(word):
277
  if unicodedata.east_asian_width(char) not in {'W', 'F'}:
278
  wordWidth += 1
 
189
  if highlight_words:
190
  last = subtitle_start
191
 
192
+ for idx, this_word in enumerate(words):
193
  start = this_word['start']
194
  end = this_word['end']
195
 
 
207
  'end' : end,
208
  'text' : __join_words(
209
  [
210
+ re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word) if subidx == idx else word
211
+ for subidx, word in enumerate(text_words)
212
+ ]
213
+ , maxLineWidth)
 
 
 
 
 
214
  }
215
  last = end
216
 
 
233
  result.update({'original': process_text(original_text, maxLineWidth)})
234
  yield result
235
 
236
+ def __join_words(words: Iterator[str], maxLineWidth: int = None):
237
  result = "".join(words)
238
+
239
  if maxLineWidth is None or maxLineWidth < 0:
240
  return result
241
 
 
268
  if currentLine:
269
  currentLine += " "
270
  wordWidth += 1
271
+ # The HTML tags <u> and </u> are not displayed,
272
+ # so they should not be counted in the word length
273
+ wordWidth -= 7 if "<u>" in word else 0
274
  for wordIdx, char in enumerate(word):
275
  if unicodedata.east_asian_width(char) not in {'W', 'F'}:
276
  wordWidth += 1