Fixed the issue that occurred after enabling the "Word Timestamps - Highlight Words" feature.
Browse files
- src/utils.py +10 -12
src/utils.py
CHANGED
@@ -189,7 +189,7 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
|
|
189 |
if highlight_words:
|
190 |
last = subtitle_start
|
191 |
|
192 |
-
for
|
193 |
start = this_word['start']
|
194 |
end = this_word['end']
|
195 |
|
@@ -207,15 +207,10 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
|
|
207 |
'end' : end,
|
208 |
'text' : __join_words(
|
209 |
[
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
# The HTML tags <u> and </u> are not displayed,
|
215 |
-
# # so they should not be counted in the word length
|
216 |
-
"length": len(word)
|
217 |
-
} for j, word in enumerate(text_words)
|
218 |
-
], maxLineWidth)
|
219 |
}
|
220 |
last = end
|
221 |
|
@@ -238,9 +233,9 @@ def __subtitle_preprocessor_iterator(transcript: Iterator[dict], maxLineWidth: i
|
|
238 |
result.update({'original': process_text(original_text, maxLineWidth)})
|
239 |
yield result
|
240 |
|
241 |
-
def __join_words(words: Iterator[
|
242 |
result = "".join(words)
|
243 |
-
|
244 |
if maxLineWidth is None or maxLineWidth < 0:
|
245 |
return result
|
246 |
|
@@ -273,6 +268,9 @@ def process_text(text: str, maxLineWidth=None):
|
|
273 |
if currentLine:
|
274 |
currentLine += " "
|
275 |
wordWidth += 1
|
|
|
|
|
|
|
276 |
for wordIdx, char in enumerate(word):
|
277 |
if unicodedata.east_asian_width(char) not in {'W', 'F'}:
|
278 |
wordWidth += 1
|
|
|
189 |
if highlight_words:
|
190 |
last = subtitle_start
|
191 |
|
192 |
+
for idx, this_word in enumerate(words):
|
193 |
start = this_word['start']
|
194 |
end = this_word['end']
|
195 |
|
|
|
207 |
'end' : end,
|
208 |
'text' : __join_words(
|
209 |
[
|
210 |
+
re.sub(r"^(\s*)(.*)$", r"\1<u>\2</u>", word) if subidx == idx else word
|
211 |
+
for subidx, word in enumerate(text_words)
|
212 |
+
]
|
213 |
+
, maxLineWidth)
|
|
|
|
|
|
|
|
|
|
|
214 |
}
|
215 |
last = end
|
216 |
|
|
|
233 |
result.update({'original': process_text(original_text, maxLineWidth)})
|
234 |
yield result
|
235 |
|
236 |
+
def __join_words(words: Iterator[str], maxLineWidth: int = None):
|
237 |
result = "".join(words)
|
238 |
+
|
239 |
if maxLineWidth is None or maxLineWidth < 0:
|
240 |
return result
|
241 |
|
|
|
268 |
if currentLine:
|
269 |
currentLine += " "
|
270 |
wordWidth += 1
|
271 |
+
# The HTML tags <u> and </u> are not displayed,
|
272 |
+
# so they should not be counted in the word length
|
273 |
+
wordWidth -= 7 if "<u>" in word else 0
|
274 |
for wordIdx, char in enumerate(word):
|
275 |
if unicodedata.east_asian_width(char) not in {'W', 'F'}:
|
276 |
wordWidth += 1
|