Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ def cap(match):
|
|
7 |
return(match.group().capitalize())
|
8 |
|
9 |
|
10 |
-
def predict(input_text):
|
11 |
|
12 |
model = PunctuationModel()
|
13 |
output_text = model.restore_punctuation(input_text)
|
@@ -16,28 +16,33 @@ def predict(input_text):
|
|
16 |
srt_file = input_text
|
17 |
punctuated = output_text
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
pcnt_file_array=punctuated.split(' ')
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
|
42 |
regex1 = r"\bi\b"
|
43 |
regex2 = r"(?<=[.?!;])\s*\w"
|
@@ -56,7 +61,8 @@ Model restores punctuation and case i.e. of the following punctuations -- [! ? .
|
|
56 |
examples = ["my name is clara i live in berkeley california"]
|
57 |
|
58 |
interface = gr.Interface(fn = predict,
|
59 |
-
inputs = ["
|
|
|
60 |
outputs = ["text"],
|
61 |
title = title,
|
62 |
description = description,
|
|
|
7 |
return(match.group().capitalize())
|
8 |
|
9 |
|
10 |
+
def predict(input_text, brakes):
|
11 |
|
12 |
model = PunctuationModel()
|
13 |
output_text = model.restore_punctuation(input_text)
|
|
|
16 |
srt_file = input_text
|
17 |
punctuated = output_text
|
18 |
|
19 |
+
# default: unless one of the line-break methods below replaces it,
|
20 |
+
# return the text as a single line
|
21 |
+
pcnt_file_cr = output_text
|
|
|
22 |
|
23 |
+
if 'timelines' in brakes:
|
24 |
+
srt_file_strip=srt_file.strip()
|
25 |
+
srt_file_sub=re.sub('\s*\n\s*','# ',srt_file_strip)
|
26 |
+
srt_file_array=srt_file_sub.split(' ')
|
27 |
+
pcnt_file_array=punctuated.split(' ')
|
28 |
+
|
29 |
+
# goal: restore the break points, i.e. produce the same number of lines as the srt file
|
30 |
+
# this is necessary, because each line in the srt file corresponds to a frame from the video
|
31 |
+
if len(srt_file_array)!=len(pcnt_file_array):
|
32 |
+
return "AssertError: The length of the transcript and the punctuated file should be the same: ",len(srt_file_array),len(pcnt_file_array)
|
33 |
+
pcnt_file_array_hash = []
|
34 |
+
for idx, item in enumerate(srt_file_array):
|
35 |
+
if item.endswith('#'):
|
36 |
+
pcnt_file_array_hash.append(pcnt_file_array[idx]+'#')
|
37 |
+
else:
|
38 |
+
pcnt_file_array_hash.append(pcnt_file_array[idx])
|
39 |
+
|
40 |
+
# assemble the array back to a string
|
41 |
+
pcnt_file_cr=' '.join(pcnt_file_array_hash).replace('#','\n')
|
42 |
|
43 |
+
|
44 |
+
if 'sentences' in brakes:
|
45 |
+
pass
|
46 |
|
47 |
regex1 = r"\bi\b"
|
48 |
regex2 = r"(?<=[.?!;])\s*\w"
|
|
|
61 |
examples = ["my name is clara i live in berkeley california"]
|
62 |
|
63 |
interface = gr.Interface(fn = predict,
|
64 |
+
inputs = [gr.CheckboxGroup(["sentences", "timelines"], label="brakes"),
|
65 |
+
"text"],
|
66 |
outputs = ["text"],
|
67 |
title = title,
|
68 |
description = description,
|