wldmr committed on
Commit
d76f980
·
1 Parent(s): 54106cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -22
app.py CHANGED
@@ -7,7 +7,7 @@ def cap(match):
7
  return(match.group().capitalize())
8
 
9
 
10
- def predict(input_text):
11
 
12
  model = PunctuationModel()
13
  output_text = model.restore_punctuation(input_text)
@@ -16,28 +16,33 @@ def predict(input_text):
16
  srt_file = input_text
17
  punctuated = output_text
18
 
19
- srt_file_strip=srt_file.strip()
20
- srt_file_sub=re.sub('\s*\n\s*','# ',srt_file_strip)
21
- srt_file_array=srt_file_sub.split(' ')
22
- pcnt_file_array=punctuated.split(' ')
23
 
24
- # goal: restore the break points i.e. the same number of lines as the srt file
25
- # this is necessary, because each line in the srt file corresponds to a frame from the video
26
- if len(srt_file_array)!=len(pcnt_file_array):
27
- return "AssertError: The length of the transcript and the punctuated file should be the same: ",len(srt_file_array),len(pcnt_file_array)
28
- pcnt_file_array_hash = []
29
- for idx, item in enumerate(srt_file_array):
30
- if item.endswith('#'):
31
- pcnt_file_array_hash.append(pcnt_file_array[idx]+'#')
32
- else:
33
- pcnt_file_array_hash.append(pcnt_file_array[idx])
34
-
35
- # assemble the array back to a string
36
- pcnt_file_cr=' '.join(pcnt_file_array_hash).replace('#','\n')
 
 
 
 
 
 
37
 
38
- # ignore the above-mentioned line number restoration
39
- # instead return the punctuated text as a single string
40
- #pcnt_file_cr = output_text
41
 
42
  regex1 = r"\bi\b"
43
  regex2 = r"(?<=[.?!;])\s*\w"
@@ -56,7 +61,8 @@ Model restores punctuation and case i.e. of the following punctuations -- [! ? .
56
  examples = ["my name is clara i live in berkeley california"]
57
 
58
  interface = gr.Interface(fn = predict,
59
- inputs = ["text"],
 
60
  outputs = ["text"],
61
  title = title,
62
  description = description,
 
7
  return(match.group().capitalize())
8
 
9
 
10
+ def predict(input_text, brakes):
11
 
12
  model = PunctuationModel()
13
  output_text = model.restore_punctuation(input_text)
 
16
  srt_file = input_text
17
  punctuated = output_text
18
 
19
+ # if none of the line break methods are selected,
20
+ # return the text as a single line
21
+ pcnt_file_cr = output_text
 
22
 
23
+ if 'timelines' in brakes:
24
+ srt_file_strip=srt_file.strip()
25
+ srt_file_sub=re.sub('\s*\n\s*','# ',srt_file_strip)
26
+ srt_file_array=srt_file_sub.split(' ')
27
+ pcnt_file_array=punctuated.split(' ')
28
+
29
+ # goal: restore the break points i.e. the same number of lines as the srt file
30
+ # this is necessary, because each line in the srt file corresponds to a frame from the video
31
+ if len(srt_file_array)!=len(pcnt_file_array):
32
+ return "AssertError: The length of the transcript and the punctuated file should be the same: ",len(srt_file_array),len(pcnt_file_array)
33
+ pcnt_file_array_hash = []
34
+ for idx, item in enumerate(srt_file_array):
35
+ if item.endswith('#'):
36
+ pcnt_file_array_hash.append(pcnt_file_array[idx]+'#')
37
+ else:
38
+ pcnt_file_array_hash.append(pcnt_file_array[idx])
39
+
40
+ # assemble the array back to a string
41
+ pcnt_file_cr=' '.join(pcnt_file_array_hash).replace('#','\n')
42
 
43
+
44
+ if 'sentences' in brakes:
45
+ pass
46
 
47
  regex1 = r"\bi\b"
48
  regex2 = r"(?<=[.?!;])\s*\w"
 
61
  examples = ["my name is clara i live in berkeley california"]
62
 
63
  interface = gr.Interface(fn = predict,
64
+ inputs = [gr.CheckboxGroup(["sentences", "timelines"], label="brakes"),
65
+ "text"],
66
  outputs = ["text"],
67
  title = title,
68
  description = description,