deeppunct-gr

Runtime error

App Files Files Community

wldmr committed on Mar 20, 2023

Commit

d76f980

1 Parent(s): 54106cc

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -22

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ def cap(match):
     return(match.group().capitalize())
-def predict(input_text):
     model = PunctuationModel()
     output_text = model.restore_punctuation(input_text)
@@ -16,28 +16,33 @@ def predict(input_text):
     srt_file = input_text
     punctuated = output_text
-    srt_file_strip=srt_file.strip()
-    srt_file_sub=re.sub('\s*\n\s*','# ',srt_file_strip)
-    srt_file_array=srt_file_sub.split(' ')
-    pcnt_file_array=punctuated.split(' ')
-    # goal: restore the break points i.e. the same number of lines as the srt file
-    # this is necessary, because each line in the srt file corresponds to a frame from the video
-    if len(srt_file_array)!=len(pcnt_file_array):
-        return "AssertError: The length of the transcript and the punctuated file should be the same: ",len(srt_file_array),len(pcnt_file_array)
-    pcnt_file_array_hash = []
-    for idx, item in enumerate(srt_file_array):
-        if item.endswith('#'):
-            pcnt_file_array_hash.append(pcnt_file_array[idx]+'#')
-        else:
-            pcnt_file_array_hash.append(pcnt_file_array[idx])
-    # assemble the array back to a string
-    pcnt_file_cr=' '.join(pcnt_file_array_hash).replace('#','\n')
-    # ignore the above mentioned line number restoration
-    # instead return the punctuated text as a single string
-    #pcnt_file_cr = output_text
     regex1 = r"\bi\b"
     regex2 = r"(?<=[.?!;])\s*\w"
@@ -56,7 +61,8 @@ Model restores punctuation and case i.e. of the following punctuations -- [! ? .
     examples = ["my name is clara i live in berkeley california"]
     interface = gr.Interface(fn = predict,
-                         inputs = ["text"],
                          outputs = ["text"],
                          title = title,
                          description = description,

     return(match.group().capitalize())
+def predict(input_text, brakes):
     model = PunctuationModel()
     output_text = model.restore_punctuation(input_text)
     srt_file = input_text
     punctuated = output_text
+    # unless one of the line break methods below applies,
+    # return the text as a single line
+    pcnt_file_cr = output_text
+    if 'timelines' in brakes:
+        srt_file_strip=srt_file.strip()
+        srt_file_sub=re.sub('\s*\n\s*','# ',srt_file_strip)
+        srt_file_array=srt_file_sub.split(' ')
+        pcnt_file_array=punctuated.split(' ')
+        # goal: restore the break points i.e. the same number of lines as the srt file
+        # this is necessary, because each line in the srt file corresponds to a frame from the video
+        if len(srt_file_array)!=len(pcnt_file_array):
+            return "AssertError: The length of the transcript and the punctuated file should be the same: ",len(srt_file_array),len(pcnt_file_array)
+        pcnt_file_array_hash = []
+        for idx, item in enumerate(srt_file_array):
+            if item.endswith('#'):
+                pcnt_file_array_hash.append(pcnt_file_array[idx]+'#')
+            else:
+                pcnt_file_array_hash.append(pcnt_file_array[idx])
+        # assemble the array back to a string
+        pcnt_file_cr=' '.join(pcnt_file_array_hash).replace('#','\n')
+    if 'sentences' in brakes:
+        pass
     regex1 = r"\bi\b"
     regex2 = r"(?<=[.?!;])\s*\w"
     examples = ["my name is clara i live in berkeley california"]
     interface = gr.Interface(fn = predict,
+                         inputs = [gr.CheckboxGroup(["sentences", "timelines"], label="brakes"),
+                                   "text"],
                          outputs = ["text"],
                          title = title,
                          description = description,