Spaces:

StarPigeon
/

ViDove

Sleeping

App Files Community

worldqwq committed on Mar 29, 2023

Commit

03b08f3

1 Parent(s): 4657673

Fix

Browse files

Former-commit-id: cd4e3676891d07a7bbd80e6677b003028b7737bd

Files changed (2) hide show

SRT.py +14 -6
pipeline.py +2 -2

SRT.py CHANGED Viewed

@@ -190,8 +190,12 @@ class SRT_script():
                     #print(lines[i])
         pass
-    def split_seg(self, seg, threshold=500):
-        # TODO: evenly split seg to 2 parts and add new seg into self.segments
         source_text = seg.source_text
         translation = seg.translation
         src_commas = [m.start() for m in re.finditer(',', source_text)]
@@ -200,7 +204,10 @@ class SRT_script():
             src_split_idx = src_commas[len(src_commas)//2] if len(src_commas) % 2 == 1 else src_commas[len(src_commas)//2 - 1]
         else:
             src_space = [m.start() for m in re.finditer(' ', source_text)]
-            src_split_idx = src_space[len(src_space)//2] if len(src_space) % 2 == 1 else src_space[len(src_space)//2 - 1]
         if len(trans_commas) != 0:
             trans_split_idx = trans_commas[len(trans_commas)//2] if len(trans_commas) % 2 == 1 else trans_commas[len(trans_commas)//2 - 1]
@@ -242,8 +249,9 @@ class SRT_script():
         return result_list
-    def check_len_and_split(self, threshold=30000):
-        # TODO: if sentence length >= threshold, split this segments to two
         segments = []
         for seg in self.segments:
             if len(seg.translation) > threshold:
@@ -257,7 +265,7 @@ class SRT_script():
         pass
     def check_len_and_split_range(self, range, threshold=30):
-        # TODO: if sentence length >= threshold, split this segments to two
         start_seg_id = range[0]
         end_seg_id = range[1]
         extra_len = 0

                     #print(lines[i])
         pass
+    def split_seg(self, seg, threshold):
+        # evenly split seg to 2 parts and add new seg into self.segments
+        if seg.source_text[:2] == ', ':
+            seg.source_text = seg.source_text[2:]
+        if seg.translation[0] == '，':
+            seg.translation = seg.translation[1:]
         source_text = seg.source_text
         translation = seg.translation
         src_commas = [m.start() for m in re.finditer(',', source_text)]
             src_split_idx = src_commas[len(src_commas)//2] if len(src_commas) % 2 == 1 else src_commas[len(src_commas)//2 - 1]
         else:
             src_space = [m.start() for m in re.finditer(' ', source_text)]
+            if len(src_space) > 0:
+                src_split_idx = src_space[len(src_space)//2] if len(src_space) % 2 == 1 else src_space[len(src_space)//2 - 1]
+            else:
+                src_split_idx = 0
         if len(trans_commas) != 0:
             trans_split_idx = trans_commas[len(trans_commas)//2] if len(trans_commas) % 2 == 1 else trans_commas[len(trans_commas)//2 - 1]
         return result_list
+    def check_len_and_split(self, threshold=30):
+        # DEPRECATED
+        # if sentence length >= threshold, split this segments to two
         segments = []
         for seg in self.segments:
             if len(seg.translation) > threshold:
         pass
     def check_len_and_split_range(self, range, threshold=30):
+        # if sentence length >= threshold, split this segments to two
         start_seg_id = range[0]
         end_seg_id = range[1]
         extra_len = 0

pipeline.py CHANGED Viewed

@@ -261,10 +261,10 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
             time.sleep(30)
             flag = True
     # add read-time output back and modify the post-processing by using one batch as an unit.
-    srt.set_translation(translate, range, model_name)
     add_length = srt.check_len_and_split_range(range, threshold)
-    srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx,args.link)
     # srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
 # srt.check_len_and_split()

             time.sleep(30)
             flag = True
     # add read-time output back and modify the post-processing by using one batch as an unit.
+    srt.set_translation(translate, range, model_name,args.link)
     add_length = srt.check_len_and_split_range(range, threshold)
+    srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
     # srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
 # srt.check_len_and_split()