Spaces:
Sleeping
Sleeping
worldqwq
committed on
Commit
·
03b08f3
1
Parent(s):
4657673
Fix
Browse files
Former-commit-id: cd4e3676891d07a7bbd80e6677b003028b7737bd
- SRT.py +14 -6
- pipeline.py +2 -2
SRT.py
CHANGED
@@ -190,8 +190,12 @@ class SRT_script():
|
|
190 |
#print(lines[i])
|
191 |
pass
|
192 |
|
193 |
-
def split_seg(self, seg, threshold
|
194 |
-
#
|
|
|
|
|
|
|
|
|
195 |
source_text = seg.source_text
|
196 |
translation = seg.translation
|
197 |
src_commas = [m.start() for m in re.finditer(',', source_text)]
|
@@ -200,7 +204,10 @@ class SRT_script():
|
|
200 |
src_split_idx = src_commas[len(src_commas)//2] if len(src_commas) % 2 == 1 else src_commas[len(src_commas)//2 - 1]
|
201 |
else:
|
202 |
src_space = [m.start() for m in re.finditer(' ', source_text)]
|
203 |
-
|
|
|
|
|
|
|
204 |
|
205 |
if len(trans_commas) != 0:
|
206 |
trans_split_idx = trans_commas[len(trans_commas)//2] if len(trans_commas) % 2 == 1 else trans_commas[len(trans_commas)//2 - 1]
|
@@ -242,8 +249,9 @@ class SRT_script():
|
|
242 |
return result_list
|
243 |
|
244 |
|
245 |
-
def check_len_and_split(self, threshold=
|
246 |
-
#
|
|
|
247 |
segments = []
|
248 |
for seg in self.segments:
|
249 |
if len(seg.translation) > threshold:
|
@@ -257,7 +265,7 @@ class SRT_script():
|
|
257 |
pass
|
258 |
|
259 |
def check_len_and_split_range(self, range, threshold=30):
|
260 |
-
#
|
261 |
start_seg_id = range[0]
|
262 |
end_seg_id = range[1]
|
263 |
extra_len = 0
|
|
|
190 |
#print(lines[i])
|
191 |
pass
|
192 |
|
193 |
+
def split_seg(self, seg, threshold):
|
194 |
+
# evenly split seg to 2 parts and add new seg into self.segments
|
195 |
+
if seg.source_text[:2] == ', ':
|
196 |
+
seg.source_text = seg.source_text[2:]
|
197 |
+
if seg.translation[0] == ',':
|
198 |
+
seg.translation = seg.translation[1:]
|
199 |
source_text = seg.source_text
|
200 |
translation = seg.translation
|
201 |
src_commas = [m.start() for m in re.finditer(',', source_text)]
|
|
|
204 |
src_split_idx = src_commas[len(src_commas)//2] if len(src_commas) % 2 == 1 else src_commas[len(src_commas)//2 - 1]
|
205 |
else:
|
206 |
src_space = [m.start() for m in re.finditer(' ', source_text)]
|
207 |
+
if len(src_space) > 0:
|
208 |
+
src_split_idx = src_space[len(src_space)//2] if len(src_space) % 2 == 1 else src_space[len(src_space)//2 - 1]
|
209 |
+
else:
|
210 |
+
src_split_idx = 0
|
211 |
|
212 |
if len(trans_commas) != 0:
|
213 |
trans_split_idx = trans_commas[len(trans_commas)//2] if len(trans_commas) % 2 == 1 else trans_commas[len(trans_commas)//2 - 1]
|
|
|
249 |
return result_list
|
250 |
|
251 |
|
252 |
+
def check_len_and_split(self, threshold=30):
|
253 |
+
# DEPRECATED
|
254 |
+
# if sentence length >= threshold, split this segments to two
|
255 |
segments = []
|
256 |
for seg in self.segments:
|
257 |
if len(seg.translation) > threshold:
|
|
|
265 |
pass
|
266 |
|
267 |
def check_len_and_split_range(self, range, threshold=30):
|
268 |
+
# if sentence length >= threshold, split this segments to two
|
269 |
start_seg_id = range[0]
|
270 |
end_seg_id = range[1]
|
271 |
extra_len = 0
|
pipeline.py
CHANGED
@@ -261,10 +261,10 @@ for sentence, range in tqdm(zip(script_arr, range_arr)):
|
|
261 |
time.sleep(30)
|
262 |
flag = True
|
263 |
# add read-time output back and modify the post-processing by using one batch as an unit.
|
264 |
-
srt.set_translation(translate, range, model_name)
|
265 |
|
266 |
add_length = srt.check_len_and_split_range(range, threshold)
|
267 |
-
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx
|
268 |
# srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
269 |
|
270 |
# srt.check_len_and_split()
|
|
|
261 |
time.sleep(30)
|
262 |
flag = True
|
263 |
# add read-time output back and modify the post-processing by using one batch as an unit.
|
264 |
+
srt.set_translation(translate, range, model_name,args.link)
|
265 |
|
266 |
add_length = srt.check_len_and_split_range(range, threshold)
|
267 |
+
srt.realtime_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_zh.srt",range, add_length,segidx)
|
268 |
# srt.realtime_bilingual_write_srt(f"{RESULT_PATH}/{VIDEO_NAME}/{VIDEO_NAME}_bi.srt",range, add_length,segidx)
|
269 |
|
270 |
# srt.check_len_and_split()
|