Spaces:
Sleeping
Sleeping
File size: 3,480 Bytes
09cabee cf5f1c9 09cabee cf5f1c9 09cabee cf5f1c9 09cabee cf5f1c9 09cabee cf5f1c9 09cabee cf5f1c9 09cabee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
from datetime import timedelta
import os
import whisper
class SRT_segment(object):
    """A single subtitle cue: index, time range, source text and translation.

    The constructor accepts one positional argument in either of two shapes:

    * a ``dict`` with the keys ``'id'`` (0-based), ``'start'`` and ``'end'``
      (seconds) and ``'text'`` -- presumably a Whisper transcription segment,
      confirm against the caller;
    * a ``list`` laid out as ``[segment_id, duration_line, text, ...]`` as
      read from an existing SRT file.

    Attributes set on the instance: ``segment_id``, ``start_time_str``,
    ``end_time_str``, ``duration`` (the full ``start --> end`` line),
    ``source_text`` and ``translation`` (initially empty).
    """

    def __init__(self, *args) -> None:
        """Build the segment from a dict or a list (see the class docstring).

        Raises:
            IndexError: if no argument is given, or a list argument has fewer
                than three items or a duration line without ``-->``.
            TypeError: if the argument is neither a ``dict`` nor a ``list``.
        """
        segment = args[0]
        if isinstance(segment, dict):
            self.start_time_str = self._format_timestamp(segment['start'])
            self.end_time_str = self._format_timestamp(segment['end'])
            # Incoming ids are 0-based; SRT cue numbers start at 1.
            self.segment_id = segment['id'] + 1
            self.source_text = segment['text']
            self.duration = f"{self.start_time_str} --> {self.end_time_str}"
        elif isinstance(segment, list):
            self.segment_id = segment[0]
            self.duration = segment[1]
            self.source_text = segment[2]
            bounds = self.duration.split("-->")
            # Strip the spaces around the arrow so both construction paths
            # yield timestamps in the same shape.
            self.start_time_str = bounds[0].strip()
            self.end_time_str = bounds[1].strip()
        else:
            raise TypeError(
                "SRT_segment expects a dict or a list, "
                f"got {type(segment).__name__}"
            )
        self.translation = ""

    @staticmethod
    def _format_timestamp(seconds) -> str:
        """Render a number of seconds as an SRT ``HH:MM:SS,mmm`` timestamp.

        Milliseconds are kept (rounded to the nearest one) and negative
        values are clamped to zero. Hours are zero-padded to two digits and
        simply grow wider past 99 hours.
        """
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def __str__(self) -> str:
        """Return the cue as SRT text carrying the source text."""
        return f'{self.segment_id}\n{self.duration}\n{self.source_text}\n\n'

    def get_trans_str(self) -> str:
        """Return the cue as SRT text carrying only the translation."""
        return f'{self.segment_id}\n{self.duration}\n{self.translation}\n\n'

    def get_bilingual_str(self) -> str:
        """Return the cue as SRT text: source line, then translation line."""
        return f'{self.segment_id}\n{self.duration}\n{self.source_text}\n{self.translation}\n\n'
class SRT_script():
    """An ordered collection of ``SRT_segment`` cues forming one subtitle file.

    Attributes:
        segments: list of ``SRT_segment`` objects, in file order.
    """

    def __init__(self, segments) -> None:
        """Wrap every raw segment (dict or list) in an ``SRT_segment``."""
        self.segments = [SRT_segment(seg) for seg in segments]

    @classmethod
    def parse_from_srt_file(cls, path: str):
        """Create a script from the UTF-8 encoded SRT file at ``path``.

        Cues are delimited by blank lines rather than by a fixed line count,
        so cue text spanning several lines (joined with ``'\\n'``) and extra
        blank lines between cues are both handled. Each cue is handed to the
        constructor as ``[index_line, timing_line, text]``.

        Raises:
            ValueError: if a cue has an index line but no timing line.
        """
        with open(path, 'r', encoding="utf-8") as f:
            script_lines = f.read().splitlines()

        segments = []
        cue_lines = []
        # The extra trailing "" flushes the last cue when the file does not
        # end with a blank line.
        for line in script_lines + [""]:
            if line.strip():
                cue_lines.append(line)
                continue
            if not cue_lines:
                continue  # consecutive blank lines between cues
            if len(cue_lines) < 2:
                raise ValueError(
                    f"Malformed SRT cue in {path!r}: {cue_lines[0]!r} "
                    "has no timing line"
                )
            segments.append(
                [cue_lines[0], cue_lines[1], "\n".join(cue_lines[2:])]
            )
            cue_lines = []
        return cls(segments)

    def set_translation(self, translate: str, id_range: tuple):
        """Assign translated text to the segments in ``id_range``.

        Args:
            translate: translated text, one chunk per segment, with chunks
                separated by a blank line (``'\\n\\n'``).
            id_range: ``(start_id, end_id)``; 1-based and inclusive.

        Raises:
            IndexError: if ``translate`` holds fewer chunks than there are
                segments in the range. The check runs before any assignment,
                so a failed call leaves every segment untouched.
        """
        start_seg_id = id_range[0]
        end_seg_id = id_range[1]
        lines = translate.split('\n\n')
        targets = self.segments[start_seg_id - 1:end_seg_id]
        if len(lines) < len(targets):
            raise IndexError(
                f"translation has {len(lines)} chunk(s) but segments "
                f"{start_seg_id}-{end_seg_id} need {len(targets)}"
            )
        for seg, line in zip(targets, lines):
            seg.translation = line

    def get_source_only(self):
        """Return every source text, each followed by a blank line."""
        return "".join(f'{seg.source_text}\n\n' for seg in self.segments)

    def reform_src_str(self):
        """Return the whole script as SRT text carrying the source text."""
        return "".join(str(seg) for seg in self.segments)

    def reform_trans_str(self):
        """Return the whole script as SRT text carrying the translations."""
        return "".join(seg.get_trans_str() for seg in self.segments)

    def form_bilingual_str(self):
        """Return the whole script as SRT text with source and translation."""
        return "".join(seg.get_bilingual_str() for seg in self.segments)

    def write_srt_file_src(self, path: str):
        """Write the source-language SRT text to ``path`` (UTF-8)."""
        with open(path, "w", encoding='utf-8') as f:
            f.write(self.reform_src_str())

    def write_srt_file_translate(self, path: str):
        """Write the translation-only SRT text to ``path`` (UTF-8)."""
        with open(path, "w", encoding='utf-8') as f:
            f.write(self.reform_trans_str())

    def write_srt_file_bilingual(self, path: str):
        """Write the bilingual SRT text to ``path`` (UTF-8)."""
        with open(path, "w", encoding='utf-8') as f:
            f.write(self.form_bilingual_str())

    def correct_with_force_term(self):
        """Placeholder for forced-terminology correction; not implemented."""
        # force term correction
        pass
|