File size: 3,480 Bytes
09cabee
 
 
 
 
cf5f1c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09cabee
 
 
 
 
 
 
cf5f1c9
 
 
 
 
 
 
 
 
 
09cabee
cf5f1c9
 
 
 
 
 
 
 
 
 
 
 
 
09cabee
 
cf5f1c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09cabee
cf5f1c9
 
09cabee
 
cf5f1c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
09cabee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from datetime import timedelta
import os
import whisper

class SRT_segment(object):
    """One subtitle cue: its index, timing line, source text and translation.

    Attributes:
        segment_id: Cue index as shown in the SRT file (an int when built from
            a dict, whatever the first list item was when built from a list).
        start_time_str / end_time_str: Cue boundaries as ``HH:MM:SS,mmm``.
        duration: The full timing line, ``"<start> --> <end>"``.
        source_text: Original text of the cue.
        translation: Translated text; starts empty.
    """

    def __init__(self, *args) -> None:
        """Build a cue from a segment dict or from a list of SRT fields.

        Args:
            *args: Only ``args[0]`` is read. It is either

                * a dict with keys ``'id'`` (0-based), ``'start'`` and
                  ``'end'`` (seconds, may be fractional) and ``'text'`` --
                  presumably a Whisper transcription segment; or
                * a list ``[segment_id, timing_line, text, ...]`` as read
                  from an SRT file.

        Raises:
            TypeError: If ``args[0]`` is neither a dict nor a list.
            IndexError: If no argument is given, the list has fewer than three
                items, or the timing line has no ``-->`` separator.
        """
        source = args[0]
        if isinstance(source, dict):
            self.start_time_str = self._format_timestamp(source['start'])
            self.end_time_str = self._format_timestamp(source['end'])
            # Input ids are 0-based, SRT cue numbers are 1-based.
            self.segment_id = source['id'] + 1
            self.source_text = source['text']
            self.duration = f"{self.start_time_str} --> {self.end_time_str}"
        elif isinstance(source, list):
            self.segment_id = source[0]
            self.source_text = source[2]
            self.duration = source[1]
            bounds = self.duration.split("-->")
            # Strip the spaces that surround the arrow so both construction
            # paths yield timestamps in the same shape.
            self.start_time_str = bounds[0].strip()
            self.end_time_str = bounds[1].strip()
        else:
            raise TypeError(
                f"SRT_segment expects a dict or a list, got {type(source).__name__}"
            )
        self.translation = ""

    @staticmethod
    def _format_timestamp(seconds) -> str:
        """Format a number of seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
        # Work in integer milliseconds so fractional seconds are kept and the
        # hour field stays correct beyond 9 (and beyond 23) hours.
        total_ms = max(0, int(round(float(seconds) * 1000)))
        hours, remainder = divmod(total_ms, 3600000)
        minutes, remainder = divmod(remainder, 60000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    def __str__(self) -> str:
        """Return the cue as an SRT block containing the source text."""
        return f'{self.segment_id}\n{self.duration}\n{self.source_text}\n\n'

    def get_trans_str(self) -> str:
        """Return the cue as an SRT block containing the translation."""
        return f'{self.segment_id}\n{self.duration}\n{self.translation}\n\n'

    def get_bilingual_str(self) -> str:
        """Return the cue as an SRT block with source text then translation."""
        return f'{self.segment_id}\n{self.duration}\n{self.source_text}\n{self.translation}\n\n'

class SRT_script():
    """An ordered list of subtitle cues with SRT import and export helpers."""

    def __init__(self, segments) -> None:
        """Wrap every raw segment in an ``SRT_segment``.

        Args:
            segments: Iterable of raw segments; each item is handed unchanged
                to ``SRT_segment``.
        """
        self.segments = [SRT_segment(seg) for seg in segments]

    @classmethod
    def parse_from_srt_file(cls, path:str):
        """Build a script from an SRT file.

        Cues are delimited by blank (or whitespace-only) lines rather than by
        a fixed line count, so multi-line subtitle text, repeated blank lines
        and a missing trailing blank line are all handled.

        Args:
            path: Location of a UTF-8 encoded ``.srt`` file.

        Returns:
            A new instance whose segments are built from lists of the form
            ``[cue_id, timing_line, text]``; multi-line text is joined
            with ``"\\n"``.

        Raises:
            ValueError: If a cue lacks a timing line containing ``-->``.
        """
        # utf-8-sig also reads plain UTF-8 but drops a leading BOM, which
        # would otherwise end up inside the first cue id.
        with open(path, 'r', encoding="utf-8-sig") as f:
            script_lines = f.read().splitlines()

        segments = []
        cue_lines = []
        # The trailing "" sentinel flushes the last cue when the file does
        # not end with a blank line.
        for line in script_lines + [""]:
            if line.strip():
                cue_lines.append(line)
                continue
            if not cue_lines:
                continue
            if len(cue_lines) < 2 or "-->" not in cue_lines[1]:
                raise ValueError(f"Malformed SRT cue in {path!r}: {cue_lines!r}")
            segments.append([cue_lines[0], cue_lines[1], "\n".join(cue_lines[2:])])
            cue_lines = []

        return cls(segments)

    def set_translation(self, translate:str, id_range:tuple):
        """Assign translated text to a contiguous range of segments.

        Args:
            translate: Translations separated by blank lines (``"\\n\\n"``),
                one chunk per segment, in order.
            id_range: ``(start, end)`` positions, 1-based and inclusive, of
                the segments in ``self.segments`` to update.

        Raises:
            IndexError: If ``translate`` holds fewer chunks than there are
                segments in the range.
        """
        start_seg_id = id_range[0]
        end_seg_id = id_range[1]

        lines = translate.split('\n\n')
        print(id_range)
        print(translate)

        for i, seg in enumerate(self.segments[start_seg_id-1:end_seg_id]):
            seg.translation = lines[i]

    def get_source_only(self):
        """Return the source text of every segment, each followed by a blank line."""
        return "".join(f'{seg.source_text}\n\n' for seg in self.segments)

    def reform_src_str(self):
        """Return the whole script as SRT text containing the source text."""
        return "".join(str(seg) for seg in self.segments)

    def reform_trans_str(self):
        """Return the whole script as SRT text containing the translations."""
        return "".join(seg.get_trans_str() for seg in self.segments)

    def form_bilingual_str(self):
        """Return the whole script as SRT text with source and translation."""
        return "".join(seg.get_bilingual_str() for seg in self.segments)

    def write_srt_file_src(self, path:str):
        """Write the source-text SRT to ``path`` (UTF-8, overwriting)."""
        with open(path, "w", encoding='utf-8') as f:
            f.write(self.reform_src_str())

    def write_srt_file_translate(self, path:str):
        """Write the translation-only SRT to ``path`` (UTF-8, overwriting)."""
        with open(path, "w", encoding='utf-8') as f:
            f.write(self.reform_trans_str())

    def write_srt_file_bilingual(self, path:str):
        """Write the bilingual SRT to ``path`` (UTF-8, overwriting)."""
        with open(path, "w", encoding='utf-8') as f:
            f.write(self.form_bilingual_str())

    def correct_with_force_term(self):
        """Placeholder for forced-terminology correction; currently a no-op."""
        # TODO: force term correction
        pass