File size: 12,780 Bytes
0a00054
 
 
0f7f63b
 
d0f2803
0f7f63b
 
f1ae450
 
eb01d5d
 
04ef04e
4f0065c
f1ae450
04ef04e
 
d0f2803
0f7f63b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a00054
 
 
 
 
 
 
0f7f63b
 
 
0a00054
 
 
 
 
 
 
 
 
 
d581753
0a00054
 
b37d0d4
f1ae450
b37d0d4
 
f1ae450
b37d0d4
 
 
 
d581753
 
 
 
e4c138e
 
b37d0d4
eb9b4ad
0f7f63b
0a00054
04ef04e
 
7df592d
b37d0d4
 
 
 
 
 
d0f2803
b37d0d4
 
 
 
 
 
0f7f63b
 
d581753
0f7f63b
b37d0d4
d581753
0f7f63b
 
d658831
eb9b4ad
b37d0d4
d658831
0f7f63b
 
d658831
eb9b4ad
b37d0d4
d658831
0f7f63b
 
b37d0d4
0f7f63b
b37d0d4
0a00054
04ef04e
1a902ed
b37d0d4
 
1a902ed
cd67dcd
04ef04e
 
 
 
 
cd67dcd
04ef04e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fccaaea
04ef04e
cd67dcd
0f7f63b
d581753
0f7f63b
0a00054
f01af1f
a12b2b8
 
e4c138e
 
 
 
cd67dcd
 
eb01d5d
58d59c0
f01af1f
d0f2803
cd67dcd
eb01d5d
0f7f63b
 
 
 
 
 
 
b37d0d4
54a5e67
b37d0d4
 
0f7f63b
 
cd67dcd
0a00054
04ef04e
 
b37d0d4
 
 
 
cd67dcd
 
 
 
 
 
 
d0f2803
cd67dcd
d0f2803
04ef04e
cd67dcd
 
d0f2803
cd67dcd
 
04ef04e
cd67dcd
 
 
 
04ef04e
cd67dcd
04ef04e
 
cd67dcd
04ef04e
d0f2803
cd67dcd
d0f2803
 
 
 
cd67dcd
04ef04e
 
 
 
cd67dcd
0f7f63b
 
 
b37d0d4
0f7f63b
b37d0d4
0a00054
d0f2803
7df592d
 
d581753
 
7df592d
 
 
 
 
 
 
eb9b4ad
 
7df592d
cd90680
7df592d
 
 
eb9b4ad
7df592d
cd90680
7df592d
eb9b4ad
 
7df592d
eb9b4ad
d658831
eb9b4ad
7df592d
cd90680
 
 
7df592d
eb9b4ad
 
 
d658831
 
 
eb9b4ad
d658831
eb9b4ad
 
d0f2803
 
d658831
eb9b4ad
 
 
d658831
 
 
d0f2803
 
 
 
 
eb9b4ad
 
d658831
 
 
 
 
7df592d
d658831
cd90680
 
d658831
7df592d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
import threading
import time

import openai
from pytube import YouTube
from os import getenv, getcwd
from pathlib import Path
from enum import Enum, auto
import logging
import subprocess
from src.srt_util.srt import SrtScript
from src.srt_util.srt2ass import srt2ass
from time import time, strftime, gmtime, sleep
from src.translators.translation import get_translation, prompt_selector

import torch
import stable_whisper
import shutil

"""
Youtube link
    - link
    - model
    - output type

Video file
    - path
    - model
    - output type

Audio file
    - path
    - model
    - output type

""" 
"""
TaskID
Progress: Enum
Computing resrouce status 
SRT_Script : SrtScript
    -  input module -> initialize (ASR module)
    -  Pre-process
    -  Translation  (%)
    -  Post process (time stamp)
    -  Output module: SRT_Script --> output(.srt)
    -  (Optional) mp4
"""

class TaskStatus(str, Enum):
    CREATED = 'CREATED'
    INITIALIZING_ASR = 'INITIALIZING_ASR'
    PRE_PROCESSING = 'PRE_PROCESSING'
    TRANSLATING = 'TRANSLATING'
    POST_PROCESSING = 'POST_PROCESSING'
    OUTPUT_MODULE = 'OUTPUT_MODULE'


class Task:
    @property
    def status(self):
        with self.__status_lock:
            return self.__status

    @status.setter
    def status(self, new_status):
        with self.__status_lock:
            self.__status = new_status

    def __init__(self, task_id, task_local_dir, task_cfg):
        self.__status_lock = threading.Lock()
        self.__status = TaskStatus.CREATED
        self.gpu_status = 0
        openai.api_key = getenv("OPENAI_API_KEY")
        self.task_id = task_id
        
        self.task_local_dir = task_local_dir
        self.ASR_setting = task_cfg["ASR"]
        self.translation_setting = task_cfg["translation"]
        self.translation_model = self.translation_setting["model"]
        
        self.output_type = task_cfg["output_type"]
        self.target_lang = task_cfg["target_lang"]
        self.source_lang = task_cfg["source_lang"]
        self.field = task_cfg["field"]
        self.pre_setting = task_cfg["pre_process"]
        self.post_setting = task_cfg["post_process"]
        
        self.audio_path = None
        self.SRT_Script = None
        self.result = None
        self.s_t = None
        self.t_e = None

        print(f"Task ID: {self.task_id}")
        logging.info(f"Task ID: {self.task_id}")
        logging.info(f"{self.source_lang} -> {self.target_lang} task in {self.field}")
        logging.info(f"Translation Model: {self.translation_model}")
        logging.info(f"subtitle_type: {self.output_type['subtitle']}")
        logging.info(f"video_ouput: {self.output_type['video']}")
        logging.info(f"bilingual_ouput: {self.output_type['bilingual']}")
        logging.info("Pre-process setting:")
        for key in self.pre_setting:
            logging.info(f"{key}: {self.pre_setting[key]}")
        logging.info("Post-process setting:")
        for key in self.post_setting:
            logging.info(f"{key}: {self.post_setting[key]}")

    @staticmethod
    def fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg):
        # convert to audio
        logging.info("Task Creation method: Youtube Link")
        return YoutubeTask(task_id, task_dir, task_cfg, youtube_url)

    @staticmethod
    def fromAudioFile(audio_path, task_id, task_dir, task_cfg):
        # get audio path
        logging.info("Task Creation method: Audio File")
        return AudioTask(task_id, task_dir, task_cfg, audio_path)
    
    @staticmethod
    def fromVideoFile(video_path, task_id, task_dir, task_cfg):
        # get audio path
        logging.info("Task Creation method: Video File")
        return VideoTask(task_id, task_dir, task_cfg, video_path)
    
    # Module 1 ASR: audio --> SRT_script
    def get_srt_class(self):
        # Instead of using the script_en variable directly, we'll use script_input
        # TODO: setup ASR module like translator
        self.status = TaskStatus.INITIALIZING_ASR
        self.t_s = time()

        method = self.ASR_setting["whisper_config"]["method"]
        whisper_model = self.ASR_setting["whisper_config"]["whisper_model"]
        src_srt_path = self.task_local_dir.joinpath(f"task_{self.task_id}_{self.source_lang}.srt")
        if not Path.exists(src_srt_path):
            # extract script from audio
            logging.info("extract script from audio")
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

            if method == "api":
                with open(self.audio_path, 'rb') as audio_file:
                    transcript = openai.Audio.transcribe(model="whisper-1", file=audio_file, response_format="srt")
            elif method == "stable":
                model = stable_whisper.load_model(whisper_model, device)
                transcript = model.transcribe(str(self.audio_path), regroup=False,
                                                  initial_prompt="Hello, welcome to my lecture. Are you good my friend?")
                (
                    transcript
                    .split_by_punctuation(['.', '。', '?'])
                    .merge_by_gap(.15, max_words=3)
                    .merge_by_punctuation([' '])
                    .split_by_punctuation(['.', '。', '?'])
                )
                transcript = transcript.to_dict()
            
            # after get the transcript, release the gpu resource
            torch.cuda.empty_cache()

        self.SRT_Script = SrtScript(self.source_lang, self.target_lang, transcript['segments'], self.field)
        # save the srt script to local
        self.SRT_Script.write_srt_file_src(src_srt_path)

    # Module 2: SRT preprocess: perform preprocess steps
    def preprocess(self):
        self.status = TaskStatus.PRE_PROCESSING
        logging.info("--------------------Start Preprocessing SRT class--------------------")
        if self.pre_setting["sentence_form"]:
            self.SRT_Script.form_whole_sentence()
        if self.pre_setting["spell_check"]:
            self.SRT_Script.spell_check_term()
        if self.pre_setting["term_correct"]:
            self.SRT_Script.correct_with_force_term()
        processed_srt_path_src = str(Path(self.task_local_dir) / f'{self.task_id}_processed.srt')
        self.SRT_Script.write_srt_file_src(processed_srt_path_src)

        if self.output_type["subtitle"] == "ass":
            logging.info("write English .srt file to .ass")
            assSub_src = srt2ass(processed_srt_path_src, "default", "No", "Modest")
            logging.info('ASS subtitle saved as: ' + assSub_src)
        self.script_input = self.SRT_Script.get_source_only()
        pass
    
    def update_translation_progress(self, new_progress):
        if self.progress == TaskStatus.TRANSLATING:
            self.progress = TaskStatus.TRANSLATING.value[0], new_progress

    # Module 3: perform srt translation
    def translation(self):
        logging.info("---------------------Start Translation--------------------")
        prompt = prompt_selector(self.source_lang, self.target_lang, self.field)
        get_translation(self.SRT_Script, self.translation_model, self.task_id, prompt, self.translation_setting['chunk_size'])
    
    # Module 4: perform srt post process steps
    def postprocess(self):
        self.status = TaskStatus.POST_PROCESSING

        logging.info("---------------------Start Post-processing SRT class---------------------")
        if self.post_setting["check_len_and_split"]:
            self.SRT_Script.check_len_and_split()
        if self.post_setting["remove_trans_punctuation"]:
            self.SRT_Script.remove_trans_punctuation()
        logging.info("---------------------Post-processing SRT class finished---------------------")

    # Module 5: output module
    def output_render(self):
        self.status = TaskStatus.OUTPUT_MODULE
        video_out = self.output_type["video"]
        subtitle_type = self.output_type["subtitle"]
        is_bilingual = self.output_type["bilingual"]

        results_dir =f"{self.task_local_dir}/results"

        subtitle_path = f"{results_dir}/{self.task_id}_{self.target_lang}.srt"
        self.SRT_Script.write_srt_file_translate(subtitle_path)
        if is_bilingual:
            subtitle_path = f"{results_dir}/{self.task_id}_{self.source_lang}_{self.target_lang}.srt"
            self.SRT_Script.write_srt_file_bilingual(subtitle_path)

        if subtitle_type == "ass":
            logging.info("write .srt file to .ass")
            subtitle_path = srt2ass(subtitle_path, "default", "No", "Modest")
            logging.info('ASS subtitle saved as: ' + subtitle_path)

        final_res = subtitle_path

        # encode to .mp4 video file
        if video_out and self.video_path is not None:
            logging.info("encoding video file")
            logging.info(f'ffmpeg comand: \nffmpeg -i {self.video_path} -vf "subtitles={subtitle_path}" {results_dir}/{self.task_id}.mp4')
            subprocess.run(
                ["ffmpeg",
                    "-i", self.video_path,
                    "-vf", f"subtitles={subtitle_path}",
                    f"{results_dir}/{self.task_id}.mp4"])
            final_res = f"{results_dir}/{self.task_id}.mp4"

        self.t_e = time()
        logging.info(
            "Pipeline finished, time duration:{}".format(strftime("%H:%M:%S", gmtime(self.t_e - self.t_s))))
        return final_res
    
    def run_pipeline(self):
        self.get_srt_class()
        self.preprocess()
        self.translation()
        self.postprocess()
        self.result = self.output_render()
        # print(self.result)

class YoutubeTask(Task):
    def __init__(self, task_id, task_local_dir, task_cfg, youtube_url):
        super().__init__(task_id, task_local_dir, task_cfg)
        self.youtube_url = youtube_url

    def run(self):
        yt = YouTube(self.youtube_url)
        video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()

        if video:
            video.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp4")
            logging.info(f'Video Name: {video.default_filename}')
        else:
            raise FileNotFoundError(f" Video stream not found for link {self.youtube_url}")

        audio = yt.streams.filter(only_audio=True).first()
        if audio:
            audio.download(str(self.task_local_dir), filename=f"task_{self.task_id}.mp3")
        else:
            logging.info(" download audio failed, using ffmpeg to extract audio")
            subprocess.run(
                ['ffmpeg', '-i', self.task_local_dir.joinpath(f"task_{self.task_id}.mp4"), '-f', 'mp3',
                 '-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
            logging.info("audio extraction finished")
        
        self.video_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp4")
        self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")

        logging.info(f" Video File Dir: {self.video_path}")
        logging.info(f" Audio File Dir: {self.audio_path}")
        logging.info(" Data Prep Complete. Start pipeline")

        super().run_pipeline()

class AudioTask(Task):
    def __init__(self, task_id, task_local_dir, task_cfg, audio_path):
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check audio format
        self.audio_path = audio_path
        self.video_path = None

    def run(self):
        logging.info(f"Video File Dir: {self.video_path}")
        logging.info(f"Audio File Dir: {self.audio_path}")
        logging.info("Data Prep Complete. Start pipeline")
        super().run_pipeline()

class VideoTask(Task):
    def __init__(self, task_id, task_local_dir, task_cfg, video_path):
        super().__init__(task_id, task_local_dir, task_cfg)
        # TODO: check video format {.mp4}
        new_video_path = f"{task_local_dir}/task_{self.task_id}.mp4"
        print(new_video_path)
        logging.info(f"Copy video file to: {new_video_path}")
        shutil.copyfile(video_path, new_video_path)
        self.video_path = new_video_path

    def run(self):
        logging.info("using ffmpeg to extract audio")
        subprocess.run(
                ['ffmpeg', '-i', self.video_path, '-f', 'mp3',
                 '-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
        logging.info("audio extraction finished")

        self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
        logging.info(f" Video File Dir: {self.video_path}")
        logging.info(f" Audio File Dir: {self.audio_path}")
        logging.info("Data Prep Complete. Start pipeline")
        super().run_pipeline()