Spaces:
Sleeping
Sleeping
task initialization done
Browse files
Former-commit-id: c84c9e1172e01e6a6cdb30d3d1f79f6f38e76a04
- configs/task_config.yaml +1 -1
- entries/run.py +15 -6
- src/task.py +46 -24
configs/task_config.yaml
CHANGED
@@ -6,5 +6,5 @@ output_type:
|
|
6 |
video: False
|
7 |
bilingal: False
|
8 |
source_lang: EN
|
9 |
-
target_lang:
|
10 |
field: SC2
|
|
|
6 |
video: False
|
7 |
bilingal: False
|
8 |
source_lang: EN
|
9 |
+
target_lang: ZH
|
10 |
field: SC2
|
entries/run.py
CHANGED
@@ -18,7 +18,7 @@ def parse_args():
|
|
18 |
parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)
|
19 |
parser.add_argument("--continue", help="task_id that need to continue", default=None, type=str, required=False) # need implement
|
20 |
parser.add_argument("--launch_cfg", help="launch config path", default='./configs/local_launch.yaml', type=str, required=False)
|
21 |
-
|
22 |
args = parser.parse_args()
|
23 |
|
24 |
return args
|
@@ -27,7 +27,7 @@ if __name__ == "__main__":
|
|
27 |
# read args and configs
|
28 |
args = parse_args()
|
29 |
launch_cfg = load(open(args.launch_cfg), Loader=Loader)
|
30 |
-
|
31 |
|
32 |
# initialize dir
|
33 |
local_dir = Path(launch_cfg['local_dump'])
|
@@ -41,7 +41,6 @@ if __name__ == "__main__":
|
|
41 |
task_dir = local_dir.joinpath(f"task_{task_id}")
|
42 |
task_dir.mkdir(parents=False, exist_ok=False)
|
43 |
task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)
|
44 |
-
task_dir.joinpath("logs").mkdir(parents=False, exist_ok=False)
|
45 |
|
46 |
# logging
|
47 |
logging.basicConfig(level=logging.INFO, handlers=[
|
@@ -49,12 +48,22 @@ if __name__ == "__main__":
|
|
49 |
"{}/{}_{}.log".format(task_dir, f"task_{task_id}", datetime.now().strftime("%m%d%Y_%H%M%S")),
|
50 |
'w', encoding='utf-8')])
|
51 |
|
52 |
-
# TODO: write task info into log
|
53 |
-
|
54 |
# Task create
|
55 |
if args.link is not None:
|
56 |
try:
|
57 |
-
task = Task.fromYoutubeLink(args.link, task_id,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
except:
|
59 |
shutil.rmtree(task_dir)
|
60 |
raise RuntimeError("failed to create task from youtube link")
|
|
|
18 |
parser.add_argument("--srt_file", help="srt file input path here", default=None, type=str, required=False)
|
19 |
parser.add_argument("--continue", help="task_id that need to continue", default=None, type=str, required=False) # need implement
|
20 |
parser.add_argument("--launch_cfg", help="launch config path", default='./configs/local_launch.yaml', type=str, required=False)
|
21 |
+
parser.add_argument("--task_cfg", help="task config path", default='./configs/task_config.yaml', type=str, required=False)
|
22 |
args = parser.parse_args()
|
23 |
|
24 |
return args
|
|
|
27 |
# read args and configs
|
28 |
args = parse_args()
|
29 |
launch_cfg = load(open(args.launch_cfg), Loader=Loader)
|
30 |
+
task_cfg = load(open(args.task_cfg), Loader=Loader)
|
31 |
|
32 |
# initialize dir
|
33 |
local_dir = Path(launch_cfg['local_dump'])
|
|
|
41 |
task_dir = local_dir.joinpath(f"task_{task_id}")
|
42 |
task_dir.mkdir(parents=False, exist_ok=False)
|
43 |
task_dir.joinpath("results").mkdir(parents=False, exist_ok=False)
|
|
|
44 |
|
45 |
# logging
|
46 |
logging.basicConfig(level=logging.INFO, handlers=[
|
|
|
48 |
"{}/{}_{}.log".format(task_dir, f"task_{task_id}", datetime.now().strftime("%m%d%Y_%H%M%S")),
|
49 |
'w', encoding='utf-8')])
|
50 |
|
|
|
|
|
51 |
# Task create
|
52 |
if args.link is not None:
|
53 |
try:
|
54 |
+
task = Task.fromYoutubeLink(args.link, task_id, task_dir, task_cfg)
|
55 |
+
except:
|
56 |
+
shutil.rmtree(task_dir)
|
57 |
+
raise RuntimeError("failed to create task from youtube link")
|
58 |
+
elif args.video_file is not None:
|
59 |
+
try:
|
60 |
+
task = Task.fromVideoFile(args.video_file, task_id, task_dir, task_cfg)
|
61 |
+
except:
|
62 |
+
shutil.rmtree(task_dir)
|
63 |
+
raise RuntimeError("failed to create task from youtube link")
|
64 |
+
elif args.audio_file is not None:
|
65 |
+
try:
|
66 |
+
task = Task.fromVideoFile(args.audio_file, task_id, task_dir, task_cfg)
|
67 |
except:
|
68 |
shutil.rmtree(task_dir)
|
69 |
raise RuntimeError("failed to create task from youtube link")
|
src/task.py
CHANGED
@@ -64,36 +64,47 @@ class Task:
|
|
64 |
with self.__status_lock:
|
65 |
self.__status = new_status
|
66 |
|
67 |
-
def __init__(self, task_id, task_local_dir,
|
68 |
self.__status_lock = threading.Lock()
|
69 |
self.__status = TaskStatus.CREATED
|
70 |
openai.api_key = getenv("OPENAI_API_KEY")
|
71 |
-
self.launch_info =
|
72 |
self.task_local_dir = task_local_dir
|
73 |
-
self.model =
|
74 |
self.gpu_status = 0
|
75 |
-
self.output_type =
|
|
|
|
|
|
|
76 |
self.task_id = task_id
|
77 |
self.audio_path = None
|
78 |
self.SRT_Script = None
|
79 |
self.result = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
@staticmethod
|
82 |
-
def fromYoutubeLink(youtube_url, task_id,
|
83 |
-
#
|
84 |
logging.info("Task Creation method: Youtube Link")
|
85 |
-
|
86 |
-
return YoutubeTask(task_id, local_dump.joinpath(f"task_{task_id}"), launch_info, youtube_url)
|
87 |
|
88 |
@staticmethod
|
89 |
-
def fromAudioFile():
|
90 |
# get audio path
|
91 |
-
|
|
|
92 |
|
93 |
@staticmethod
|
94 |
-
def fromVideoFile():
|
95 |
# get audio path
|
96 |
-
|
|
|
97 |
|
98 |
# Module 1 ASR: audio --> SRT_script
|
99 |
def get_srt_class(self, whisper_model='tiny', method="stable"):
|
@@ -103,7 +114,8 @@ class Task:
|
|
103 |
time.sleep(5)
|
104 |
pass
|
105 |
|
106 |
-
# Module 2: SRT preprocess: perform preprocess steps
|
|
|
107 |
def preprocess(self):
|
108 |
self.status = TaskStatus.PRE_PROCESSING
|
109 |
logging.info("--------------------Start Preprocessing SRT class--------------------")
|
@@ -113,7 +125,7 @@ class Task:
|
|
113 |
processed_srt_path_en = str(Path(self.task_local_dir).with_suffix('')) + '_processed.srt'
|
114 |
self.SRT_Script.write_srt_file_src(processed_srt_path_en)
|
115 |
|
116 |
-
if self.output_type == "ass":
|
117 |
logging.info("write English .srt file to .ass")
|
118 |
assSub_en = srt2ass(processed_srt_path_en)
|
119 |
logging.info('ASS subtitle saved as: ' + assSub_en)
|
@@ -140,7 +152,6 @@ class Task:
|
|
140 |
def output_render(self):
|
141 |
self.status = TaskStatus.OUTPUT_MODULE
|
142 |
return "TODO"
|
143 |
-
pass
|
144 |
|
145 |
def run_pipeline(self):
|
146 |
self.get_srt_class()
|
@@ -150,8 +161,8 @@ class Task:
|
|
150 |
self.result = self.output_render()
|
151 |
|
152 |
class YoutubeTask(Task):
|
153 |
-
def __init__(self, task_id, task_local_dir,
|
154 |
-
super().__init__(task_id, task_local_dir,
|
155 |
self.youtube_url = youtube_url
|
156 |
|
157 |
def run(self):
|
@@ -176,26 +187,37 @@ class YoutubeTask(Task):
|
|
176 |
'-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
|
177 |
logging.info("audio extraction finished")
|
178 |
|
|
|
179 |
self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
|
180 |
|
181 |
-
logging.info("
|
182 |
|
183 |
super().run_pipeline()
|
184 |
|
185 |
class AudioTask(Task):
|
186 |
-
def __init__(self, task_id, task_local_dir,
|
187 |
-
super().__init__(task_id, task_local_dir,
|
|
|
188 |
self.audio_path = audio_path
|
|
|
189 |
|
190 |
def run(self):
|
|
|
191 |
super().run_pipeline()
|
192 |
|
193 |
class VideoTask(Task):
|
194 |
-
def __init__(self, task_id, task_local_dir,
|
195 |
-
super().__init__(task_id, task_local_dir,
|
196 |
-
|
|
|
197 |
|
198 |
def run(self):
|
199 |
-
|
|
|
|
|
|
|
|
|
200 |
|
|
|
|
|
201 |
super().run_pipeline()
|
|
|
64 |
with self.__status_lock:
|
65 |
self.__status = new_status
|
66 |
|
67 |
+
def __init__(self, task_id, task_local_dir, task_cfg):
|
68 |
self.__status_lock = threading.Lock()
|
69 |
self.__status = TaskStatus.CREATED
|
70 |
openai.api_key = getenv("OPENAI_API_KEY")
|
71 |
+
self.launch_info = task_cfg # do not use, just for fallback
|
72 |
self.task_local_dir = task_local_dir
|
73 |
+
self.model = task_cfg["model"]
|
74 |
self.gpu_status = 0
|
75 |
+
self.output_type = task_cfg["output_type"]
|
76 |
+
self.target_lang = task_cfg["target_lang"]
|
77 |
+
self.source_lang = task_cfg["source_lang"]
|
78 |
+
self.field = task_cfg["field"]
|
79 |
self.task_id = task_id
|
80 |
self.audio_path = None
|
81 |
self.SRT_Script = None
|
82 |
self.result = None
|
83 |
+
print(f" Task ID: {self.task_id}")
|
84 |
+
logging.info(f" Task ID: {self.task_id}")
|
85 |
+
logging.info(f" {self.source_lang} -> {self.target_lang} task in {self.field}")
|
86 |
+
logging.info(f" Model: {self.model}")
|
87 |
+
logging.info(f" subtitle_type: {self.output_type['subtitle']}")
|
88 |
+
logging.info(f" video_ouput: \t{self.output_type['video']}")
|
89 |
+
logging.info(f" bilingal_ouput: \t{self.output_type['bilingal']}")
|
90 |
|
91 |
@staticmethod
|
92 |
+
def fromYoutubeLink(youtube_url, task_id, task_dir, task_cfg):
|
93 |
+
# convert to audio
|
94 |
logging.info("Task Creation method: Youtube Link")
|
95 |
+
return YoutubeTask(task_id, task_dir, task_cfg, youtube_url)
|
|
|
96 |
|
97 |
@staticmethod
|
98 |
+
def fromAudioFile(audio_path, task_id, task_dir, task_cfg):
|
99 |
# get audio path
|
100 |
+
logging.info("Task Creation method: Audio File")
|
101 |
+
return AudioTask(task_id, task_dir, task_cfg, audio_path)
|
102 |
|
103 |
@staticmethod
|
104 |
+
def fromVideoFile(video_path, task_id, task_dir, task_cfg):
|
105 |
# get audio path
|
106 |
+
logging.info("Task Creation method: Video File")
|
107 |
+
return VideoTask(task_id, task_dir, task_cfg, video_path)
|
108 |
|
109 |
# Module 1 ASR: audio --> SRT_script
|
110 |
def get_srt_class(self, whisper_model='tiny', method="stable"):
|
|
|
114 |
time.sleep(5)
|
115 |
pass
|
116 |
|
117 |
+
# Module 2: SRT preprocess: perform preprocess steps
|
118 |
+
# TODO: multi-lang and multi-field support according to task_cfg
|
119 |
def preprocess(self):
|
120 |
self.status = TaskStatus.PRE_PROCESSING
|
121 |
logging.info("--------------------Start Preprocessing SRT class--------------------")
|
|
|
125 |
processed_srt_path_en = str(Path(self.task_local_dir).with_suffix('')) + '_processed.srt'
|
126 |
self.SRT_Script.write_srt_file_src(processed_srt_path_en)
|
127 |
|
128 |
+
if self.output_type["subtitle"] == "ass":
|
129 |
logging.info("write English .srt file to .ass")
|
130 |
assSub_en = srt2ass(processed_srt_path_en)
|
131 |
logging.info('ASS subtitle saved as: ' + assSub_en)
|
|
|
152 |
def output_render(self):
|
153 |
self.status = TaskStatus.OUTPUT_MODULE
|
154 |
return "TODO"
|
|
|
155 |
|
156 |
def run_pipeline(self):
|
157 |
self.get_srt_class()
|
|
|
161 |
self.result = self.output_render()
|
162 |
|
163 |
class YoutubeTask(Task):
|
164 |
+
def __init__(self, task_id, task_local_dir, task_cfg, youtube_url):
|
165 |
+
super().__init__(task_id, task_local_dir, task_cfg)
|
166 |
self.youtube_url = youtube_url
|
167 |
|
168 |
def run(self):
|
|
|
187 |
'-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
|
188 |
logging.info("audio extraction finished")
|
189 |
|
190 |
+
self.video_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp4")
|
191 |
self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
|
192 |
|
193 |
+
logging.info("Data Prep Complete. Start pipeline")
|
194 |
|
195 |
super().run_pipeline()
|
196 |
|
197 |
class AudioTask(Task):
|
198 |
+
def __init__(self, task_id, task_local_dir, task_cfg, audio_path):
|
199 |
+
super().__init__(task_id, task_local_dir, task_cfg)
|
200 |
+
# TODO: check audio format
|
201 |
self.audio_path = audio_path
|
202 |
+
self.video_path = None
|
203 |
|
204 |
def run(self):
|
205 |
+
logging.info("Data Prep Complete. Start pipeline")
|
206 |
super().run_pipeline()
|
207 |
|
208 |
class VideoTask(Task):
|
209 |
+
def __init__(self, task_id, task_local_dir, task_cfg, video_path):
|
210 |
+
super().__init__(task_id, task_local_dir, task_cfg)
|
211 |
+
# TODO: check video format {.mp4}
|
212 |
+
self.video_path = video_path
|
213 |
|
214 |
def run(self):
|
215 |
+
logging.info("using ffmpeg to extract audio")
|
216 |
+
subprocess.run(
|
217 |
+
['ffmpeg', '-i', self.video_path, '-f', 'mp3',
|
218 |
+
'-ab', '192000', '-vn', self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")])
|
219 |
+
logging.info("audio extraction finished")
|
220 |
|
221 |
+
self.audio_path = self.task_local_dir.joinpath(f"task_{self.task_id}.mp3")
|
222 |
+
logging.info("Data Prep Complete. Start pipeline")
|
223 |
super().run_pipeline()
|