imansarraf committed on
Commit
10bc845
·
verified ·
1 Parent(s): ccb1371

Upload 10 files

Browse files
autosub/__init__-0.4.0.py ADDED
@@ -0,0 +1,405 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Defines autosub's main functionality.
3
+ """
4
+
5
+ #!/usr/bin/env python
6
+
7
+ from __future__ import absolute_import, print_function, unicode_literals
8
+
9
+ import argparse
10
+ import audioop
11
+ import json
12
+ import math
13
+ import multiprocessing
14
+ import os
15
+ import subprocess
16
+ import sys
17
+ import tempfile
18
+ import wave
19
+
20
+ import requests
21
+ from googleapiclient.discovery import build
22
+ from progressbar import ProgressBar, Percentage, Bar, ETA
23
+
24
+ from autosub.constants import (
25
+ LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
26
+ )
27
+ from autosub.formatters import FORMATTERS
28
+
29
+ DEFAULT_SUBTITLE_FORMAT = 'srt'
30
+ DEFAULT_CONCURRENCY = 10
31
+ DEFAULT_SRC_LANGUAGE = 'en'
32
+ DEFAULT_DST_LANGUAGE = 'en'
33
+
34
+
35
def percentile(arr, percent):
    """
    Return the value found at the given fractional position of the sorted input.

    `percent` is a fraction (0.2 selects the 20th percentile). When the
    position falls between two elements, the result is the linear
    interpolation of those two neighbours.
    """
    ordered = sorted(arr)
    position = (len(ordered) - 1) * percent
    lower = math.floor(position)
    upper = math.ceil(position)
    if lower == upper:
        # The position lands exactly on an element: no interpolation needed.
        return ordered[int(position)]
    # Weight each neighbour by its distance to the opposite bound.
    return (ordered[int(lower)] * (upper - position)
            + ordered[int(upper)] * (position - lower))
48
+
49
+
50
class FLACConverter(object): # pylint: disable=too-few-public-methods
    """
    Class for converting a region of an input audio or video file into a FLAC audio file

    Instances are callables: calling one with a `(start, end)` region (in
    seconds) runs ffmpeg on `source_path` and returns the encoded FLAC bytes.
    """
    def __init__(self, source_path, include_before=0.25, include_after=0.25):
        # Path handed to ffmpeg via "-i".
        self.source_path = source_path
        # Seconds of extra audio kept before / after each region.
        self.include_before = include_before
        self.include_after = include_after

    def __call__(self, region):
        """
        Extract `region` from the source file and return it as FLAC bytes.

        Returns None if the conversion is interrupted with Ctrl-C. Errors
        raised by ffmpeg (non-zero exit status, missing executable) propagate
        to the caller.
        """
        try:
            start, end = region
            start = max(0, start - self.include_before)
            end += self.include_after
            # delete=False: a temporary file that is deleted on close cannot be
            # opened a second time (by ffmpeg) on Windows, so the file is
            # removed explicitly below instead.
            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
            try:
                command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
                           "-y", "-i", self.source_path,
                           "-loglevel", "error", temp.name]
                use_shell = os.name == "nt"
                # Give ffmpeg an empty stdin and make sure the handle is closed.
                with open(os.devnull) as devnull:
                    subprocess.check_output(command, stdin=devnull, shell=use_shell)
                return temp.read()
            finally:
                # Runs on success and on failure, so no temp file is left behind.
                temp.close()
                os.unlink(temp.name)

        except KeyboardInterrupt:
            return None
74
+
75
+
76
class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
    """
    Class for performing speech-to-text for an input FLAC file.

    Instances are callables: calling one with FLAC bytes posts them to the
    URL built from GOOGLE_SPEECH_API_URL and returns the first transcript
    found in the response, or None when there is none.
    """
    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
        self.language = language
        # Sample rate advertised in the Content-Type header of the request.
        self.rate = rate
        self.api_key = api_key
        # Maximum number of POST attempts per audio chunk.
        self.retries = retries

    def __call__(self, data):
        """
        Return the capitalised transcript for `data`, or None.

        None is returned when every attempt fails to connect, when no response
        line carries a transcript, or when the call is interrupted with Ctrl-C.
        """
        try:
            # Neither value changes between attempts, so build them once.
            url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
            headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}

            for _ in range(self.retries):
                try:
                    resp = requests.post(url, data=data, headers=headers)
                except requests.exceptions.ConnectionError:
                    continue

                # The body is read as one JSON document per line.
                for line in resp.content.decode('utf-8').split("\n"):
                    try:
                        parsed = json.loads(line)
                        transcript = parsed['result'][0]['alternative'][0]['transcript']
                    except (IndexError, KeyError, ValueError):
                        # IndexError: empty result list; KeyError: object without
                        # the expected keys; ValueError: blank or non-JSON line
                        # (json's decode error is a ValueError subclass).
                        continue
                    return transcript[:1].upper() + transcript[1:]

        except KeyboardInterrupt:
            return None

        return None
108
+
109
+
110
class Translator(object): # pylint: disable=too-few-public-methods
    """
    Class for translating a sentence from one language to another.

    Instances are callables that send a single sentence to the 'translate'
    v2 service object created in the constructor.
    """
    def __init__(self, language, api_key, src, dst):
        self.language = language
        self.api_key = api_key
        self.src = src
        self.dst = dst
        self.service = build('translate', 'v2',
                             developerKey=self.api_key)

    def __call__(self, sentence):
        """
        Return the translated text for `sentence`, or None when the sentence is
        empty, the response carries no translation, or Ctrl-C is pressed.
        """
        try:
            if not sentence:
                return None

            request = self.service.translations().list( # pylint: disable=no-member
                source=self.src,
                target=self.dst,
                q=[sentence]
            )
            result = request.execute()

            if 'translations' not in result:
                return None
            translations = result['translations']
            if not translations:
                return None
            first = translations[0]
            if 'translatedText' not in first:
                return None
            return first['translatedText']

        except KeyboardInterrupt:
            return None
141
+
142
+
143
def which(program):
    """
    Return the path for a given executable.

    If `program` contains a directory component it is checked as given;
    otherwise every directory listed in the PATH environment variable is
    searched in order. Returns None when no executable file is found.
    """
    def is_exe(file_path):
        """
        Checks whether a file is executable.
        """
        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)

    fpath, _ = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        # Fall back to the platform default search path instead of raising
        # KeyError when PATH is not set in the environment.
        search_path = os.environ.get("PATH", os.defpath)
        for path in search_path.split(os.pathsep):
            # PATH entries may be wrapped in double quotes (seen on Windows).
            path = path.strip('"')
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file
    return None
164
+
165
+
166
def extract_audio(filename, channels=1, rate=16000):
    """
    Extract audio from an input file to a temporary WAV file.

    Returns a `(wav_path, rate)` tuple. The caller owns the returned file and
    is responsible for deleting it.

    Raises Exception when `filename` is not an existing file or when ffmpeg
    cannot be located; errors from the ffmpeg run itself propagate unchanged.
    """
    # Validate before creating the temp file so a failed check leaves nothing
    # behind on disk.
    if not os.path.isfile(filename):
        print("The given file does not exist: {}".format(filename))
        raise Exception("Invalid filepath: {}".format(filename))
    if not which("ffmpeg"):
        print("ffmpeg: Executable not found on machine.")
        raise Exception("Dependency not found: ffmpeg")

    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    # Only the reserved path is needed; ffmpeg writes the content itself.
    temp.close()

    command = ["ffmpeg", "-y", "-i", filename,
               "-ac", str(channels), "-ar", str(rate),
               "-loglevel", "error", temp.name]
    use_shell = os.name == "nt"
    try:
        with open(os.devnull) as devnull:
            subprocess.check_output(command, stdin=devnull, shell=use_shell)
    except BaseException:
        # The caller never learns the path on failure, so remove it here.
        os.remove(temp.name)
        raise
    return temp.name, rate
183
+
184
+
185
def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
    """
    Perform voice activity detection on a given audio file.

    The WAV file is read in chunks of `frame_width` frames and the RMS energy
    of each chunk is computed. Chunks whose energy is at or below the 20th
    percentile of all energies count as silence. Returns a list of
    `(start, end)` tuples in seconds; a region is emitted once it is at least
    `min_region_size` long and is followed by silence or has reached
    `max_region_size`. A region still open when the audio ends is not emitted.
    """
    reader = wave.open(filename)
    try:
        sample_width = reader.getsampwidth()
        rate = reader.getframerate()
        n_channels = reader.getnchannels()
        n_chunks = int(math.ceil(reader.getnframes() * 1.0 / frame_width))
        # NOTE(review): the width passed to audioop.rms is the frame width
        # (sample width * channels); this equals the sample width only for
        # mono input, which is what extract_audio produces by default.
        energies = [
            audioop.rms(reader.readframes(frame_width), sample_width * n_channels)
            for _ in range(n_chunks)
        ]
    finally:
        # Release the file handle even if reading fails.
        reader.close()

    if not energies:
        # Audio with no frames: nothing to analyse (percentile() would fail).
        return []

    chunk_duration = float(frame_width) / rate
    threshold = percentile(energies, 0.2)

    elapsed_time = 0

    regions = []
    region_start = None

    for energy in energies:
        is_silence = energy <= threshold
        # Compare against None explicitly: a region that starts at time 0 is a
        # real region even though 0 is falsy.
        in_region = region_start is not None
        max_exceeded = in_region and elapsed_time - region_start >= max_region_size

        if in_region and (max_exceeded or is_silence):
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start, elapsed_time))
                region_start = None

        elif not in_region and not is_silence:
            region_start = elapsed_time
        elapsed_time += chunk_duration
    return regions
222
+
223
+
224
def _map_with_progress(pool, func, items, label, total):
    """
    Apply func to each item on the worker pool, in order, behind a progress bar.
    """
    widgets = [label, Percentage(), ' ', Bar(), ' ', ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=total).start()
    results = []
    try:
        for i, result in enumerate(pool.imap(func, items)):
            results.append(result)
            pbar.update(i)
    except KeyboardInterrupt:
        # Leave the terminal in a clean state before the interrupt propagates.
        pbar.finish()
        raise
    pbar.finish()
    return results


def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
    ):
    """
    Given an input audio/video file, generate subtitles in the specified language and format.

    Steps: extract the audio to a temporary WAV file, detect speech regions,
    convert each region to FLAC, transcribe each region, optionally translate
    the transcripts, then write the formatted subtitles to `output` (or to
    `source_path` with its extension replaced by the subtitle format).

    Returns the path of the written subtitle file. Returns the integer 1,
    after printing an error, when translation is required but `api_key` is
    not supplied. KeyboardInterrupt is re-raised after cleanup.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    pool = None

    try:
        regions = find_speech_regions(audio_filename)

        pool = multiprocessing.Pool(concurrency)
        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                      api_key=GOOGLE_SPEECH_API_KEY)

        transcripts = []
        if regions:
            try:
                extracted_regions = _map_with_progress(
                    pool, converter, regions,
                    "Converting speech regions to FLAC files: ", len(regions))
                transcripts = _map_with_progress(
                    pool, recognizer, extracted_regions,
                    "Performing speech recognition: ", len(regions))

                # Translate only when the base languages differ (the part of
                # the code before any "-" region suffix).
                if src_language.split("-")[0] != dst_language.split("-")[0]:
                    if not api_key:
                        print(
                            "Error: Subtitle translation requires specified Google Translate API key. "
                            "See --help for further information."
                        )
                        return 1
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                    transcripts = _map_with_progress(
                        pool, translator, transcripts, prompt, len(regions))

            except KeyboardInterrupt:
                print("Cancelling transcription")
                raise

        # Drop regions for which no transcript was produced.
        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(subtitle_file_format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = output

        if not dest:
            base = os.path.splitext(source_path)[0]
            dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

        with open(dest, 'wb') as output_file:
            output_file.write(formatted_subtitles.encode("utf-8"))

        return dest

    finally:
        # Runs on every exit path (success, early return, error, Ctrl-C) so the
        # worker processes and the temporary WAV file never outlive the call.
        if pool is not None:
            pool.terminate()
            pool.join()
        try:
            os.remove(audio_filename)
        except OSError:
            pass
310
+
311
+
312
def validate(args):
    """
    Report whether the parsed command-line arguments can be used.

    Checks, in order: the subtitle format, the source language, the
    destination language, and the presence of a source path. The first
    failing check prints its message and makes the function return False;
    True is returned when every check passes.
    """
    format_message = ("Subtitle format not supported. "
                      "Run with --list-formats to see all supported formats.")
    source_message = ("Source language not supported. "
                      "Run with --list-languages to see all supported languages.")
    destination_message = ("Destination language not supported. "
                           "Run with --list-languages to see all supported languages.")

    if args.format not in FORMATTERS:
        print(format_message)
        return False

    if args.src_language not in LANGUAGE_CODES:
        print(source_message)
        return False

    if args.dst_language not in LANGUAGE_CODES:
        print(destination_message)
        return False

    if not args.source_path:
        print("Error: You need to specify a source path.")
        return False

    return True
342
+
343
+
344
def main():
    """
    Run autosub as a command-line program.

    Returns the process exit status: 0 on success (including the --list-*
    options), 1 when the arguments are invalid, when subtitle generation
    reports a failure, or when the run is interrupted with Ctrl-C.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
                        type=int, default=DEFAULT_CONCURRENCY)
    # Adjacent string literals are used instead of a backslash continuation so
    # that no run of indentation whitespace ends up inside the help text.
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved in "
                             "the same directory and name as the source path)")
    parser.add_argument('-F', '--format', help="Destination subtitle format",
                        default=DEFAULT_SUBTITLE_FORMAT)
    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
                        default=DEFAULT_SRC_LANGUAGE)
    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
                        default=DEFAULT_DST_LANGUAGE)
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats', help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages', help="List all available source/destination languages",
                        action='store_true')

    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS:
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if not validate(args):
        return 1

    try:
        subtitle_file_path = generate_subtitles(
            source_path=args.source_path,
            concurrency=args.concurrency,
            src_language=args.src_language,
            dst_language=args.dst_language,
            api_key=args.api_key,
            subtitle_file_format=args.format,
            output=args.output,
        )
    except KeyboardInterrupt:
        return 1

    if subtitle_file_path == 1:
        # generate_subtitles returns 1 (after printing its own error) when a
        # translation was requested without an API key; no file was written,
        # so do not announce one and do not exit with success.
        return 1

    print("Subtitles file created at {}".format(subtitle_file_path))
    return 0
402
+
403
+
404
+ if __name__ == '__main__':
405
+ sys.exit(main())
autosub/__init__.py ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Defines autosub's main functionality.
3
+ """
4
+
5
+ #!/usr/bin/env python
6
+
7
+ from __future__ import absolute_import, print_function, unicode_literals
8
+
9
+ import argparse
10
+ import audioop
11
+ import math
12
+ import multiprocessing
13
+ import os
14
+ from json import JSONDecodeError
15
+ import subprocess
16
+ import sys
17
+ import tempfile
18
+ import wave
19
+ import json
20
+ import requests
21
+ try:
22
+ from json.decoder import JSONDecodeError
23
+ except ImportError:
24
+ JSONDecodeError = ValueError
25
+
26
+ from googleapiclient.discovery import build
27
+ from progressbar import ProgressBar, Percentage, Bar, ETA
28
+
29
+ from autosub.constants import (
30
+ LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
31
+ )
32
+ from autosub.formatters import FORMATTERS
33
+ from pathlib import PurePath
34
+
35
+ DEFAULT_SUBTITLE_FORMAT = 'srt'
36
+ DEFAULT_CONCURRENCY = 10
37
+ DEFAULT_SRC_LANGUAGE = 'en'
38
+ DEFAULT_DST_LANGUAGE = 'en'
39
+
40
+
41
def percentile(arr, percent):
    """
    Compute a percentile of `arr`, with `percent` given as a fraction of 1.

    The values are sorted first; a fractional rank is resolved by linear
    interpolation between the two surrounding values.
    """
    values = sorted(arr)
    rank = (len(values) - 1) * percent
    rank_below, rank_above = math.floor(rank), math.ceil(rank)

    if rank_below != rank_above:
        # Fractional rank: blend the neighbours, each weighted by its
        # distance to the opposite bound.
        weighted_low = values[int(rank_below)] * (rank_above - rank)
        weighted_high = values[int(rank_above)] * (rank - rank_below)
        return weighted_low + weighted_high

    return values[int(rank)]
54
+
55
+
56
class FLACConverter(object): # pylint: disable=too-few-public-methods
    """
    Class for converting a region of an input audio or video file into a FLAC audio file

    Instances are callables: calling one with a `(start, end)` region (in
    seconds) runs ffmpeg on `source_path` and returns the encoded FLAC bytes.
    """
    def __init__(self, source_path, include_before=0.25, include_after=0.25):
        # Path handed to ffmpeg via "-i".
        self.source_path = source_path
        # Seconds of extra audio kept before / after each region.
        self.include_before = include_before
        self.include_after = include_after

    def __call__(self, region):
        """
        Extract `region` from the source file and return it as FLAC bytes.

        Returns None if the conversion is interrupted with Ctrl-C. Errors
        raised while running ffmpeg propagate to the caller; the temporary
        FLAC file is removed in every case.
        """
        try:
            start, end = region
            start = max(0, start - self.include_before)
            end += self.include_after
            program_ffmpeg = which("ffmpeg")
            # delete=False necessary for running on Windows; the file is
            # removed explicitly in the finally clause below.
            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
            try:
                command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
                           "-y", "-i", self.source_path,
                           "-loglevel", "error", temp.name]
                use_shell = os.name == "nt"
                # Give ffmpeg an empty stdin and make sure the handle is closed.
                with open(os.devnull) as devnull:
                    subprocess.check_output(command, stdin=devnull, shell=use_shell)
                return temp.read()
            finally:
                # Also runs when ffmpeg fails, so the file is never left behind.
                temp.close()
                os.unlink(temp.name)

        except KeyboardInterrupt:
            return None
85
+
86
+
87
class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
    """
    Class for performing speech-to-text for an input FLAC file.

    Instances are callables: calling one with FLAC bytes posts them to the
    URL built from GOOGLE_SPEECH_API_URL and returns the first transcript
    found in the response, or None when there is none.
    """
    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY, proxies=None):
        self.language = language
        # Sample rate advertised in the Content-Type header of the request.
        self.rate = rate
        self.api_key = api_key
        # Maximum number of POST attempts per audio chunk.
        self.retries = retries
        # Optional proxies mapping forwarded to requests.post.
        self.proxies = proxies

    def __call__(self, data):
        """
        Return the capitalised transcript for `data`, or None.

        None is returned when every attempt fails to connect, when no response
        line carries a transcript, or when the call is interrupted with Ctrl-C.
        """
        try:
            # Neither value changes between attempts, so build them once.
            url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
            headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
            # Only pass `proxies` when one was configured, as the original did.
            extra = {"proxies": self.proxies} if self.proxies else {}

            for _ in range(self.retries):
                try:
                    resp = requests.post(url, data=data, headers=headers, **extra)
                except requests.exceptions.ConnectionError:
                    continue

                # The body is read as one JSON document per line.
                for line in resp.content.decode('utf-8').split("\n"):
                    try:
                        parsed = json.loads(line)
                        transcript = parsed['result'][0]['alternative'][0]['transcript']
                    except (IndexError, KeyError, JSONDecodeError):
                        # IndexError: empty result list; KeyError: object without
                        # the expected keys; JSONDecodeError: blank or non-JSON line.
                        continue
                    return transcript[:1].upper() + transcript[1:]

        except KeyboardInterrupt:
            return None

        return None
125
+
126
+
127
class Translator(object): # pylint: disable=too-few-public-methods
    """
    Class for translating a sentence from one language to another.

    The constructor builds a 'translate' v2 service object with the supplied
    API key; calling the instance translates one sentence from `src` to `dst`.
    """
    def __init__(self, language, api_key, src, dst):
        self.language = language
        self.api_key = api_key
        self.src = src
        self.dst = dst
        self.service = build('translate', 'v2',
                             developerKey=self.api_key)

    def __call__(self, sentence):
        """
        Translate `sentence`; return None for an empty sentence, for a response
        without a usable translation, or when interrupted with Ctrl-C.
        """
        try:
            if not sentence:
                return None

            query = {'source': self.src, 'target': self.dst, 'q': [sentence]}
            result = self.service.translations().list(**query).execute() # pylint: disable=no-member

            has_entries = 'translations' in result and result['translations']
            if has_entries and 'translatedText' in result['translations'][0]:
                return result['translations'][0]['translatedText']
            return None

        except KeyboardInterrupt:
            return None
158
+
159
def which(program):
    """
    Return the path for a given executable.

    On Windows ".exe" is appended to the name unless it is already there.
    If `program` contains a directory component it is checked as given.
    Otherwise the directory two levels above this source file is tried first,
    then every directory listed in the PATH environment variable, in order.
    Returns None when no executable file is found.
    """
    def is_exe(file_path):
        """
        Checks whether a file is executable.
        """
        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)

    # necessary to run on Windows; skip the suffix when the caller already
    # supplied it so "ffmpeg.exe" does not become "ffmpeg.exe.exe".
    if os.name == "nt" and not program.lower().endswith(".exe"):
        program += ".exe"

    fpath, _ = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
        return None

    # A copy placed next to the package directory takes precedence over PATH.
    local_program_path = str(PurePath(__file__).parent.parent.joinpath(program))
    if is_exe(local_program_path):
        return local_program_path

    # Fall back to the platform default search path instead of raising
    # KeyError when PATH is not set in the environment.
    search_path = os.environ.get("PATH", os.defpath)
    for path in search_path.split(os.pathsep):
        # PATH entries may be wrapped in double quotes (seen on Windows).
        path = path.strip('"')
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file
    return None
188
+
189
+
190
def extract_audio(filename, channels=1, rate=16000):
    """
    Extract audio from an input file to a temporary WAV file.

    Returns a `(wav_path, rate)` tuple. The caller owns the returned file and
    is responsible for deleting it.

    Raises Exception when `filename` is not an existing file or when ffmpeg
    cannot be located; errors from the ffmpeg run itself propagate unchanged.
    """
    # Validate before creating the temp file so a failed check leaves nothing
    # behind on disk.
    if not os.path.isfile(filename):
        print("The given file does not exist: {}".format(filename))
        raise Exception("Invalid filepath: {}".format(filename))
    program_ffmpeg = which("ffmpeg")
    if not program_ffmpeg:
        print("ffmpeg: Executable not found on machine.")
        raise Exception("Dependency not found: ffmpeg")

    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    # Only the reserved path is needed; ffmpeg writes the content itself.
    temp.close()

    command = [str(program_ffmpeg), "-y", "-i", filename,
               "-ac", str(channels), "-ar", str(rate),
               "-loglevel", "error", temp.name]
    use_shell = os.name == "nt"
    try:
        with open(os.devnull) as devnull:
            subprocess.check_output(command, stdin=devnull, shell=use_shell)
    except BaseException:
        # The caller never learns the path on failure, so remove it here.
        os.remove(temp.name)
        raise
    return temp.name, rate
208
+
209
+
210
def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
    """
    Perform voice activity detection on a given audio file.

    The WAV file is read in chunks of `frame_width` frames and the RMS energy
    of each chunk is computed. Chunks whose energy is at or below the 20th
    percentile of all energies count as silence. Returns a list of
    `(start, end)` tuples in seconds; a region is emitted once it is at least
    `min_region_size` long and is followed by silence or has reached
    `max_region_size`. A region still open when the audio ends is not emitted.
    """
    reader = wave.open(filename)
    try:
        sample_width = reader.getsampwidth()
        rate = reader.getframerate()
        n_channels = reader.getnchannels()
        n_chunks = int(math.ceil(reader.getnframes() * 1.0 / frame_width))
        # NOTE(review): the width passed to audioop.rms is the frame width
        # (sample width * channels); this equals the sample width only for
        # mono input, which is what extract_audio produces by default.
        energies = [
            audioop.rms(reader.readframes(frame_width), sample_width * n_channels)
            for _ in range(n_chunks)
        ]
    finally:
        # Release the file handle even if reading fails.
        reader.close()

    if not energies:
        # Audio with no frames: nothing to analyse (percentile() would fail).
        return []

    chunk_duration = float(frame_width) / rate
    threshold = percentile(energies, 0.2)

    elapsed_time = 0

    regions = []
    region_start = None

    for energy in energies:
        is_silence = energy <= threshold
        # Compare against None explicitly: a region that starts at time 0 is a
        # real region even though 0 is falsy.
        in_region = region_start is not None
        max_exceeded = in_region and elapsed_time - region_start >= max_region_size

        if in_region and (max_exceeded or is_silence):
            if elapsed_time - region_start >= min_region_size:
                regions.append((region_start, elapsed_time))
                region_start = None

        elif not in_region and not is_silence:
            region_start = elapsed_time
        elapsed_time += chunk_duration
    return regions
247
+
248
+
249
def _map_with_progress(pool, func, items, label, total):
    """
    Apply func to each item on the worker pool, in order, behind a progress bar.
    """
    widgets = [label, Percentage(), ' ', Bar(), ' ', ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=total).start()
    results = []
    try:
        for i, result in enumerate(pool.imap(func, items)):
            results.append(result)
            pbar.update(i)
    except KeyboardInterrupt:
        # Leave the terminal in a clean state before the interrupt propagates.
        pbar.finish()
        raise
    pbar.finish()
    return results


def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
        source_path,
        output=None,
        concurrency=DEFAULT_CONCURRENCY,
        src_language=DEFAULT_SRC_LANGUAGE,
        dst_language=DEFAULT_DST_LANGUAGE,
        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
        api_key=None,
        proxies=None
    ):
    """
    Given an input audio/video file, generate subtitles in the specified language and format.

    Steps: extract the audio to a temporary WAV file, detect speech regions,
    convert each region to FLAC, transcribe each region (through `proxies`
    when given), optionally translate the transcripts, then write the
    formatted subtitles to `output` (or to `source_path` with its extension
    replaced by the subtitle format).

    Returns the path of the written subtitle file. Returns the integer 1,
    after printing an error, when translation is required but `api_key` is
    not supplied. KeyboardInterrupt is re-raised after cleanup.
    """
    audio_filename, audio_rate = extract_audio(source_path)
    pool = None

    try:
        regions = find_speech_regions(audio_filename)

        if sys.platform == "darwin":
            # Per the original author's note, the default fork start method
            # does not work on Mac OS, so forkserver is used there. A dedicated
            # context keeps the choice local to this pool instead of changing
            # the process-wide start method (which fails once one is fixed).
            pool = multiprocessing.get_context('forkserver').Pool(concurrency)
        else:
            pool = multiprocessing.Pool(concurrency)

        converter = FLACConverter(source_path=audio_filename)
        recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
                                      api_key=GOOGLE_SPEECH_API_KEY, proxies=proxies)

        transcripts = []
        if regions:
            try:
                extracted_regions = _map_with_progress(
                    pool, converter, regions,
                    "Converting speech regions to FLAC files: ", len(regions))
                transcripts = _map_with_progress(
                    pool, recognizer, extracted_regions,
                    "Performing speech recognition: ", len(regions))

                # Translate only when the base languages differ (the part of
                # the code before any "-" region suffix).
                if src_language.split("-")[0] != dst_language.split("-")[0]:
                    if not api_key:
                        print(
                            "Error: Subtitle translation requires specified Google Translate API key. "
                            "See --help for further information."
                        )
                        return 1
                    google_translate_api_key = api_key
                    translator = Translator(dst_language, google_translate_api_key,
                                            dst=dst_language,
                                            src=src_language)
                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
                    transcripts = _map_with_progress(
                        pool, translator, transcripts, prompt, len(regions))

            except KeyboardInterrupt:
                print("Cancelling transcription")
                raise

        # Drop regions for which no transcript was produced.
        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
        formatter = FORMATTERS.get(subtitle_file_format)
        formatted_subtitles = formatter(timed_subtitles)

        dest = output

        if not dest:
            base = os.path.splitext(source_path)[0]
            dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

        with open(dest, 'wb') as output_file:
            output_file.write(formatted_subtitles.encode("utf-8"))

        return dest

    finally:
        # Runs on every exit path (success, early return, error, Ctrl-C) so the
        # worker processes and the temporary WAV file never outlive the call.
        if pool is not None:
            pool.terminate()
            pool.join()
        try:
            os.remove(audio_filename)
        except OSError:
            pass
343
+
344
+
345
def validate(args):
    """
    Decide whether the parsed CLI arguments are acceptable.

    The subtitle format, source language, destination language and source
    path are checked in that order. The first problem found is printed and
    False is returned; True means every check passed.
    """
    def reject(message):
        """
        Print the reason and produce the failure result.
        """
        print(message)
        return False

    if args.format not in FORMATTERS:
        return reject(
            "Subtitle format not supported. "
            "Run with --list-formats to see all supported formats."
        )

    if args.src_language not in LANGUAGE_CODES:
        return reject(
            "Source language not supported. "
            "Run with --list-languages to see all supported languages."
        )

    if args.dst_language not in LANGUAGE_CODES:
        return reject(
            "Destination language not supported. "
            "Run with --list-languages to see all supported languages."
        )

    if not args.source_path:
        return reject("Error: You need to specify a source path.")

    return True
375
+
376
+
377
def main():
    """
    Run autosub as a command-line program.

    Returns the process exit status: 0 on success (including the --list-*
    options), 1 when the arguments are invalid, when subtitle generation
    reports a failure, or when the run is interrupted with Ctrl-C.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
                        nargs='?')
    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
                        type=int, default=DEFAULT_CONCURRENCY)
    # Adjacent string literals are used instead of a backslash continuation so
    # that no run of indentation whitespace ends up inside the help text.
    parser.add_argument('-o', '--output',
                        help="Output path for subtitles (by default, subtitles are saved in "
                             "the same directory and name as the source path)")
    parser.add_argument('-F', '--format', help="Destination subtitle format",
                        default=DEFAULT_SUBTITLE_FORMAT)
    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
                        default=DEFAULT_SRC_LANGUAGE)
    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
                        default=DEFAULT_DST_LANGUAGE)
    parser.add_argument('-K', '--api-key',
                        help="The Google Translate API key to be used. "
                             "(Required for subtitle translation)")
    parser.add_argument('--list-formats', help="List all available subtitle formats",
                        action='store_true')
    parser.add_argument('--list-languages', help="List all available source/destination languages",
                        action='store_true')

    args = parser.parse_args()

    if args.list_formats:
        print("List of formats:")
        for subtitle_format in FORMATTERS:
            print("{format}".format(format=subtitle_format))
        return 0

    if args.list_languages:
        print("List of all languages:")
        for code, language in sorted(LANGUAGE_CODES.items()):
            print("{code}\t{language}".format(code=code, language=language))
        return 0

    if not validate(args):
        return 1

    try:
        subtitle_file_path = generate_subtitles(
            source_path=args.source_path,
            concurrency=args.concurrency,
            src_language=args.src_language,
            dst_language=args.dst_language,
            api_key=args.api_key,
            subtitle_file_format=args.format,
            output=args.output,
        )
    except KeyboardInterrupt:
        return 1

    if subtitle_file_path == 1:
        # generate_subtitles returns 1 (after printing its own error) when a
        # translation was requested without an API key; no file was written,
        # so do not announce one and do not exit with success.
        return 1

    print("Subtitles file created at {}".format(subtitle_file_path))
    return 0
435
+
436
+
437
+ if __name__ == '__main__':
438
+ sys.exit(main())
autosub/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (11.8 kB). View file
 
autosub/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (11.7 kB). View file
 
autosub/__pycache__/constants.cpython-310.pyc ADDED
Binary file (3.3 kB). View file
 
autosub/__pycache__/constants.cpython-37.pyc ADDED
Binary file (2.66 kB). View file
 
autosub/__pycache__/formatters.cpython-310.pyc ADDED
Binary file (2.17 kB). View file
 
autosub/__pycache__/formatters.cpython-37.pyc ADDED
Binary file (2.12 kB). View file
 
autosub/constants.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Defines constants used by autosub.
3
+ """
4
+
5
+ from __future__ import unicode_literals
6
+
7
# NOTE(review): credential committed in source; consider loading it from the
# environment or configuration instead.
GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
# URL template; {lang} and {key} are filled in per request.
# NOTE(review): plain http - the audio and the key travel unencrypted.
GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long

# Language code -> display name. Codes are used for argument validation and
# are listed by --list-languages.
LANGUAGE_CODES = {
    'af': 'Afrikaans',
    'ar': 'Arabic',
    'az': 'Azerbaijani',
    'be': 'Belarusian',
    'bg': 'Bulgarian',
    'bn': 'Bengali',
    'bs': 'Bosnian',
    'ca': 'Catalan',
    'ceb': 'Cebuano',
    'cs': 'Czech',
    'cy': 'Welsh',
    'da': 'Danish',
    'de': 'German',
    'el': 'Greek',
    # Plain 'en' is the default source and destination language, so it must be
    # present here for a default invocation to pass validation.
    'en': 'English',
    'en-AU': 'English (Australia)',
    'en-CA': 'English (Canada)',
    'en-GB': 'English (United Kingdom)',
    'en-IN': 'English (India)',
    'en-IE': 'English (Ireland)',
    'en-NZ': 'English (New Zealand)',
    'en-PH': 'English (Philippines)',
    'en-SG': 'English (Singapore)',
    'en-US': 'English (United States)',
    'eo': 'Esperanto',
    'es-AR': 'Spanish (Argentina)',
    'es-CL': 'Spanish (Chile)',
    'es-ES': 'Spanish (Spain)',
    'es-US': 'Spanish (United States)',
    'es-MX': 'Spanish (Mexico)',
    'es': 'Spanish',
    'et': 'Estonian',
    'eu': 'Basque',
    'fa': 'Persian',
    'fi': 'Finnish',
    'fr': 'French',
    'ga': 'Irish',
    'gl': 'Galician',
    'gu': 'Gujarati',
    'ha': 'Hausa',
    'hi': 'Hindi',
    'hmn': 'Hmong',
    'hr': 'Croatian',
    'ht': 'Haitian Creole',
    'hu': 'Hungarian',
    'hy': 'Armenian',
    'id': 'Indonesian',
    'ig': 'Igbo',
    'is': 'Icelandic',
    'it': 'Italian',
    'iw': 'Hebrew',
    'ja': 'Japanese',
    'jw': 'Javanese',
    'ka': 'Georgian',
    'kk': 'Kazakh',
    'km': 'Khmer',
    'kn': 'Kannada',
    'ko': 'Korean',
    'la': 'Latin',
    'lo': 'Lao',
    'lt': 'Lithuanian',
    'lv': 'Latvian',
    'mg': 'Malagasy',
    'mi': 'Maori',
    'mk': 'Macedonian',
    'ml': 'Malayalam',
    'mn': 'Mongolian',
    'mr': 'Marathi',
    'ms': 'Malay',
    'mt': 'Maltese',
    'my': 'Myanmar (Burmese)',
    'ne': 'Nepali',
    'nl': 'Dutch',
    'no': 'Norwegian',
    'ny': 'Chichewa',
    'pa': 'Punjabi',
    'pl': 'Polish',
    'pt-BR': 'Portuguese (Brazil)',
    'pt-PT': 'Portuguese (Portugal)',
    'ro': 'Romanian',
    'ru': 'Russian',
    'si': 'Sinhala',
    'sk': 'Slovak',
    'sl': 'Slovenian',
    'so': 'Somali',
    'sq': 'Albanian',
    'sr': 'Serbian',
    'st': 'Sesotho',
    'su': 'Sundanese',
    'sv': 'Swedish',
    'sw': 'Swahili',
    'ta': 'Tamil',
    'te': 'Telugu',
    'tg': 'Tajik',
    'th': 'Thai',
    'tl': 'Filipino',
    'tr': 'Turkish',
    'uk': 'Ukrainian',
    'ur': 'Urdu',
    'uz': 'Uzbek',
    'vi': 'Vietnamese',
    'yi': 'Yiddish',
    'yo': 'Yoruba',
    'yue-Hant-HK': 'Cantonese, (Traditional HK)',
    'zh': 'Chinese (Simplified, China)',
    'zh-HK': 'Chinese (Simplified, Hong Kong)',
    'zh-TW': 'Chinese (Traditional, Taiwan)',
    'zu': 'Zulu',
}
autosub/formatters.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Defines subtitle formatters used by autosub.
3
+ """
4
+
5
+ # -*- coding: utf-8 -*-
6
+ from __future__ import unicode_literals
7
+
8
+ import json
9
+
10
+ import pysrt
11
+ import six
12
+
13
+
14
def srt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Render `((start, end), text)` pairs as SRT text.

    Each start time is moved earlier by `padding_before` (never below zero)
    and each end time later by `padding_after`. Items are numbered from 1.
    """
    srt_file = pysrt.SubRipFile()
    for number, (span, text) in enumerate(subtitles, start=1):
        begin, finish = span
        entry = pysrt.SubRipItem()
        entry.index = number
        entry.text = six.text_type(text)
        entry.start.seconds = max(0, begin - padding_before)
        entry.end.seconds = finish + padding_after
        srt_file.append(entry)
    rendered_items = [six.text_type(entry) for entry in srt_file]
    return '\n'.join(rendered_items)
27
+
28
+
29
def vtt_formatter(subtitles, padding_before=0, padding_after=0):
    """
    Serialize a list of subtitles according to the VTT format, with optional time padding.

    The subtitles are rendered as SRT first; the result is prefixed with the
    "WEBVTT" header and the decimal comma of each timing line is turned into a
    dot. Only timing lines are rewritten, so commas in the subtitle text are
    left untouched.
    """
    import re  # local import: the module-level imports do not provide it

    text = srt_formatter(subtitles, padding_before, padding_after)
    # Match whole "HH:MM:SS,mmm --> HH:MM:SS,mmm" timing lines only.
    text = re.sub(
        r'(?m)^(\d+:\d{2}:\d{2}),(\d{3}) --> (\d+:\d{2}:\d{2}),(\d{3})',
        r'\1.\2 --> \3.\4',
        text,
    )
    return 'WEBVTT\n\n' + text
36
+
37
+
38
def json_formatter(subtitles):
    """
    Encode `((start, end), text)` pairs as a JSON array of objects.

    Each object has the keys 'start', 'end' and 'content'.
    """
    records = []
    for (start, end), text in subtitles:
        records.append({'start': start, 'end': end, 'content': text})
    return json.dumps(records)
52
+
53
+
54
def raw_formatter(subtitles):
    """
    Join the text of every subtitle into one space-separated string.

    The timing information of each `(range, text)` pair is ignored.
    """
    texts = [text for (_rng, text) in subtitles]
    return ' '.join(texts)
59
+
60
+
61
+ FORMATTERS = {
62
+ 'srt': srt_formatter,
63
+ 'vtt': vtt_formatter,
64
+ 'json': json_formatter,
65
+ 'raw': raw_formatter,
66
+ }