|
import argparse |
|
|
|
from ._api import YouTubeTranscriptApi |
|
|
|
from .formatters import FormatterLoader |
|
|
|
|
|
class YouTubeTranscriptCli(object):
    """Command line front end for retrieving YouTube transcripts.

    The instance is created with the raw argument list, which is handed to
    ``argparse`` when ``run`` is called. ``run`` returns the text that the
    caller is expected to print.
    """

    def __init__(self, args):
        """Store the raw command line arguments.

        :param args: list of argument strings, parsed later by ``_parse_args``.
        """
        self._args = args

    def run(self):
        """Process every requested video and return the resulting output text.

        An exception raised while processing one video is collected and
        reported in the output instead of aborting the remaining videos.

        :return: the error messages followed by the results, separated by
            blank lines; an empty string if both transcript kinds are excluded.
        """
        parsed_args = self._parse_args()

        # Excluding both manually created and generated transcripts leaves
        # nothing that could be retrieved.
        if parsed_args.exclude_manually_created and parsed_args.exclude_generated:
            return ''

        proxies = None
        if parsed_args.http_proxy or parsed_args.https_proxy:
            proxies = {"http": parsed_args.http_proxy, "https": parsed_args.https_proxy}

        cookies = parsed_args.cookies

        transcripts = []
        exceptions = []

        for video_id in parsed_args.video_ids:
            try:
                transcripts.append(self._fetch_transcript(parsed_args, proxies, cookies, video_id))
            except Exception as exception:
                # Deliberately broad: a failure for one video must not prevent
                # the other videos from being processed. The error is reported
                # in the output by _build_output.
                exceptions.append(exception)

        return self._build_output(parsed_args, transcripts, exceptions)

    def _build_output(self, parsed_args, transcripts, exceptions):
        """Join error messages and results into the text returned by ``run``.

        :param parsed_args: the parsed arguments; ``list_transcripts`` and
            ``format`` are read.
        :param transcripts: the values returned by ``_fetch_transcript``.
        :param exceptions: the exceptions collected while fetching.
        :return: all sections joined by a blank line.
        """
        print_sections = [str(exception) for exception in exceptions]

        if transcripts:
            if parsed_args.list_transcripts:
                # In list mode _fetch_transcript returns ready-to-print strings
                # (str(transcript_list)), not fetched transcript data, so they
                # are emitted verbatim instead of being passed to a formatter.
                print_sections.extend(transcripts)
            else:
                formatter = FormatterLoader().load(parsed_args.format)
                print_sections.append(formatter.format_transcripts(transcripts))

        return '\n\n'.join(print_sections)

    def _fetch_transcript(self, parsed_args, proxies, cookies, video_id):
        """Retrieve the requested information for a single video.

        :param parsed_args: the parsed arguments selecting what is retrieved.
        :param proxies: proxy mapping passed on to the API, or ``None``.
        :param cookies: value of ``--cookies`` passed on to the API.
        :param video_id: ID of the video to process.
        :return: ``str(transcript_list)`` if ``--list-transcripts`` is set,
            otherwise the result of ``fetch()`` on the selected transcript.
        """
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id, proxies=proxies, cookies=cookies)

        if parsed_args.list_transcripts:
            return str(transcript_list)

        if parsed_args.exclude_manually_created:
            transcript = transcript_list.find_generated_transcript(parsed_args.languages)
        elif parsed_args.exclude_generated:
            transcript = transcript_list.find_manually_created_transcript(parsed_args.languages)
        else:
            transcript = transcript_list.find_transcript(parsed_args.languages)

        # --translate defaults to '', which means no translation was requested.
        if parsed_args.translate:
            transcript = transcript.translate(parsed_args.translate)

        return transcript.fetch()

    def _parse_args(self):
        """Build the argument parser and parse the stored arguments.

        :return: the parsed ``argparse`` namespace with sanitized video IDs.
        """
        parser = argparse.ArgumentParser(
            description=(
                'This is an python API which allows you to get the transcripts/subtitles for a given YouTube video. '
                'It also works for automatically generated subtitles and it does not require a headless browser, like '
                'other selenium based solutions do!'
            )
        )
        parser.add_argument(
            '--list-transcripts',
            action='store_true',
            help='This will list the languages in which the given videos are available in.',
        )
        parser.add_argument('video_ids', nargs='+', type=str, help='List of YouTube video IDs.')
        parser.add_argument(
            '--languages',
            nargs='*',
            default=['en',],
            type=str,
            help=(
                'A list of language codes in a descending priority. For example, if this is set to "de en" it will '
                'first try to fetch the german transcript (de) and then fetch the english transcript (en) if it fails '
                'to do so. As I can\'t provide a complete list of all working language codes with full certainty, you '
                'may have to play around with the language codes a bit, to find the one which is working for you!'
            ),
        )
        parser.add_argument(
            '--exclude-generated',
            action='store_true',
            help='If this flag is set transcripts which have been generated by YouTube will not be retrieved.',
        )
        parser.add_argument(
            '--exclude-manually-created',
            action='store_true',
            help='If this flag is set transcripts which have been manually created will not be retrieved.',
        )
        parser.add_argument(
            '--format',
            type=str,
            default='pretty',
            choices=tuple(FormatterLoader.TYPES.keys()),
        )
        parser.add_argument(
            '--translate',
            default='',
            help=(
                'The language code for the language you want this transcript to be translated to. Use the '
                '--list-transcripts feature to find out which languages are translatable and which translation '
                'languages are available.'
            )
        )
        parser.add_argument(
            '--http-proxy',
            default='',
            metavar='URL',
            help='Use the specified HTTP proxy.'
        )
        parser.add_argument(
            '--https-proxy',
            default='',
            metavar='URL',
            help='Use the specified HTTPS proxy.'
        )
        parser.add_argument(
            '--cookies',
            default=None,
            help='The cookie file that will be used for authorization with youtube.'
        )

        return self._sanitize_video_ids(parser.parse_args(self._args))

    def _sanitize_video_ids(self, args):
        """Remove every backslash from the given video IDs.

        :param args: parsed arguments; ``video_ids`` is replaced in place.
        :return: the same ``args`` object.
        """
        args.video_ids = [video_id.replace('\\', '') for video_id in args.video_ids]
        return args
|
|