pydocstyle
Browse files- pytube/__init__.py +4 -5
- pytube/__main__.py +20 -15
- pytube/cipher.py +34 -18
- pytube/cli.py +6 -12
- pytube/compat.py +1 -6
- pytube/exceptions.py +8 -12
- pytube/extract.py +18 -17
- pytube/helpers.py +7 -14
- pytube/itags.py +2 -8
- pytube/logging.py +6 -6
- pytube/mixins.py +3 -9
- pytube/query.py +15 -17
- pytube/request.py +3 -8
- pytube/streams.py +14 -14
- setup.cfg +1 -10
pytube/__init__.py
CHANGED
@@ -2,12 +2,11 @@
|
|
2 |
# flake8: noqa
|
3 |
# noreorder
|
4 |
"""
|
5 |
-
|
6 |
-
|
|
|
|
|
7 |
|
8 |
-
A lightweight, dependency-free Python library (and command-line utility) for
|
9 |
-
downloading YouTube Videos. It's extensively documented and follows best
|
10 |
-
practice patterns.
|
11 |
"""
|
12 |
__title__ = 'pytube'
|
13 |
__version__ = '6.4.3'
|
|
|
2 |
# flake8: noqa
|
3 |
# noreorder
|
4 |
"""
|
5 |
+
Pytube is a Python library for downloading YouTube Videos.
|
6 |
+
|
7 |
+
Pytube aims to be lightweight, dependency-free, extensively documented and
|
8 |
+
to follow best practice patterns.
|
9 |
|
|
|
|
|
|
|
10 |
"""
|
11 |
__title__ = 'pytube'
|
12 |
__version__ = '6.4.3'
|
pytube/__main__.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
-
pytube.
|
4 |
-
~~~~~~~~~~~~~~~
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
|
10 |
"""
|
11 |
from __future__ import absolute_import
|
@@ -27,12 +26,13 @@ logger = logging.getLogger(__name__)
|
|
27 |
|
28 |
|
29 |
class YouTube(object):
|
|
|
30 |
|
31 |
def __init__(
|
32 |
self, url=None, defer_init=False, on_progress_callback=None,
|
33 |
on_complete_callback=None,
|
34 |
):
|
35 |
-
"""
|
36 |
|
37 |
:param str url:
|
38 |
A valid YouTube watch URL.
|
@@ -69,7 +69,7 @@ class YouTube(object):
|
|
69 |
# A dictionary shared between all instances of :class:`Stream <Stream>`
|
70 |
# (Borg pattern).
|
71 |
self.stream_monostate = {
|
72 |
-
# user defined callback functions
|
73 |
'on_progress': on_progress_callback,
|
74 |
'on_complete': on_complete_callback,
|
75 |
}
|
@@ -78,9 +78,12 @@ class YouTube(object):
|
|
78 |
self.init()
|
79 |
|
80 |
def init(self):
|
81 |
-
"""
|
82 |
-
the instances of :class:`Stream <Stream>`.
|
83 |
|
|
|
|
|
|
|
|
|
84 |
"""
|
85 |
logger.info('init started')
|
86 |
self.prefetch()
|
@@ -111,7 +114,9 @@ class YouTube(object):
|
|
111 |
logger.info('init finished successfully')
|
112 |
|
113 |
def prefetch(self):
|
114 |
-
"""Eagerly
|
|
|
|
|
115 |
operations don't does need to make calls outside of the interpreter
|
116 |
which blocks for long periods of time.
|
117 |
|
@@ -129,7 +134,9 @@ class YouTube(object):
|
|
129 |
])
|
130 |
|
131 |
def initialize_stream_objects(self, fmt):
|
132 |
-
"""
|
|
|
|
|
133 |
instances of :class:`Stream <Stream>` for each media stream.
|
134 |
|
135 |
:param str fmt:
|
@@ -166,8 +173,7 @@ class YouTube(object):
|
|
166 |
return StreamQuery([s for s in self.fmt_streams])
|
167 |
|
168 |
def register_on_progress_callback(self, func):
|
169 |
-
"""
|
170 |
-
initialization.
|
171 |
|
172 |
:param callable func:
|
173 |
A callback function that takes ``stream``, ``chunk``,
|
@@ -176,8 +182,7 @@ class YouTube(object):
|
|
176 |
self._monostate['on_progress'] = func
|
177 |
|
178 |
def register_on_complete_callback(self, func):
|
179 |
-
"""
|
180 |
-
initialization.
|
181 |
|
182 |
:param callable func:
|
183 |
A callback function that takes ``stream`` and ``file_handle``.
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
+
This module implements the core developer interface for pytube.
|
|
|
4 |
|
5 |
+
The problem domain of the :class:`YouTube <YouTube>` class focuses almost
|
6 |
+
exclusively on the developer interface. Pytube offloads the heavy lifting to
|
7 |
+
smaller peripheral modules and functions.
|
8 |
|
9 |
"""
|
10 |
from __future__ import absolute_import
|
|
|
26 |
|
27 |
|
28 |
class YouTube(object):
|
29 |
+
"""Core developer interface for pytube."""
|
30 |
|
31 |
def __init__(
|
32 |
self, url=None, defer_init=False, on_progress_callback=None,
|
33 |
on_complete_callback=None,
|
34 |
):
|
35 |
+
"""Construct a :class:`YouTube <YouTube>`.
|
36 |
|
37 |
:param str url:
|
38 |
A valid YouTube watch URL.
|
|
|
69 |
# A dictionary shared between all instances of :class:`Stream <Stream>`
|
70 |
# (Borg pattern).
|
71 |
self.stream_monostate = {
|
72 |
+
# user defined callback functions.
|
73 |
'on_progress': on_progress_callback,
|
74 |
'on_complete': on_complete_callback,
|
75 |
}
|
|
|
78 |
self.init()
|
79 |
|
80 |
def init(self):
|
81 |
+
"""Download data, descramble it, and build Stream instances.
|
|
|
82 |
|
83 |
+
The initialization process takes advantage of Python's
|
84 |
+
"call-by-reference evaluation," which allows dictionary transforms to
|
85 |
+
be applied in-place, instead of holding references to mutations at each
|
86 |
+
interstitial step.
|
87 |
"""
|
88 |
logger.info('init started')
|
89 |
self.prefetch()
|
|
|
114 |
logger.info('init finished successfully')
|
115 |
|
116 |
def prefetch(self):
|
117 |
+
"""Eagerly download all necessary data.
|
118 |
+
|
119 |
+
Eagerly executes all necessary network requests so all other
|
120 |
operations don't need to make calls outside of the interpreter
|
121 |
which blocks for long periods of time.
|
122 |
|
|
|
134 |
])
|
135 |
|
136 |
def initialize_stream_objects(self, fmt):
|
137 |
+
"""Convert manifest data to instances of :class:`Stream <Stream>`.
|
138 |
+
|
139 |
+
Take the unscrambled stream data and use it to initialize
|
140 |
instances of :class:`Stream <Stream>` for each media stream.
|
141 |
|
142 |
:param str fmt:
|
|
|
173 |
return StreamQuery([s for s in self.fmt_streams])
|
174 |
|
175 |
def register_on_progress_callback(self, func):
|
176 |
+
"""Register a download progress callback function post initialization.
|
|
|
177 |
|
178 |
:param callable func:
|
179 |
A callback function that takes ``stream``, ``chunk``,
|
|
|
182 |
self._monostate['on_progress'] = func
|
183 |
|
184 |
def register_on_complete_callback(self, func):
|
185 |
+
"""Register a download complete callback function post initialization.
|
|
|
186 |
|
187 |
:param callable func:
|
188 |
A callback function that takes ``stream`` and ``file_handle``.
|
pytube/cipher.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
-
|
4 |
-
~~~~~~~~~~~~~
|
5 |
|
6 |
YouTube's strategy to restrict downloading videos is to send a ciphered version
|
7 |
of the signature to the client, along with the decryption algorithm obfuscated
|
@@ -14,6 +13,7 @@ signature sent in the GET parameters is valid, and then returns the content
|
|
14 |
This module is responsible for (1) finding and extracting those "transform
|
15 |
functions" (2) maps them to Python equivalents and (3) taking the ciphered
|
16 |
signature and decoding it.
|
|
|
17 |
"""
|
18 |
from __future__ import absolute_import
|
19 |
|
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
|
|
31 |
|
32 |
|
33 |
def get_initial_function_name(js):
|
34 |
-
"""
|
35 |
|
36 |
:param str js:
|
37 |
The contents of the base.js asset file.
|
@@ -44,8 +44,10 @@ def get_initial_function_name(js):
|
|
44 |
|
45 |
|
46 |
def get_transform_plan(js):
|
47 |
-
"""
|
48 |
-
|
|
|
|
|
49 |
|
50 |
:param str js:
|
51 |
The contents of the base.js asset file.
|
@@ -69,11 +71,12 @@ def get_transform_plan(js):
|
|
69 |
|
70 |
|
71 |
def get_transform_object(js, var):
|
72 |
-
"""
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
the
|
|
|
77 |
|
78 |
:param str js:
|
79 |
The contents of the base.js asset file.
|
@@ -99,7 +102,9 @@ def get_transform_object(js, var):
|
|
99 |
|
100 |
@memoize
|
101 |
def get_transform_map(js, var):
|
102 |
-
"""
|
|
|
|
|
103 |
Python equivalents.
|
104 |
|
105 |
:param str js:
|
@@ -120,9 +125,12 @@ def get_transform_map(js, var):
|
|
120 |
|
121 |
|
122 |
def reverse(arr, b):
|
123 |
-
"""
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
126 |
|
127 |
Example usage:
|
128 |
~~~~~~~~~~~~~~
|
@@ -133,7 +141,9 @@ def reverse(arr, b):
|
|
133 |
|
134 |
|
135 |
def splice(arr, b):
|
136 |
-
"""
|
|
|
|
|
137 |
|
138 |
Example usage:
|
139 |
~~~~~~~~~~~~~~
|
@@ -144,7 +154,9 @@ def splice(arr, b):
|
|
144 |
|
145 |
|
146 |
def swap(arr, b):
|
147 |
-
"""
|
|
|
|
|
148 |
function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b]=c }.
|
149 |
|
150 |
Example usage:
|
@@ -182,7 +194,9 @@ def map_functions(js_func):
|
|
182 |
|
183 |
|
184 |
def parse_function(js_func):
|
185 |
-
"""
|
|
|
|
|
186 |
containing the function name and some integer-based argument.
|
187 |
|
188 |
Sample Input:
|
@@ -204,7 +218,9 @@ def parse_function(js_func):
|
|
204 |
|
205 |
@memoize
|
206 |
def get_signature(js, ciphered_signature):
|
207 |
-
"""
|
|
|
|
|
208 |
returns the decrypted version.
|
209 |
|
210 |
:param str js:
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
+
This module contains all logic necessary to decipher the signature.
|
|
|
4 |
|
5 |
YouTube's strategy to restrict downloading videos is to send a ciphered version
|
6 |
of the signature to the client, along with the decryption algorithm obfuscated
|
|
|
13 |
This module is responsible for (1) finding and extracting those "transform
|
14 |
functions" (2) maps them to Python equivalents and (3) taking the ciphered
|
15 |
signature and decoding it.
|
16 |
+
|
17 |
"""
|
18 |
from __future__ import absolute_import
|
19 |
|
|
|
31 |
|
32 |
|
33 |
def get_initial_function_name(js):
|
34 |
+
"""Extract the name of the function responsible for computing the signature.
|
35 |
|
36 |
:param str js:
|
37 |
The contents of the base.js asset file.
|
|
|
44 |
|
45 |
|
46 |
def get_transform_plan(js):
|
47 |
+
"""Extract the "transform plan".
|
48 |
+
|
49 |
+
The "transform plan" is the functions the ciphered signature is passed
|
50 |
+
through to obtain the actual signature.
|
51 |
|
52 |
:param str js:
|
53 |
The contents of the base.js asset file.
|
|
|
71 |
|
72 |
|
73 |
def get_transform_object(js, var):
|
74 |
+
"""Extract the "transform object".
|
75 |
+
|
76 |
+
The "transform object" contains the function definitions referenced in the
|
77 |
+
"transform plan". The ``var`` argument is the obfuscated variable name
|
78 |
+
which contains these functions, for example, given the function call
|
79 |
+
``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var.
|
80 |
|
81 |
:param str js:
|
82 |
The contents of the base.js asset file.
|
|
|
102 |
|
103 |
@memoize
|
104 |
def get_transform_map(js, var):
|
105 |
+
"""Build a transform function lookup.
|
106 |
+
|
107 |
+
Build a lookup table of obfuscated JavaScript function names to the
|
108 |
Python equivalents.
|
109 |
|
110 |
:param str js:
|
|
|
125 |
|
126 |
|
127 |
def reverse(arr, b):
|
128 |
+
"""Reverse elements in a list.
|
129 |
+
|
130 |
+
This function is equivalent to: function(a, b) { a.reverse() }.
|
131 |
+
|
132 |
+
This method takes an unused ``b`` variable as their transform functions
|
133 |
+
are universally sent two arguments.
|
134 |
|
135 |
Example usage:
|
136 |
~~~~~~~~~~~~~~
|
|
|
141 |
|
142 |
|
143 |
def splice(arr, b):
|
144 |
+
"""Add/remove items to/from a list.
|
145 |
+
|
146 |
+
This function is equivalent to: function(a, b) { a.splice(0, b) }.
|
147 |
|
148 |
Example usage:
|
149 |
~~~~~~~~~~~~~~
|
|
|
154 |
|
155 |
|
156 |
def swap(arr, b):
|
157 |
+
"""Swap positions at b modulus the list length.
|
158 |
+
|
159 |
+
This function is equivalent to:
|
160 |
function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b]=c }.
|
161 |
|
162 |
Example usage:
|
|
|
194 |
|
195 |
|
196 |
def parse_function(js_func):
|
197 |
+
"""Parse the Javascript transform function.
|
198 |
+
|
199 |
+
Break a JavaScript transform function down into a two element tuple
|
200 |
containing the function name and some integer-based argument.
|
201 |
|
202 |
Sample Input:
|
|
|
218 |
|
219 |
@memoize
|
220 |
def get_signature(js, ciphered_signature):
|
221 |
+
"""Decipher the signature.
|
222 |
+
|
223 |
+
Taking the ciphered signature, applies the transform functions and
|
224 |
returns the decrypted version.
|
225 |
|
226 |
:param str js:
|
pytube/cli.py
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.cli
|
4 |
-
~~~~~~~~~~
|
5 |
-
|
6 |
-
A simple command line application to download youtube videos.
|
7 |
-
|
8 |
-
"""
|
9 |
from __future__ import print_function
|
10 |
|
11 |
import argparse
|
@@ -16,7 +10,7 @@ from pytube import YouTube
|
|
16 |
|
17 |
|
18 |
def main():
|
19 |
-
"""
|
20 |
parser = argparse.ArgumentParser(description=main.__doc__)
|
21 |
parser.add_argument('url', help='The YouTube /watch url', nargs='?')
|
22 |
parser.add_argument(
|
@@ -41,13 +35,13 @@ def main():
|
|
41 |
|
42 |
|
43 |
def get_terminal_size():
|
44 |
-
"""
|
45 |
rows, columns = os.popen('stty size', 'r').read().split()
|
46 |
return int(rows), int(columns)
|
47 |
|
48 |
|
49 |
def display_progress_bar(bytes_received, filesize, ch='█', scale=0.55):
|
50 |
-
"""
|
51 |
|
52 |
Example:
|
53 |
~~~~~~~~
|
@@ -94,7 +88,7 @@ def on_progress(stream, file_handle, bytes_remaining):
|
|
94 |
|
95 |
|
96 |
def download(url, itag):
|
97 |
-
"""
|
98 |
|
99 |
:param str url:
|
100 |
A valid YouTube watch URL.
|
@@ -115,7 +109,7 @@ def download(url, itag):
|
|
115 |
|
116 |
|
117 |
def display_streams(url):
|
118 |
-
"""
|
119 |
|
120 |
:param str url:
|
121 |
A valid YouTube watch URL.
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""A simple command line application to download youtube videos."""
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from __future__ import print_function
|
4 |
|
5 |
import argparse
|
|
|
10 |
|
11 |
|
12 |
def main():
|
13 |
+
"""Command line application to download youtube videos."""
|
14 |
parser = argparse.ArgumentParser(description=main.__doc__)
|
15 |
parser.add_argument('url', help='The YouTube /watch url', nargs='?')
|
16 |
parser.add_argument(
|
|
|
35 |
|
36 |
|
37 |
def get_terminal_size():
|
38 |
+
"""Return the terminal size in rows and columns."""
|
39 |
rows, columns = os.popen('stty size', 'r').read().split()
|
40 |
return int(rows), int(columns)
|
41 |
|
42 |
|
43 |
def display_progress_bar(bytes_received, filesize, ch='█', scale=0.55):
|
44 |
+
"""Display a simple, pretty progress bar.
|
45 |
|
46 |
Example:
|
47 |
~~~~~~~~
|
|
|
88 |
|
89 |
|
90 |
def download(url, itag):
|
91 |
+
"""Start downloading a YouTube video.
|
92 |
|
93 |
:param str url:
|
94 |
A valid YouTube watch URL.
|
|
|
109 |
|
110 |
|
111 |
def display_streams(url):
|
112 |
+
"""Probe YouTube video and list its available formats.
|
113 |
|
114 |
:param str url:
|
115 |
A valid YouTube watch URL.
|
pytube/compat.py
CHANGED
@@ -1,12 +1,7 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
# -*- coding: utf-8 -*-
|
3 |
# flake8: noqa
|
4 |
-
"""
|
5 |
-
pytube.compat
|
6 |
-
~~~~~~~~~~~~~
|
7 |
-
|
8 |
-
Some py2/py3 compatibility support.
|
9 |
-
"""
|
10 |
import sys
|
11 |
|
12 |
python_version = sys.version_info[0]
|
|
|
1 |
#!/usr/bin/env python
|
2 |
# -*- coding: utf-8 -*-
|
3 |
# flake8: noqa
|
4 |
+
"""Python 2/3 compatibility support."""
|
|
|
|
|
|
|
|
|
|
|
5 |
import sys
|
6 |
|
7 |
python_version = sys.version_info[0]
|
pytube/exceptions.py
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.exceptions
|
4 |
-
~~~~~~~~~~~~~~~~~
|
5 |
-
|
6 |
-
pytube specific exception definitions.
|
7 |
-
"""
|
8 |
import socket
|
9 |
import sys
|
10 |
from traceback import format_tb
|
@@ -13,9 +8,11 @@ from pytube.compat import URLError
|
|
13 |
|
14 |
|
15 |
class PytubeError(Exception):
|
16 |
-
"""Base pytube exception that all others inherent.
|
17 |
-
|
18 |
-
|
|
|
|
|
19 |
"""
|
20 |
|
21 |
|
@@ -23,8 +20,7 @@ class ExtractError(PytubeError):
|
|
23 |
"""Data extraction based exception."""
|
24 |
|
25 |
def __init__(self, msg, tb=None, expected=False, video_id=None):
|
26 |
-
"""
|
27 |
-
exception.
|
28 |
|
29 |
:param str msg:
|
30 |
User defined error message.
|
@@ -56,7 +52,7 @@ class ExtractError(PytubeError):
|
|
56 |
self.video_id = video_id
|
57 |
|
58 |
def format_traceback(self):
|
59 |
-
"""Pretty-print traceback"""
|
60 |
if self.traceback:
|
61 |
return ''.join(format_tb(self.traceback))
|
62 |
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""Library specific exception definitions."""
|
|
|
|
|
|
|
|
|
|
|
3 |
import socket
|
4 |
import sys
|
5 |
from traceback import format_tb
|
|
|
8 |
|
9 |
|
10 |
class PytubeError(Exception):
|
11 |
+
"""Base pytube exception that all others inherit from.
|
12 |
+
|
13 |
+
This is done to not pollute the built-in exceptions, which *could* result
|
14 |
+
in unintended errors being unexpectedly and incorrectly handled within
|
15 |
+
implementers code.
|
16 |
"""
|
17 |
|
18 |
|
|
|
20 |
"""Data extraction based exception."""
|
21 |
|
22 |
def __init__(self, msg, tb=None, expected=False, video_id=None):
|
23 |
+
"""Construct an instance of a :class:`ExtractError <ExtractError>`.
|
|
|
24 |
|
25 |
:param str msg:
|
26 |
User defined error message.
|
|
|
52 |
self.video_id = video_id
|
53 |
|
54 |
def format_traceback(self):
|
55 |
+
"""Pretty-print the traceback."""
|
56 |
if self.traceback:
|
57 |
return ''.join(format_tb(self.traceback))
|
58 |
|
pytube/extract.py
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.extract
|
4 |
-
~~~~~~~~~~~~~~
|
5 |
-
|
6 |
-
This module is responsible for all non-cipher related data extraction
|
7 |
-
(primarily used during data pre-fetching).
|
8 |
-
"""
|
9 |
import json
|
10 |
|
11 |
from pytube.compat import quote
|
@@ -15,7 +9,7 @@ from pytube.helpers import regex_search
|
|
15 |
|
16 |
|
17 |
def video_id(url):
|
18 |
-
"""
|
19 |
|
20 |
:param str url:
|
21 |
A url YouTube id containing a video_id.
|
@@ -25,7 +19,7 @@ def video_id(url):
|
|
25 |
|
26 |
|
27 |
def watch_url(video_id):
|
28 |
-
"""
|
29 |
|
30 |
:param str video_id:
|
31 |
A YouTube video identifer.
|
@@ -35,7 +29,7 @@ def watch_url(video_id):
|
|
35 |
|
36 |
|
37 |
def video_info_url(video_id, watch_url, watch_html):
|
38 |
-
"""
|
39 |
|
40 |
:param str video_id:
|
41 |
A YouTube video identifer.
|
@@ -61,8 +55,10 @@ def video_info_url(video_id, watch_url, watch_html):
|
|
61 |
|
62 |
|
63 |
def js_url(watch_html):
|
64 |
-
"""
|
65 |
-
|
|
|
|
|
66 |
|
67 |
:param str watch_html:
|
68 |
The html contents of the watch page.
|
@@ -74,9 +70,11 @@ def js_url(watch_html):
|
|
74 |
|
75 |
|
76 |
def mime_type_codec(mime_type_codec):
|
77 |
-
"""
|
78 |
-
|
79 |
-
|
|
|
|
|
80 |
|
81 |
:param str mime_type_codec:
|
82 |
String containing mime type and codecs.
|
@@ -89,8 +87,11 @@ def mime_type_codec(mime_type_codec):
|
|
89 |
|
90 |
@memoize
|
91 |
def get_ytplayer_config(watch_html):
|
92 |
-
"""
|
93 |
-
|
|
|
|
|
|
|
94 |
|
95 |
:param str watch_html:
|
96 |
The html contents of the watch page.
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""This module contains all non-cipher related data extraction logic."""
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import json
|
4 |
|
5 |
from pytube.compat import quote
|
|
|
9 |
|
10 |
|
11 |
def video_id(url):
|
12 |
+
"""Extract the ``video_id`` part of /watch?v=<video_id>.
|
13 |
|
14 |
:param str url:
|
15 |
A url YouTube id containing a video_id.
|
|
|
19 |
|
20 |
|
21 |
def watch_url(video_id):
|
22 |
+
"""Construct a YouTube watch url, given a video id.
|
23 |
|
24 |
:param str video_id:
|
25 |
A YouTube video identifier.
|
|
|
29 |
|
30 |
|
31 |
def video_info_url(video_id, watch_url, watch_html):
|
32 |
+
"""Construct the video_info url.
|
33 |
|
34 |
:param str video_id:
|
35 |
A YouTube video identifier.
|
|
|
55 |
|
56 |
|
57 |
def js_url(watch_html):
|
58 |
+
"""Get the base JavaScript url.
|
59 |
+
|
60 |
+
Construct the base JavaScript url, which contains the decipher
|
61 |
+
"transforms".
|
62 |
|
63 |
:param str watch_html:
|
64 |
The html contents of the watch page.
|
|
|
70 |
|
71 |
|
72 |
def mime_type_codec(mime_type_codec):
|
73 |
+
"""Parse the type data.
|
74 |
+
|
75 |
+
Breaks up the data in the ``type`` key of the manifest, which contains mime
|
76 |
+
type and codecs serialized together (e.g.: 'audio/webm; codecs="opus"'),
|
77 |
+
and splits them into separate elements. (e.g.: 'audio/webm', ['opus'])
|
78 |
|
79 |
:param str mime_type_codec:
|
80 |
String containing mime type and codecs.
|
|
|
87 |
|
88 |
@memoize
|
89 |
def get_ytplayer_config(watch_html):
|
90 |
+
"""Get the YouTube player configuration data from the watch html.
|
91 |
+
|
92 |
+
Extract the ``ytplayer_config``, which is json data embedded within the
|
93 |
+
watch html and serves as the primary source of obtaining the stream
|
94 |
+
manifest data.
|
95 |
|
96 |
:param str watch_html:
|
97 |
The html contents of the watch page.
|
pytube/helpers.py
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.helpers
|
4 |
-
~~~~~~~~~~~~~~
|
5 |
-
|
6 |
-
Various helper functions implemented by pytube.
|
7 |
-
|
8 |
-
"""
|
9 |
from __future__ import absolute_import
|
10 |
|
11 |
import functools
|
@@ -57,7 +51,7 @@ def regex_search(pattern, string, groups=False, group=None, flags=0):
|
|
57 |
|
58 |
|
59 |
def apply_mixin(dct, key, func, *args, **kwargs):
|
60 |
-
"""
|
61 |
|
62 |
:param dict dct:
|
63 |
Dictionary to apply mixin function to.
|
@@ -74,15 +68,16 @@ def apply_mixin(dct, key, func, *args, **kwargs):
|
|
74 |
|
75 |
|
76 |
def safe_filename(s, max_length=255):
|
77 |
-
"""
|
78 |
-
|
|
|
|
|
79 |
|
80 |
:param str text:
|
81 |
A string to make safe for use as a file name.
|
82 |
:param int max_length:
|
83 |
The maximum filename character length.
|
84 |
"""
|
85 |
-
|
86 |
# Characters in range 0-31 (0x00-0x1F) are not allowed in NTFS filenames.
|
87 |
ntfs_chrs = [chr(i) for i in range(0, 31)]
|
88 |
chrs = [
|
@@ -96,9 +91,7 @@ def safe_filename(s, max_length=255):
|
|
96 |
|
97 |
|
98 |
def memoize(func):
|
99 |
-
"""
|
100 |
-
avoid recomputation on repeat calls.
|
101 |
-
"""
|
102 |
cache = func.cache = {}
|
103 |
|
104 |
@functools.wraps(func)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""Various helper functions implemented by pytube."""
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from __future__ import absolute_import
|
4 |
|
5 |
import functools
|
|
|
51 |
|
52 |
|
53 |
def apply_mixin(dct, key, func, *args, **kwargs):
|
54 |
+
r"""Apply in-place data mutation to a dictionary.
|
55 |
|
56 |
:param dict dct:
|
57 |
Dictionary to apply mixin function to.
|
|
|
68 |
|
69 |
|
70 |
def safe_filename(s, max_length=255):
|
71 |
+
"""Sanitize a string making it safe to use as a filename.
|
72 |
+
|
73 |
+
This function was based off the limitations outlined here:
|
74 |
+
https://en.wikipedia.org/wiki/Filename.
|
75 |
|
76 |
:param str text:
|
77 |
A string to make safe for use as a file name.
|
78 |
:param int max_length:
|
79 |
The maximum filename character length.
|
80 |
"""
|
|
|
81 |
# Characters in range 0-31 (0x00-0x1F) are not allowed in NTFS filenames.
|
82 |
ntfs_chrs = [chr(i) for i in range(0, 31)]
|
83 |
chrs = [
|
|
|
91 |
|
92 |
|
93 |
def memoize(func):
|
94 |
+
"""Decorator that caches return values for given input arguments."""
|
|
|
|
|
95 |
cache = func.cache = {}
|
96 |
|
97 |
@functools.wraps(func)
|
pytube/itags.py
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.itags
|
4 |
-
~~~~~~~~~~~~
|
5 |
-
|
6 |
-
This module contains a lookup table of YouTube's format identifier codes
|
7 |
-
(itags) to some additional meta data not specified in the media manifest.
|
8 |
-
"""
|
9 |
|
10 |
ITAGS = {
|
11 |
5: ('240p', '64kbps'),
|
@@ -97,7 +91,7 @@ LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
|
|
97 |
|
98 |
|
99 |
def get_format_profile(itag):
|
100 |
-
"""
|
101 |
|
102 |
:param str itag:
|
103 |
YouTube format identifier code.
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""This module contains a lookup table of YouTube's itag values."""
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
ITAGS = {
|
5 |
5: ('240p', '64kbps'),
|
|
|
91 |
|
92 |
|
93 |
def get_format_profile(itag):
|
94 |
+
"""Get additional format information for a given itag.
|
95 |
|
96 |
:param str itag:
|
97 |
YouTube format identifier code.
|
pytube/logging.py
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.logging
|
4 |
-
~~~~~~~~~~~~~~
|
5 |
-
|
6 |
-
This module implements a log factory.
|
7 |
-
"""
|
8 |
from __future__ import absolute_import
|
9 |
|
10 |
import logging
|
11 |
|
12 |
|
13 |
def create_logger(level=logging.DEBUG):
|
|
|
|
|
|
|
|
|
|
|
14 |
fmt = '[%(asctime)s] %(levelname)s in %(module)s: %(message)s'
|
15 |
date_fmt = '%H:%M:%S'
|
16 |
formatter = logging.Formatter(fmt, datefmt=date_fmt)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""This module implements a log factory."""
|
|
|
|
|
|
|
|
|
|
|
3 |
from __future__ import absolute_import
|
4 |
|
5 |
import logging
|
6 |
|
7 |
|
8 |
def create_logger(level=logging.DEBUG):
|
9 |
+
"""Create a configured instance of logger.
|
10 |
+
|
11 |
+
:param int level:
|
12 |
+
Describe the severity level of the logs to handle.
|
13 |
+
"""
|
14 |
fmt = '[%(asctime)s] %(levelname)s in %(module)s: %(message)s'
|
15 |
date_fmt = '%H:%M:%S'
|
16 |
formatter = logging.Formatter(fmt, datefmt=date_fmt)
|
pytube/mixins.py
CHANGED
@@ -1,11 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.mixins
|
4 |
-
~~~~~~~~~~~~~
|
5 |
-
|
6 |
-
Applies in-place data mutations.
|
7 |
-
|
8 |
-
"""
|
9 |
from __future__ import absolute_import
|
10 |
|
11 |
import logging
|
@@ -20,7 +14,7 @@ logger = logging.getLogger(__name__)
|
|
20 |
|
21 |
|
22 |
def apply_signature(config_args, fmt, js):
|
23 |
-
"""
|
24 |
|
25 |
:param dict config_args:
|
26 |
Details of the media streams available.
|
@@ -57,7 +51,7 @@ def apply_signature(config_args, fmt, js):
|
|
57 |
|
58 |
|
59 |
def apply_descrambler(stream_data, key):
|
60 |
-
"""
|
61 |
|
62 |
:param dict dct:
|
63 |
Dictionary containing query string encoded values.
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""Applies in-place data mutations."""
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from __future__ import absolute_import
|
4 |
|
5 |
import logging
|
|
|
14 |
|
15 |
|
16 |
def apply_signature(config_args, fmt, js):
|
17 |
+
"""Apply the decrypted signature to the stream manifest.
|
18 |
|
19 |
:param dict config_args:
|
20 |
Details of the media streams available.
|
|
|
51 |
|
52 |
|
53 |
def apply_descrambler(stream_data, key):
|
54 |
+
"""Apply various in-place transforms to YouTube's media stream data.
|
55 |
|
56 |
:param dict dct:
|
57 |
Dictionary containing query string encoded values.
|
pytube/query.py
CHANGED
@@ -1,19 +1,12 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.query
|
4 |
-
~~~~~~~~~~~~
|
5 |
-
|
6 |
-
The :class:`StreamQuery <StreamQuery>` class provides an interface for querying
|
7 |
-
the available media streams.
|
8 |
-
"""
|
9 |
|
10 |
|
11 |
class StreamQuery:
|
|
|
12 |
|
13 |
def __init__(self, fmt_streams):
|
14 |
-
"""
|
15 |
-
"""
|
16 |
-
|
17 |
# list of :class:`Stream <Stream>` instances.
|
18 |
self.fmt_streams = fmt_streams
|
19 |
self.itag_index = {int(s.itag): s for s in fmt_streams}
|
@@ -24,8 +17,7 @@ class StreamQuery:
|
|
24 |
bitrate=None, video_codec=None, audio_codec=None,
|
25 |
custom_filter_functions=None,
|
26 |
):
|
27 |
-
"""Apply the given filtering criterion
|
28 |
-
:class:`StreamQuery <StreamQuery>`.
|
29 |
|
30 |
:param int fps:
|
31 |
(optional) The frames per second (30 or 60)
|
@@ -54,6 +46,7 @@ class StreamQuery:
|
|
54 |
:param list custom_filter_functions:
|
55 |
(optional) Interface for defining complex filters without
|
56 |
subclassing.
|
|
|
57 |
"""
|
58 |
filters = []
|
59 |
if res or resolution:
|
@@ -90,10 +83,11 @@ class StreamQuery:
|
|
90 |
return StreamQuery(fmt_streams)
|
91 |
|
92 |
def get_by_itag(self, itag):
|
93 |
-
"""
|
94 |
|
95 |
:param str itag:
|
96 |
YouTube format identifier code.
|
|
|
97 |
"""
|
98 |
try:
|
99 |
return self.itag_index[itag]
|
@@ -101,7 +95,9 @@ class StreamQuery:
|
|
101 |
pass
|
102 |
|
103 |
def first(self):
|
104 |
-
"""
|
|
|
|
|
105 |
contain any streams.
|
106 |
|
107 |
"""
|
@@ -111,7 +107,9 @@ class StreamQuery:
|
|
111 |
pass
|
112 |
|
113 |
def last(self):
|
114 |
-
"""
|
|
|
|
|
115 |
contain any streams.
|
116 |
|
117 |
"""
|
@@ -121,9 +119,9 @@ class StreamQuery:
|
|
121 |
pass
|
122 |
|
123 |
def count(self):
|
124 |
-
"""
|
125 |
return len(self.fmt_streams)
|
126 |
|
127 |
def all(self):
|
128 |
-
"""
|
129 |
return self.fmt_streams
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""This module provides a query interface for media streams."""
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
|
5 |
class StreamQuery:
|
6 |
+
"""Interface for querying the available media streams."""
|
7 |
|
8 |
def __init__(self, fmt_streams):
|
9 |
+
"""Construct a :class:`StreamQuery <StreamQuery>`."""
|
|
|
|
|
10 |
# list of :class:`Stream <Stream>` instances.
|
11 |
self.fmt_streams = fmt_streams
|
12 |
self.itag_index = {int(s.itag): s for s in fmt_streams}
|
|
|
17 |
bitrate=None, video_codec=None, audio_codec=None,
|
18 |
custom_filter_functions=None,
|
19 |
):
|
20 |
+
"""Apply the given filtering criterion.
|
|
|
21 |
|
22 |
:param int fps:
|
23 |
(optional) The frames per second (30 or 60)
|
|
|
46 |
:param list custom_filter_functions:
|
47 |
(optional) Interface for defining complex filters without
|
48 |
subclassing.
|
49 |
+
|
50 |
"""
|
51 |
filters = []
|
52 |
if res or resolution:
|
|
|
83 |
return StreamQuery(fmt_streams)
|
84 |
|
85 |
def get_by_itag(self, itag):
|
86 |
+
"""Get a :class:`Stream <Stream>` for an itag, or None if not found.
|
87 |
|
88 |
:param str itag:
|
89 |
YouTube format identifier code.
|
90 |
+
|
91 |
"""
|
92 |
try:
|
93 |
return self.itag_index[itag]
|
|
|
95 |
pass
|
96 |
|
97 |
def first(self):
|
98 |
+
"""Get the first element in the results.
|
99 |
+
|
100 |
+
Return the first result of this query or None if the result doesn't
|
101 |
contain any streams.
|
102 |
|
103 |
"""
|
|
|
107 |
pass
|
108 |
|
109 |
def last(self):
|
110 |
+
"""Get the last element in the results.
|
111 |
+
|
112 |
+
Return the last result of this query or None if the result doesn't
|
113 |
contain any streams.
|
114 |
|
115 |
"""
|
|
|
119 |
pass
|
120 |
|
121 |
def count(self):
|
122 |
+
"""Get the count the query would return."""
|
123 |
return len(self.fmt_streams)
|
124 |
|
125 |
def all(self):
|
126 |
+
"""Get all the results represented by this query as a list."""
|
127 |
return self.fmt_streams
|
pytube/request.py
CHANGED
@@ -1,10 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
-
pytube.request
|
4 |
-
~~~~~~~~~~~~~~
|
5 |
-
|
6 |
-
Implements a simple wrapper around urlopen.
|
7 |
-
"""
|
8 |
import multiprocessing
|
9 |
|
10 |
from pytube.compat import urlopen
|
@@ -14,7 +9,7 @@ def get(
|
|
14 |
url=None, urls=[], processes=2, headers=False,
|
15 |
streaming=False, chunk_size=8 * 1024,
|
16 |
):
|
17 |
-
"""
|
18 |
|
19 |
:param str url:
|
20 |
The URL to perform the GET request for.
|
@@ -46,7 +41,7 @@ def get(
|
|
46 |
|
47 |
|
48 |
def stream_response(response, chunk_size=8 * 1024):
|
49 |
-
"""
|
50 |
while True:
|
51 |
buf = response.read(chunk_size)
|
52 |
if not buf:
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""Implements a simple wrapper around urlopen."""
|
|
|
|
|
|
|
|
|
|
|
3 |
import multiprocessing
|
4 |
|
5 |
from pytube.compat import urlopen
|
|
|
9 |
url=None, urls=[], processes=2, headers=False,
|
10 |
streaming=False, chunk_size=8 * 1024,
|
11 |
):
|
12 |
+
"""Send an http GET request.
|
13 |
|
14 |
:param str url:
|
15 |
The URL to perform the GET request for.
|
|
|
41 |
|
42 |
|
43 |
def stream_response(response, chunk_size=8 * 1024):
|
44 |
+
"""Read the :func:`urlopen` response in chunks."""
|
45 |
while True:
|
46 |
buf = response.read(chunk_size)
|
47 |
if not buf:
|
pytube/streams.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
-
|
4 |
-
~~~~~~~~~~~~~~
|
5 |
|
6 |
A container object for the media stream (video only / audio only / video+audio
|
7 |
combined). This was referred to as ``Video`` in the legacy pytube version, but
|
@@ -25,9 +24,10 @@ logger = logging.getLogger(__name__)
|
|
25 |
|
26 |
|
27 |
class Stream(object):
|
|
|
28 |
|
29 |
def __init__(self, stream, player_config, monostate):
|
30 |
-
"""
|
31 |
|
32 |
:param dict stream:
|
33 |
The unscrambled data extracted from YouTube.
|
@@ -83,7 +83,7 @@ class Stream(object):
|
|
83 |
self.video_codec, self.audio_codec = self.parse_codecs()
|
84 |
|
85 |
def set_attributes_from_dict(self, dct):
|
86 |
-
"""
|
87 |
for key, val in dct.items():
|
88 |
setattr(self, key, val)
|
89 |
|
@@ -105,7 +105,9 @@ class Stream(object):
|
|
105 |
return self.type == 'video'
|
106 |
|
107 |
def parse_codecs(self):
|
108 |
-
"""
|
|
|
|
|
109 |
consistent two-element tuple, with the video codec as the first element
|
110 |
and audio as the second. Returns ``None`` if one is not available
|
111 |
(adaptive only).
|
@@ -124,15 +126,13 @@ class Stream(object):
|
|
124 |
@property
|
125 |
@memoize
|
126 |
def filesize(self):
|
127 |
-
"""
|
128 |
headers = request.get(self.url, headers=True)
|
129 |
return int(headers['Content-Length'])
|
130 |
|
131 |
@property
|
132 |
def default_filename(self):
|
133 |
-
"""
|
134 |
-
filesystem.
|
135 |
-
"""
|
136 |
title = self.player_config['args']['title']
|
137 |
filename = safe_filename(title)
|
138 |
return '{filename}.{s.subtype}'.format(filename=filename, s=self)
|
@@ -145,7 +145,6 @@ class Stream(object):
|
|
145 |
specified, defaults to the current working directory.
|
146 |
|
147 |
"""
|
148 |
-
|
149 |
# TODO(nficano): allow a filename to be specified.
|
150 |
output_path = output_path or os.getcwd()
|
151 |
|
@@ -165,10 +164,11 @@ class Stream(object):
|
|
165 |
self.on_progress(chunk, fh, bytes_remaining)
|
166 |
|
167 |
def on_progress(self, chunk, file_handler, bytes_remaining):
|
168 |
-
"""
|
169 |
-
|
170 |
-
|
171 |
-
|
|
|
172 |
|
173 |
:param str chunk:
|
174 |
Segment of media file binary data, not yet written to disk.
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""
|
3 |
+
This module contains a container for stream manifest data.
|
|
|
4 |
|
5 |
A container object for the media stream (video only / audio only / video+audio
|
6 |
combined). This was referred to as ``Video`` in the legacy pytube version, but
|
|
|
24 |
|
25 |
|
26 |
class Stream(object):
|
27 |
+
"""Container for stream manifest data."""
|
28 |
|
29 |
def __init__(self, stream, player_config, monostate):
|
30 |
+
"""Construct a :class:`Stream <Stream>`.
|
31 |
|
32 |
:param dict stream:
|
33 |
The unscrambled data extracted from YouTube.
|
|
|
83 |
self.video_codec, self.audio_codec = self.parse_codecs()
|
84 |
|
85 |
def set_attributes_from_dict(self, dct):
|
86 |
+
"""Set class attributes from dictionary items."""
|
87 |
for key, val in dct.items():
|
88 |
setattr(self, key, val)
|
89 |
|
|
|
105 |
return self.type == 'video'
|
106 |
|
107 |
def parse_codecs(self):
|
108 |
+
"""Get the video/audio codecs from list of codecs.
|
109 |
+
|
110 |
+
Parse a variable-length list of codecs and return a
|
111 |
consistent two-element tuple, with the video codec as the first element
|
112 |
and audio as the second. Returns ``None`` if one is not available
|
113 |
(adaptive only).
|
|
|
126 |
@property
|
127 |
@memoize
|
128 |
def filesize(self):
|
129 |
+
"""File size of the media stream in bytes."""
|
130 |
headers = request.get(self.url, headers=True)
|
131 |
return int(headers['Content-Length'])
|
132 |
|
133 |
@property
|
134 |
def default_filename(self):
|
135 |
+
"""Generate filename based on the video title."""
|
|
|
|
|
136 |
title = self.player_config['args']['title']
|
137 |
filename = safe_filename(title)
|
138 |
return '{filename}.{s.subtype}'.format(filename=filename, s=self)
|
|
|
145 |
specified, defaults to the current working directory.
|
146 |
|
147 |
"""
|
|
|
148 |
# TODO(nficano): allow a filename to be specified.
|
149 |
output_path = output_path or os.getcwd()
|
150 |
|
|
|
164 |
self.on_progress(chunk, fh, bytes_remaining)
|
165 |
|
166 |
def on_progress(self, chunk, file_handler, bytes_remaining):
|
167 |
+
"""On progress callback function.
|
168 |
+
|
169 |
+
This function writes the binary data to the file, then checks if an
|
170 |
+
additional callback is defined in the monostate. This is exposed to
|
171 |
+
allow things like displaying a progress bar.
|
172 |
|
173 |
:param str chunk:
|
174 |
Segment of media file binary data, not yet written to disk.
|
setup.cfg
CHANGED
@@ -4,20 +4,11 @@ tag = True
|
|
4 |
current_version = 6.4.3
|
5 |
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+))?
|
6 |
serialize =
|
7 |
-
|
8 |
|
9 |
[metadata]
|
10 |
description-file = README.md
|
11 |
|
12 |
-
[nosetests]
|
13 |
-
verbosity = 1
|
14 |
-
detailed-errors = 1
|
15 |
-
with-coverage = 1
|
16 |
-
cover-package = pytube
|
17 |
-
debug = nose.loader
|
18 |
-
pdb = 0
|
19 |
-
pdb-failures = 0
|
20 |
-
|
21 |
[bumpversion:file:setup.py]
|
22 |
|
23 |
[bumpversion:file:pytube/__init__.py]
|
|
|
4 |
current_version = 6.4.3
|
5 |
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+))?
|
6 |
serialize =
|
7 |
+
{major}.{minor}.{patch}
|
8 |
|
9 |
[metadata]
|
10 |
description-file = README.md
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
[bumpversion:file:setup.py]
|
13 |
|
14 |
[bumpversion:file:pytube/__init__.py]
|