cipher object
Browse files
- .gitignore +4 -0
- pytube/cipher.py +68 -68
- pytube/extract.py +3 -2
- tests/test_cipher.py +0 -11
.gitignore
CHANGED
@@ -54,7 +54,11 @@ coverage.xml
|
|
54 |
*.cover
|
55 |
.hypothesis/
|
56 |
.pytest_cache/
|
|
|
|
|
|
|
57 |
prof/
|
|
|
58 |
|
59 |
# Debian Files
|
60 |
debian/files
|
|
|
54 |
*.cover
|
55 |
.hypothesis/
|
56 |
.pytest_cache/
|
57 |
+
*.mp4
|
58 |
+
|
59 |
+
# Performance profiling
|
60 |
prof/
|
61 |
+
*.cprof
|
62 |
|
63 |
# Debian Files
|
64 |
debian/files
|
pytube/cipher.py
CHANGED
@@ -20,11 +20,78 @@ from itertools import chain
|
|
20 |
from typing import List, Tuple, Dict, Callable, Any, Optional
|
21 |
|
22 |
from pytube.exceptions import RegexMatchError
|
23 |
-
from pytube.helpers import regex_search, create_logger
|
24 |
|
25 |
logger = create_logger()
|
26 |
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def get_initial_function_name(js: str) -> str:
|
29 |
"""Extract the name of the function responsible for computing the signature.
|
30 |
:param str js:
|
@@ -48,7 +115,6 @@ def get_initial_function_name(js: str) -> str:
|
|
48 |
r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
49 |
r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
50 |
]
|
51 |
-
|
52 |
logger.debug("finding initial function name")
|
53 |
for pattern in function_patterns:
|
54 |
regex = re.compile(pattern)
|
@@ -71,7 +137,6 @@ def get_transform_plan(js: str) -> List[str]:
|
|
71 |
|
72 |
**Example**:
|
73 |
|
74 |
-
>>> get_transform_plan(js)
|
75 |
['DE.AJ(a,15)',
|
76 |
'DE.VR(a,3)',
|
77 |
'DE.AJ(a,51)',
|
@@ -222,68 +287,3 @@ def map_functions(js_func: str) -> Callable:
|
|
222 |
if re.search(pattern, js_func):
|
223 |
return fn
|
224 |
raise RegexMatchError(caller="map_functions", pattern="multiple")
|
225 |
-
|
226 |
-
|
227 |
-
def parse_function(js_func: str) -> Tuple[str, int]:
|
228 |
-
"""Parse the Javascript transform function.
|
229 |
-
|
230 |
-
Break a JavaScript transform function down into a two element ``tuple``
|
231 |
-
containing the function name and some integer-based argument.
|
232 |
-
|
233 |
-
:param str js_func:
|
234 |
-
The JavaScript version of the transform function.
|
235 |
-
:rtype: tuple
|
236 |
-
:returns:
|
237 |
-
two element tuple containing the function name and an argument.
|
238 |
-
|
239 |
-
**Example**:
|
240 |
-
|
241 |
-
>>> parse_function('DE.AJ(a,15)')
|
242 |
-
('AJ', 15)
|
243 |
-
|
244 |
-
"""
|
245 |
-
logger.debug("parsing transform function")
|
246 |
-
pattern = r"\w+\.(\w+)\(\w,(\d+)\)"
|
247 |
-
regex = re.compile(pattern)
|
248 |
-
parse_match = regex.search(js_func)
|
249 |
-
if not parse_match:
|
250 |
-
raise RegexMatchError(caller="parse_function", pattern=pattern)
|
251 |
-
fn_name, fn_arg = parse_match.groups()
|
252 |
-
return fn_name, int(fn_arg)
|
253 |
-
|
254 |
-
|
255 |
-
def get_signature(js: str, ciphered_signature: str) -> str:
|
256 |
-
"""Decipher the signature.
|
257 |
-
|
258 |
-
Taking the ciphered signature, applies the transform functions.
|
259 |
-
|
260 |
-
:param str js:
|
261 |
-
The contents of the base.js asset file.
|
262 |
-
:param str ciphered_signature:
|
263 |
-
The ciphered signature sent in the ``player_config``.
|
264 |
-
:rtype: str
|
265 |
-
:returns:
|
266 |
-
Decrypted signature required to download the media content.
|
267 |
-
|
268 |
-
"""
|
269 |
-
transform_plan = get_transform_plan(js)
|
270 |
-
var, _ = transform_plan[0].split(".")
|
271 |
-
transform_map = get_transform_map(js, var)
|
272 |
-
signature = list(ciphered_signature)
|
273 |
-
|
274 |
-
for js_func in transform_plan:
|
275 |
-
name, argument = parse_function(js_func)
|
276 |
-
signature = transform_map[name](signature, argument)
|
277 |
-
logger.debug(
|
278 |
-
"applied transform function\n"
|
279 |
-
"output: %s\n"
|
280 |
-
"js_function: %s\n"
|
281 |
-
"argument: %d\n"
|
282 |
-
"function: %s",
|
283 |
-
"".join(signature),
|
284 |
-
name,
|
285 |
-
argument,
|
286 |
-
transform_map[name],
|
287 |
-
)
|
288 |
-
|
289 |
-
return "".join(signature)
|
|
|
20 |
from typing import List, Tuple, Dict, Callable, Any, Optional
|
21 |
|
22 |
from pytube.exceptions import RegexMatchError
|
23 |
+
from pytube.helpers import regex_search, create_logger, cache
|
24 |
|
25 |
logger = create_logger()
|
26 |
|
27 |
|
28 |
+
class Cipher:
|
29 |
+
def __init__(self, js: str):
|
30 |
+
self.transform_plan = get_transform_plan(js)
|
31 |
+
var, _ = self.transform_plan[0].split(".")
|
32 |
+
self.transform_map = get_transform_map(js, var)
|
33 |
+
self.js_func_regex = re.compile(r"\w+\.(\w+)\(\w,(\d+)\)")
|
34 |
+
|
35 |
+
def get_signature(self, ciphered_signature: str) -> str:
|
36 |
+
"""Decipher the signature.
|
37 |
+
|
38 |
+
Taking the ciphered signature, applies the transform functions.
|
39 |
+
|
40 |
+
:param str js:
|
41 |
+
The contents of the base.js asset file.
|
42 |
+
:param str ciphered_signature:
|
43 |
+
The ciphered signature sent in the ``player_config``.
|
44 |
+
:rtype: str
|
45 |
+
:returns:
|
46 |
+
Decrypted signature required to download the media content.
|
47 |
+
|
48 |
+
"""
|
49 |
+
signature = list(ciphered_signature)
|
50 |
+
|
51 |
+
for js_func in self.transform_plan:
|
52 |
+
name, argument = self.parse_function(js_func)
|
53 |
+
signature = self.transform_map[name](signature, argument)
|
54 |
+
logger.debug(
|
55 |
+
"applied transform function\n"
|
56 |
+
"output: %s\n"
|
57 |
+
"js_function: %s\n"
|
58 |
+
"argument: %d\n"
|
59 |
+
"function: %s",
|
60 |
+
"".join(signature),
|
61 |
+
name,
|
62 |
+
argument,
|
63 |
+
self.transform_map[name],
|
64 |
+
)
|
65 |
+
|
66 |
+
return "".join(signature)
|
67 |
+
|
68 |
+
@cache
|
69 |
+
def parse_function(self, js_func: str) -> Tuple[str, int]:
|
70 |
+
"""Parse the Javascript transform function.
|
71 |
+
|
72 |
+
Break a JavaScript transform function down into a two element ``tuple``
|
73 |
+
containing the function name and some integer-based argument.
|
74 |
+
|
75 |
+
:param str js_func:
|
76 |
+
The JavaScript version of the transform function.
|
77 |
+
:rtype: tuple
|
78 |
+
:returns:
|
79 |
+
two element tuple containing the function name and an argument.
|
80 |
+
|
81 |
+
**Example**:
|
82 |
+
|
83 |
+
>>> parse_function('DE.AJ(a,15)')
|
84 |
+
('AJ', 15)
|
85 |
+
|
86 |
+
"""
|
87 |
+
logger.debug("parsing transform function")
|
88 |
+
parse_match = self.js_func_regex.search(js_func)
|
89 |
+
if not parse_match:
|
90 |
+
raise RegexMatchError(caller="parse_function", pattern="js_func_regex")
|
91 |
+
fn_name, fn_arg = parse_match.groups()
|
92 |
+
return fn_name, int(fn_arg)
|
93 |
+
|
94 |
+
|
95 |
def get_initial_function_name(js: str) -> str:
|
96 |
"""Extract the name of the function responsible for computing the signature.
|
97 |
:param str js:
|
|
|
115 |
r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
116 |
r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
|
117 |
]
|
|
|
118 |
logger.debug("finding initial function name")
|
119 |
for pattern in function_patterns:
|
120 |
regex = re.compile(pattern)
|
|
|
137 |
|
138 |
**Example**:
|
139 |
|
|
|
140 |
['DE.AJ(a,15)',
|
141 |
'DE.VR(a,3)',
|
142 |
'DE.AJ(a,51)',
|
|
|
287 |
if re.search(pattern, js_func):
|
288 |
return fn
|
289 |
raise RegexMatchError(caller="map_functions", pattern="multiple")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytube/extract.py
CHANGED
@@ -10,7 +10,7 @@ from typing import Any, Optional, Tuple, List, Dict
|
|
10 |
from urllib.parse import quote, parse_qs, unquote, parse_qsl
|
11 |
from urllib.parse import urlencode
|
12 |
|
13 |
-
from pytube import cipher
|
14 |
from pytube.exceptions import RegexMatchError, HTMLParseError, LiveStreamError
|
15 |
from pytube.helpers import regex_search, logger
|
16 |
|
@@ -224,6 +224,7 @@ def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
|
|
224 |
The contents of the base.js asset file.
|
225 |
|
226 |
"""
|
|
|
227 |
stream_manifest = config_args[fmt]
|
228 |
live_stream = (
|
229 |
json.loads(config_args["player_response"])
|
@@ -247,7 +248,7 @@ def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
|
|
247 |
continue
|
248 |
|
249 |
if js is not None:
|
250 |
-
signature = cipher.get_signature(js, stream["s"])
|
251 |
else:
|
252 |
# signature not present in url (line 33), need js to descramble
|
253 |
# TypeError caught in __main__
|
|
|
10 |
from urllib.parse import quote, parse_qs, unquote, parse_qsl
|
11 |
from urllib.parse import urlencode
|
12 |
|
13 |
+
from pytube.cipher import Cipher
|
14 |
from pytube.exceptions import RegexMatchError, HTMLParseError, LiveStreamError
|
15 |
from pytube.helpers import regex_search, logger
|
16 |
|
|
|
224 |
The contents of the base.js asset file.
|
225 |
|
226 |
"""
|
227 |
+
cipher = Cipher(js=js)
|
228 |
stream_manifest = config_args[fmt]
|
229 |
live_stream = (
|
230 |
json.loads(config_args["player_response"])
|
|
|
248 |
continue
|
249 |
|
250 |
if js is not None:
|
251 |
+
signature = cipher.get_signature(ciphered_signature=stream["s"])
|
252 |
else:
|
253 |
# signature not present in url (line 33), need js to descramble
|
254 |
# TypeError caught in __main__
|
tests/test_cipher.py
CHANGED
@@ -20,17 +20,6 @@ def test_get_transform_object_with_no_match_should_error():
|
|
20 |
cipher.get_transform_object("asdf", var="lt")
|
21 |
|
22 |
|
23 |
-
def test_parse_function_with_match():
|
24 |
-
fn_name, fn_arg = cipher.parse_function("DE.AJ(a,15)")
|
25 |
-
assert fn_name == "AJ"
|
26 |
-
assert fn_arg == 15
|
27 |
-
|
28 |
-
|
29 |
-
def test_parse_function_with_no_match_should_error():
|
30 |
-
with pytest.raises(RegexMatchError):
|
31 |
-
cipher.parse_function("asdf")
|
32 |
-
|
33 |
-
|
34 |
def test_reverse():
|
35 |
reversed_array = cipher.reverse([1, 2, 3, 4], None)
|
36 |
assert reversed_array == [4, 3, 2, 1]
|
|
|
20 |
cipher.get_transform_object("asdf", var="lt")
|
21 |
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
def test_reverse():
|
24 |
reversed_array = cipher.reverse([1, 2, 3, 4], None)
|
25 |
assert reversed_array == [4, 3, 2, 1]
|