hbmartin committed on
Commit
60a069d
·
1 Parent(s): 17913ce

cipher object

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. pytube/cipher.py +68 -68
  3. pytube/extract.py +3 -2
  4. tests/test_cipher.py +0 -11
.gitignore CHANGED
@@ -54,7 +54,11 @@ coverage.xml
54
  *.cover
55
  .hypothesis/
56
  .pytest_cache/
 
 
 
57
  prof/
 
58
 
59
  # Debian Files
60
  debian/files
 
54
  *.cover
55
  .hypothesis/
56
  .pytest_cache/
57
+ *.mp4
58
+
59
+ # Performance profiling
60
  prof/
61
+ *.cprof
62
 
63
  # Debian Files
64
  debian/files
pytube/cipher.py CHANGED
@@ -20,11 +20,78 @@ from itertools import chain
20
  from typing import List, Tuple, Dict, Callable, Any, Optional
21
 
22
  from pytube.exceptions import RegexMatchError
23
- from pytube.helpers import regex_search, create_logger
24
 
25
  logger = create_logger()
26
 
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def get_initial_function_name(js: str) -> str:
29
  """Extract the name of the function responsible for computing the signature.
30
  :param str js:
@@ -48,7 +115,6 @@ def get_initial_function_name(js: str) -> str:
48
  r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
49
  r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
50
  ]
51
-
52
  logger.debug("finding initial function name")
53
  for pattern in function_patterns:
54
  regex = re.compile(pattern)
@@ -71,7 +137,6 @@ def get_transform_plan(js: str) -> List[str]:
71
 
72
  **Example**:
73
 
74
- >>> get_transform_plan(js)
75
  ['DE.AJ(a,15)',
76
  'DE.VR(a,3)',
77
  'DE.AJ(a,51)',
@@ -222,68 +287,3 @@ def map_functions(js_func: str) -> Callable:
222
  if re.search(pattern, js_func):
223
  return fn
224
  raise RegexMatchError(caller="map_functions", pattern="multiple")
225
-
226
-
227
- def parse_function(js_func: str) -> Tuple[str, int]:
228
- """Parse the Javascript transform function.
229
-
230
- Break a JavaScript transform function down into a two element ``tuple``
231
- containing the function name and some integer-based argument.
232
-
233
- :param str js_func:
234
- The JavaScript version of the transform function.
235
- :rtype: tuple
236
- :returns:
237
- two element tuple containing the function name and an argument.
238
-
239
- **Example**:
240
-
241
- >>> parse_function('DE.AJ(a,15)')
242
- ('AJ', 15)
243
-
244
- """
245
- logger.debug("parsing transform function")
246
- pattern = r"\w+\.(\w+)\(\w,(\d+)\)"
247
- regex = re.compile(pattern)
248
- parse_match = regex.search(js_func)
249
- if not parse_match:
250
- raise RegexMatchError(caller="parse_function", pattern=pattern)
251
- fn_name, fn_arg = parse_match.groups()
252
- return fn_name, int(fn_arg)
253
-
254
-
255
- def get_signature(js: str, ciphered_signature: str) -> str:
256
- """Decipher the signature.
257
-
258
- Taking the ciphered signature, applies the transform functions.
259
-
260
- :param str js:
261
- The contents of the base.js asset file.
262
- :param str ciphered_signature:
263
- The ciphered signature sent in the ``player_config``.
264
- :rtype: str
265
- :returns:
266
- Decrypted signature required to download the media content.
267
-
268
- """
269
- transform_plan = get_transform_plan(js)
270
- var, _ = transform_plan[0].split(".")
271
- transform_map = get_transform_map(js, var)
272
- signature = list(ciphered_signature)
273
-
274
- for js_func in transform_plan:
275
- name, argument = parse_function(js_func)
276
- signature = transform_map[name](signature, argument)
277
- logger.debug(
278
- "applied transform function\n"
279
- "output: %s\n"
280
- "js_function: %s\n"
281
- "argument: %d\n"
282
- "function: %s",
283
- "".join(signature),
284
- name,
285
- argument,
286
- transform_map[name],
287
- )
288
-
289
- return "".join(signature)
 
20
  from typing import List, Tuple, Dict, Callable, Any, Optional
21
 
22
  from pytube.exceptions import RegexMatchError
23
+ from pytube.helpers import regex_search, create_logger, cache
24
 
25
  logger = create_logger()
26
 
27
 
28
+ class Cipher:
29
+ def __init__(self, js: str):
30
+ self.transform_plan = get_transform_plan(js)
31
+ var, _ = self.transform_plan[0].split(".")
32
+ self.transform_map = get_transform_map(js, var)
33
+ self.js_func_regex = re.compile(r"\w+\.(\w+)\(\w,(\d+)\)")
34
+
35
+ def get_signature(self, ciphered_signature: str) -> str:
36
+ """Decipher the signature.
37
+
38
+ Taking the ciphered signature, applies the transform functions.
39
+
40
+ :param str js:
41
+ The contents of the base.js asset file.
42
+ :param str ciphered_signature:
43
+ The ciphered signature sent in the ``player_config``.
44
+ :rtype: str
45
+ :returns:
46
+ Decrypted signature required to download the media content.
47
+
48
+ """
49
+ signature = list(ciphered_signature)
50
+
51
+ for js_func in self.transform_plan:
52
+ name, argument = self.parse_function(js_func)
53
+ signature = self.transform_map[name](signature, argument)
54
+ logger.debug(
55
+ "applied transform function\n"
56
+ "output: %s\n"
57
+ "js_function: %s\n"
58
+ "argument: %d\n"
59
+ "function: %s",
60
+ "".join(signature),
61
+ name,
62
+ argument,
63
+ self.transform_map[name],
64
+ )
65
+
66
+ return "".join(signature)
67
+
68
+ @cache
69
+ def parse_function(self, js_func: str) -> Tuple[str, int]:
70
+ """Parse the Javascript transform function.
71
+
72
+ Break a JavaScript transform function down into a two element ``tuple``
73
+ containing the function name and some integer-based argument.
74
+
75
+ :param str js_func:
76
+ The JavaScript version of the transform function.
77
+ :rtype: tuple
78
+ :returns:
79
+ two element tuple containing the function name and an argument.
80
+
81
+ **Example**:
82
+
83
+ >>> parse_function('DE.AJ(a,15)')
84
+ ('AJ', 15)
85
+
86
+ """
87
+ logger.debug("parsing transform function")
88
+ parse_match = self.js_func_regex.search(js_func)
89
+ if not parse_match:
90
+ raise RegexMatchError(caller="parse_function", pattern="js_func_regex")
91
+ fn_name, fn_arg = parse_match.groups()
92
+ return fn_name, int(fn_arg)
93
+
94
+
95
  def get_initial_function_name(js: str) -> str:
96
  """Extract the name of the function responsible for computing the signature.
97
  :param str js:
 
115
  r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
116
  r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
117
  ]
 
118
  logger.debug("finding initial function name")
119
  for pattern in function_patterns:
120
  regex = re.compile(pattern)
 
137
 
138
  **Example**:
139
 
 
140
  ['DE.AJ(a,15)',
141
  'DE.VR(a,3)',
142
  'DE.AJ(a,51)',
 
287
  if re.search(pattern, js_func):
288
  return fn
289
  raise RegexMatchError(caller="map_functions", pattern="multiple")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytube/extract.py CHANGED
@@ -10,7 +10,7 @@ from typing import Any, Optional, Tuple, List, Dict
10
  from urllib.parse import quote, parse_qs, unquote, parse_qsl
11
  from urllib.parse import urlencode
12
 
13
- from pytube import cipher
14
  from pytube.exceptions import RegexMatchError, HTMLParseError, LiveStreamError
15
  from pytube.helpers import regex_search, logger
16
 
@@ -224,6 +224,7 @@ def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
224
  The contents of the base.js asset file.
225
 
226
  """
 
227
  stream_manifest = config_args[fmt]
228
  live_stream = (
229
  json.loads(config_args["player_response"])
@@ -247,7 +248,7 @@ def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
247
  continue
248
 
249
  if js is not None:
250
- signature = cipher.get_signature(js, stream["s"])
251
  else:
252
  # signature not present in url (line 33), need js to descramble
253
  # TypeError caught in __main__
 
10
  from urllib.parse import quote, parse_qs, unquote, parse_qsl
11
  from urllib.parse import urlencode
12
 
13
+ from pytube.cipher import Cipher
14
  from pytube.exceptions import RegexMatchError, HTMLParseError, LiveStreamError
15
  from pytube.helpers import regex_search, logger
16
 
 
224
  The contents of the base.js asset file.
225
 
226
  """
227
+ cipher = Cipher(js=js)
228
  stream_manifest = config_args[fmt]
229
  live_stream = (
230
  json.loads(config_args["player_response"])
 
248
  continue
249
 
250
  if js is not None:
251
+ signature = cipher.get_signature(ciphered_signature=stream["s"])
252
  else:
253
  # signature not present in url (line 33), need js to descramble
254
  # TypeError caught in __main__
tests/test_cipher.py CHANGED
@@ -20,17 +20,6 @@ def test_get_transform_object_with_no_match_should_error():
20
  cipher.get_transform_object("asdf", var="lt")
21
 
22
 
23
- def test_parse_function_with_match():
24
- fn_name, fn_arg = cipher.parse_function("DE.AJ(a,15)")
25
- assert fn_name == "AJ"
26
- assert fn_arg == 15
27
-
28
-
29
- def test_parse_function_with_no_match_should_error():
30
- with pytest.raises(RegexMatchError):
31
- cipher.parse_function("asdf")
32
-
33
-
34
  def test_reverse():
35
  reversed_array = cipher.reverse([1, 2, 3, 4], None)
36
  assert reversed_array == [4, 3, 2, 1]
 
20
  cipher.get_transform_object("asdf", var="lt")
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
23
  def test_reverse():
24
  reversed_array = cipher.reverse([1, 2, 3, 4], None)
25
  assert reversed_array == [4, 3, 2, 1]