Kinshuk Vasisht committed on
Commit
cd26b1c
·
unverified ·
1 Parent(s): e857546

Fix regex to find throttling function name (#1282)

Browse files

* Generalize regex to find throttle function name
* Extend tests and fixtures for throttling name
* Rename mock asset to follow similar naming style

pytube/cipher.py CHANGED
@@ -270,7 +270,7 @@ def get_throttling_function_name(js: str) -> str:
270
  # Bpa.length || iha("")) }};
271
  # In the above case, `iha` is the relevant function name
272
  r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*'
273
- r'\([a-z]\s*=\s*([a-zA-Z0-9$]{3})(\[\d+\])?\([a-z]\)',
274
  ]
275
  logger.debug('Finding throttling function name')
276
  for pattern in function_patterns:
@@ -285,7 +285,7 @@ def get_throttling_function_name(js: str) -> str:
285
  idx = idx.strip("[]")
286
  array = re.search(
287
  r'var {nfunc}\s*=\s*(\[.+?\]);'.format(
288
- nfunc=function_match.group(1)),
289
  js
290
  )
291
  if array:
 
270
  # Bpa.length || iha("")) }};
271
  # In the above case, `iha` is the relevant function name
272
  r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*'
273
+ r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])?\([a-z]\)',
274
  ]
275
  logger.debug('Finding throttling function name')
276
  for pattern in function_patterns:
 
285
  idx = idx.strip("[]")
286
  array = re.search(
287
  r'var {nfunc}\s*=\s*(\[.+?\]);'.format(
288
+ nfunc=re.escape(function_match.group(1))),
289
  js
290
  )
291
  if array:
tests/conftest.py CHANGED
@@ -150,13 +150,17 @@ def channel_videos_html():
150
 
151
  @pytest.fixture
152
  def base_js():
153
- """Youtube base.js retrieved on 2022-02-04 from
154
- https://www.youtube.com/watch?v=vmzxpUsN0uA
 
155
  """
156
- file_path = os.path.join(
157
- os.path.dirname(os.path.realpath(__file__)),
158
- "mocks",
159
- "base.js.gz",
160
- )
161
- with gzip.open(file_path, 'rb') as f:
162
- return f.read().decode('utf-8')
 
 
 
 
150
 
151
  @pytest.fixture
152
  def base_js():
153
+ """Youtube base.js files retrieved on 2022-02-04 and 2022-04-15
154
+ from https://www.youtube.com/watch?v=vmzxpUsN0uA and
155
+ https://www.youtube.com/watch?v=Y4-GSFKZmEg respectively
156
  """
157
+ base_js_files = []
158
+ for file in ["base.js-2022-02-04.gz", "base.js-2022-04-15.gz"]:
159
+ file_path = os.path.join(
160
+ os.path.dirname(os.path.realpath(__file__)),
161
+ "mocks",
162
+ file,
163
+ )
164
+ with gzip.open(file_path, 'rb') as f:
165
+ base_js_files.append(f.read().decode('utf-8'))
166
+ return base_js_files
tests/mocks/{base.js.gz → base.js-2022-02-04.gz} RENAMED
File without changes
tests/mocks/base.js-2022-04-15.gz ADDED
Binary file (581 kB). View file
 
tests/test_cipher.py CHANGED
@@ -80,11 +80,24 @@ def test_js_splice():
80
 
81
 
82
  def test_get_throttling_function_name(base_js):
83
- # Values expected as of 2022/02/04:
84
- raw_var = r'var Apa=[hha]'
85
- assert raw_var in base_js
86
- raw_code = r'a.url="";a.C&&(b=a.get("n"))&&(b=Apa[0](b),a.set("n",b),'\
87
- r'Apa.length||hha(""))}};'
88
- assert raw_code in base_js
89
- func_name = cipher.get_throttling_function_name(base_js)
90
- assert func_name == "hha"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
 
82
  def test_get_throttling_function_name(base_js):
83
+ base_js_code_fragments = [
84
+ # Values expected as of 2022/02/04:
85
+ {
86
+ 'raw_var' : r'var Apa=[hha]',
87
+ 'raw_code': r'a.url="";a.C&&(b=a.get("n"))&&(b=Apa[0](b),a.set("n",b),'\
88
+ r'Apa.length||hha(""))}};',
89
+ 'nfunc_name': 'hha'
90
+ },
91
+ # Values expected as of 2022/04/15:
92
+ {
93
+ 'raw_var' : r'var $x=[uq]',
94
+ 'raw_code': r'a.url="";a.D&&(b=a.get("n"))&&(b=$x[0](b),a.set("n",b),'\
95
+ r'$x.length||uq(""))',
96
+ 'nfunc_name': 'uq'
97
+ }
98
+ ]
99
+ for code_fragment, base_js_file in zip(base_js_code_fragments, base_js):
100
+ assert code_fragment['raw_var'] in base_js_file
101
+ assert code_fragment['raw_code'] in base_js_file
102
+ func_name = cipher.get_throttling_function_name(base_js_file)
103
+ assert func_name == code_fragment['nfunc_name']