Taylor Fox Dahlin committed on
Commit
9e555f0
·
unverified ·
1 Parent(s): ff8335d

Fix #797 (#804)

Browse files

* Changed base_js get_ytplayer_config to work with embed html; updated js_url to reflect this.
* Updated helper to better reflect a real example of returned HTML.

pytube/cipher.py CHANGED
@@ -247,7 +247,7 @@ def splice(arr: List, b: int):
247
  >>> splice([1, 2, 3, 4], 2)
248
  [1, 2]
249
  """
250
- return arr[:b]
251
 
252
 
253
  def swap(arr: List, b: int):
 
247
  >>> splice([1, 2, 3, 4], 2)
248
  [1, 2]
249
  """
250
+ return arr[b:]
251
 
252
 
253
  def swap(arr: List, b: int):
pytube/extract.py CHANGED
@@ -174,7 +174,10 @@ def js_url(html: str) -> str:
174
  :param str html:
175
  The html contents of the watch page.
176
  """
177
- base_js = get_ytplayer_js(html)
 
 
 
178
  return "https://youtube.com" + base_js
179
 
180
 
@@ -215,8 +218,7 @@ def get_ytplayer_js(html: str) -> Any:
215
  Path to YouTube's base.js file.
216
  """
217
  js_url_patterns = [
218
- r"\"jsUrl\":\"([^\"]*)\"",
219
- r"\"js\":\"([^\"]*base\.js)\""
220
  ]
221
  for pattern in js_url_patterns:
222
  regex = re.compile(pattern)
@@ -244,11 +246,10 @@ def get_ytplayer_config(html: str) -> Any:
244
  :returns:
245
  Substring of the html containing the encoded manifest data.
246
  """
 
247
  config_patterns = [
248
  r";ytplayer\.config\s*=\s*({.*?});",
249
- r"yt\.setConfig\(.*'PLAYER_CONFIG':\s*({.+?})"
250
  ]
251
- logger.debug("finding initial function name")
252
  for pattern in config_patterns:
253
  regex = re.compile(pattern)
254
  function_match = regex.search(html)
@@ -257,8 +258,24 @@ def get_ytplayer_config(html: str) -> Any:
257
  yt_player_config = function_match.group(1)
258
  return json.loads(yt_player_config)
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  raise RegexMatchError(
261
- caller="get_ytplayer_config", pattern="config_patterns"
262
  )
263
 
264
 
 
174
  :param str html:
175
  The html contents of the watch page.
176
  """
177
+ try:
178
+ base_js = get_ytplayer_config(html)['assets']['js']
179
+ except KeyError:
180
+ base_js = get_ytplayer_js(html)
181
  return "https://youtube.com" + base_js
182
 
183
 
 
218
  Path to YouTube's base.js file.
219
  """
220
  js_url_patterns = [
221
+ r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)"
 
222
  ]
223
  for pattern in js_url_patterns:
224
  regex = re.compile(pattern)
 
246
  :returns:
247
  Substring of the html containing the encoded manifest data.
248
  """
249
+ logger.debug("finding initial function name")
250
  config_patterns = [
251
  r";ytplayer\.config\s*=\s*({.*?});",
 
252
  ]
 
253
  for pattern in config_patterns:
254
  regex = re.compile(pattern)
255
  function_match = regex.search(html)
 
258
  yt_player_config = function_match.group(1)
259
  return json.loads(yt_player_config)
260
 
261
+ # setConfig() needs to be handled a little differently.
262
+ # We want to parse the entire argument to setConfig()
263
+ # and then load that as json to find PLAYER_CONFIG
264
+ # inside of it.
265
+ setconfig_patterns = [
266
+ r"yt\.setConfig\((.*'PLAYER_CONFIG':\s*{.+?})\);",
267
+ r"yt\.setConfig\((.*\"PLAYER_CONFIG\":\s*{.+?})\);"
268
+ ]
269
+ for pattern in setconfig_patterns:
270
+ regex = re.compile(pattern)
271
+ function_match = regex.search(html)
272
+ if function_match:
273
+ logger.debug("finished regex search, matched: %s", pattern)
274
+ yt_config = function_match.group(1)
275
+ return json.loads(yt_config)['PLAYER_CONFIG']
276
+
277
  raise RegexMatchError(
278
+ caller="get_ytplayer_config", pattern="config_patterns, setconfig_patterns"
279
  )
280
 
281
 
tests/test_cipher.py CHANGED
@@ -26,5 +26,5 @@ def test_reverse():
26
 
27
 
28
  def test_splice():
29
- assert cipher.splice([1, 2, 3, 4], 2) == [1, 2]
30
- assert cipher.splice([1, 2, 3, 4], 1) == [1]
 
26
 
27
 
28
  def test_splice():
29
+ assert cipher.splice([1, 2, 3, 4], 2) == [3, 4]
30
+ assert cipher.splice([1, 2, 3, 4], 1) == [2, 3, 4]
tests/test_helpers.py CHANGED
@@ -121,7 +121,8 @@ def test_create_mock_html_json(mock_url_open, mock_open):
121
  # 2. vid_info_raw
122
  # 3. js
123
  mock_url_open_object.read.side_effect = [
124
- b'"jsUrl":"base.js"',
 
125
  b'vid_info_raw',
126
  b'js_result',
127
  ]
 
121
  # 2. vid_info_raw
122
  # 3. js
123
  mock_url_open_object.read.side_effect = [
124
+ (b'yt.setConfig({"PLAYER_CONFIG":{"args":[]}});'
125
+ b'"jsUrl":"/s/player/13371337/player_ias.vflset/en_US/base.js"'),
126
  b'vid_info_raw',
127
  b'js_result',
128
  ]