Alex G committed on
Commit
fc9aec5
1 Parent(s): 10989d2

Fix #1060 (#1067)

Browse files

* Use InnerTube in place of `get_video_info` URL

* Added some additional base parameters for innertube requests.

* Added OAuth support for InnerTube client

* Add exception for age-restricted videos which can no longer be accessed without using auth.

* Carved out and simplified code where possible due to API changes.

* Added renderer catch -- fixes #1068

* Additional channel name support for URL-encoded names.

* Updated test mocks, removed region-locked test because that functionality no longer works.

.gitignore CHANGED
@@ -138,3 +138,6 @@ test/**/*.xml
138
  # Common virtual environments
139
  venv/
140
  env/
 
 
 
 
138
  # Common virtual environments
139
  venv/
140
  env/
141
+
142
+ # Token cache location
143
+ __cache__/
pytube/__main__.py CHANGED
@@ -6,16 +6,15 @@ exclusively on the developer interface. Pytube offloads the heavy lifting to
6
  smaller peripheral modules and functions.
7
 
8
  """
9
- import json
10
  import logging
11
  from typing import Any, Callable, Dict, List, Optional
12
- from urllib.parse import parse_qsl
13
 
14
  import pytube
15
  import pytube.exceptions as exceptions
16
  from pytube import extract, request
17
  from pytube import Stream, StreamQuery
18
  from pytube.helpers import install_proxy
 
19
  from pytube.metadata import YouTubeMetadata
20
  from pytube.monostate import Monostate
21
 
@@ -31,6 +30,8 @@ class YouTube:
31
  on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
32
  on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
33
  proxies: Dict[str, str] = None,
 
 
34
  ):
35
  """Construct a :class:`YouTube <YouTube>`.
36
 
@@ -47,19 +48,11 @@ class YouTube:
47
  self._js: Optional[str] = None # js fetched by js_url
48
  self._js_url: Optional[str] = None # the url to the js, parsed from watch html
49
 
50
- # note: vid_info may eventually be removed. It sounds like it once had
51
- # additional formats, but that doesn't appear to still be the case.
52
-
53
- # the url to vid info, parsed from watch html
54
- self._vid_info_url: Optional[str] = None
55
- self._vid_info_raw: Optional[str] = None # content fetched by vid_info_url
56
- self._vid_info: Optional[Dict] = None # parsed content of vid_info_raw
57
 
58
  self._watch_html: Optional[str] = None # the html of /watch?v=<video_id>
59
  self._embed_html: Optional[str] = None
60
  self._player_config_args: Optional[Dict] = None # inline js in the html containing
61
- self._player_response: Optional[Dict] = None
62
- # streams
63
  self._age_restricted: Optional[bool] = None
64
 
65
  self._fmt_streams: Optional[List[Stream]] = None
@@ -85,6 +78,9 @@ class YouTube:
85
  self._title = None
86
  self._publish_date = None
87
 
 
 
 
88
  def __repr__(self):
89
  return f'<pytube.__main__.YouTube object: videoId={self.video_id}>'
90
 
@@ -102,13 +98,6 @@ class YouTube:
102
  self._embed_html = request.get(url=self.embed_url)
103
  return self._embed_html
104
 
105
- @property
106
- def vid_info_raw(self):
107
- if self._vid_info_raw:
108
- return self._vid_info_raw
109
- self._vid_info_raw = request.get(self.vid_info_url)
110
- return self._vid_info_raw
111
-
112
  @property
113
  def age_restricted(self):
114
  if self._age_restricted:
@@ -116,21 +105,6 @@ class YouTube:
116
  self._age_restricted = extract.is_age_restricted(self.watch_html)
117
  return self._age_restricted
118
 
119
- @property
120
- def vid_info_url(self):
121
- if self._vid_info_url:
122
- return self._vid_info_url
123
-
124
- if self.age_restricted:
125
- self._vid_info_url = extract.video_info_url_age_restricted(
126
- self.video_id, self.watch_url
127
- )
128
- else:
129
- self._vid_info_url = extract.video_info_url(
130
- video_id=self.video_id, watch_url=self.watch_url
131
- )
132
- return self._vid_info_url
133
-
134
  @property
135
  def js_url(self):
136
  if self._js_url:
@@ -159,20 +133,6 @@ class YouTube:
159
 
160
  return self._js
161
 
162
- @property
163
- def player_response(self):
164
- """The player response contains subtitle information and video details."""
165
- if self._player_response:
166
- return self._player_response
167
-
168
- if isinstance(self.player_config_args["player_response"], str):
169
- self._player_response = json.loads(
170
- self.player_config_args["player_response"]
171
- )
172
- else:
173
- self._player_response = self.player_config_args["player_response"]
174
- return self._player_response
175
-
176
  @property
177
  def initial_data(self):
178
  if self._initial_data:
@@ -181,21 +141,13 @@ class YouTube:
181
  return self._initial_data
182
 
183
  @property
184
- def player_config_args(self):
185
- if self._player_config_args:
186
- return self._player_config_args
187
-
188
- self._player_config_args = self.vid_info
189
- # On pre-signed videos, we need to use get_ytplayer_config to fix
190
- # the player_response item
191
- if 'streamingData' not in self.player_config_args['player_response']:
192
- config_response = extract.get_ytplayer_config(self.watch_html)
193
- if 'args' in config_response:
194
- self.player_config_args['player_response'] = config_response['args']['player_response'] # noqa: E501
195
- else:
196
- self.player_config_args['player_response'] = config_response
197
-
198
- return self._player_config_args
199
 
200
  @property
201
  def fmt_streams(self):
@@ -209,39 +161,29 @@ class YouTube:
209
  return self._fmt_streams
210
 
211
  self._fmt_streams = []
212
- # https://github.com/pytube/pytube/issues/165
213
- stream_maps = ["url_encoded_fmt_stream_map"]
214
- if "adaptive_fmts" in self.player_config_args:
215
- stream_maps.append("adaptive_fmts")
216
-
217
- # unscramble the progressive and adaptive stream manifests.
218
- for fmt in stream_maps:
219
- if not self.age_restricted and fmt in self.vid_info:
220
- extract.apply_descrambler(self.vid_info, fmt)
221
- extract.apply_descrambler(self.player_config_args, fmt)
222
-
223
- # If the cached js doesn't work, try fetching a new js file
224
- # https://github.com/pytube/pytube/issues/1054
225
- try:
226
- extract.apply_signature(self.player_config_args, fmt, self.js)
227
- except exceptions.ExtractError:
228
- # To force an update to the js file, we clear the cache and retry
229
- self._js = None
230
- self._js_url = None
231
- pytube.__js__ = None
232
- pytube.__js_url__ = None
233
- extract.apply_signature(self.player_config_args, fmt, self.js)
234
-
235
- # build instances of :class:`Stream <Stream>`
236
- # Initialize stream objects
237
- stream_manifest = self.player_config_args[fmt]
238
- for stream in stream_manifest:
239
- video = Stream(
240
- stream=stream,
241
- player_config_args=self.player_config_args,
242
- monostate=self.stream_monostate,
243
- )
244
- self._fmt_streams.append(video)
245
 
246
  self.stream_monostate.title = self.title
247
  self.stream_monostate.duration = self.length
@@ -266,9 +208,6 @@ class YouTube:
266
  elif reason == 'This live stream recording is not available.':
267
  raise exceptions.RecordingUnavailable(video_id=self.video_id)
268
  else:
269
- if reason == 'Video unavailable':
270
- if extract.is_region_blocked(self.watch_html):
271
- raise exceptions.VideoRegionBlocked(video_id=self.video_id)
272
  raise exceptions.VideoUnavailable(video_id=self.video_id)
273
  elif status == 'LOGIN_REQUIRED':
274
  if reason == (
@@ -288,7 +227,32 @@ class YouTube:
288
 
289
  :rtype: Dict[Any, Any]
290
  """
291
- return dict(parse_qsl(self.vid_info_raw))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
  @property
294
  def caption_tracks(self) -> List[pytube.Caption]:
@@ -297,7 +261,7 @@ class YouTube:
297
  :rtype: List[Caption]
298
  """
299
  raw_tracks = (
300
- self.player_response.get("captions", {})
301
  .get("playerCaptionsTracklistRenderer", {})
302
  .get("captionTracks", [])
303
  )
@@ -327,7 +291,7 @@ class YouTube:
327
  :rtype: str
328
  """
329
  thumbnail_details = (
330
- self.player_response.get("videoDetails", {})
331
  .get("thumbnail", {})
332
  .get("thumbnails")
333
  )
@@ -363,7 +327,7 @@ class YouTube:
363
  return self._title
364
 
365
  try:
366
- self._title = self.player_response['videoDetails']['title']
367
  except KeyError:
368
  # Check_availability will raise the correct exception in most cases
369
  # if it doesn't, ask for a report.
@@ -388,7 +352,7 @@ class YouTube:
388
 
389
  :rtype: str
390
  """
391
- return self.player_response.get("videoDetails", {}).get("shortDescription")
392
 
393
  @property
394
  def rating(self) -> float:
@@ -397,7 +361,7 @@ class YouTube:
397
  :rtype: float
398
 
399
  """
400
- return self.player_response.get("videoDetails", {}).get("averageRating")
401
 
402
  @property
403
  def length(self) -> int:
@@ -405,14 +369,7 @@ class YouTube:
405
 
406
  :rtype: int
407
  """
408
- return int(
409
- self.player_config_args.get("length_seconds")
410
- or (
411
- self.player_response.get("videoDetails", {}).get(
412
- "lengthSeconds"
413
- )
414
- )
415
- )
416
 
417
  @property
418
  def views(self) -> int:
@@ -420,9 +377,7 @@ class YouTube:
420
 
421
  :rtype: int
422
  """
423
- return int(
424
- self.player_response.get("videoDetails", {}).get("viewCount")
425
- )
426
 
427
  @property
428
  def author(self) -> str:
@@ -431,7 +386,7 @@ class YouTube:
431
  """
432
  if self._author:
433
  return self._author
434
- self._author = self.player_response.get("videoDetails", {}).get(
435
  "author", "unknown"
436
  )
437
  return self._author
@@ -447,7 +402,7 @@ class YouTube:
447
 
448
  :rtype: List[str]
449
  """
450
- return self.player_response.get('videoDetails', {}).get('keywords', [])
451
 
452
  @property
453
  def channel_id(self) -> str:
@@ -455,7 +410,7 @@ class YouTube:
455
 
456
  :rtype: str
457
  """
458
- return self.player_response.get('videoDetails', {}).get('channelId', None)
459
 
460
  @property
461
  def channel_url(self) -> str:
 
6
  smaller peripheral modules and functions.
7
 
8
  """
 
9
  import logging
10
  from typing import Any, Callable, Dict, List, Optional
 
11
 
12
  import pytube
13
  import pytube.exceptions as exceptions
14
  from pytube import extract, request
15
  from pytube import Stream, StreamQuery
16
  from pytube.helpers import install_proxy
17
+ from pytube.innertube import InnerTube
18
  from pytube.metadata import YouTubeMetadata
19
  from pytube.monostate import Monostate
20
 
 
30
  on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
31
  on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
32
  proxies: Dict[str, str] = None,
33
+ use_oauth: bool = False,
34
+ allow_oauth_cache: bool = True
35
  ):
36
  """Construct a :class:`YouTube <YouTube>`.
37
 
 
48
  self._js: Optional[str] = None # js fetched by js_url
49
  self._js_url: Optional[str] = None # the url to the js, parsed from watch html
50
 
51
+ self._vid_info: Optional[Dict] = None # content fetched from innertube/player
 
 
 
 
 
 
52
 
53
  self._watch_html: Optional[str] = None # the html of /watch?v=<video_id>
54
  self._embed_html: Optional[str] = None
55
  self._player_config_args: Optional[Dict] = None # inline js in the html containing
 
 
56
  self._age_restricted: Optional[bool] = None
57
 
58
  self._fmt_streams: Optional[List[Stream]] = None
 
78
  self._title = None
79
  self._publish_date = None
80
 
81
+ self.use_oauth = use_oauth
82
+ self.allow_oauth_cache = allow_oauth_cache
83
+
84
  def __repr__(self):
85
  return f'<pytube.__main__.YouTube object: videoId={self.video_id}>'
86
 
 
98
  self._embed_html = request.get(url=self.embed_url)
99
  return self._embed_html
100
 
 
 
 
 
 
 
 
101
  @property
102
  def age_restricted(self):
103
  if self._age_restricted:
 
105
  self._age_restricted = extract.is_age_restricted(self.watch_html)
106
  return self._age_restricted
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  @property
109
  def js_url(self):
110
  if self._js_url:
 
133
 
134
  return self._js
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  @property
137
  def initial_data(self):
138
  if self._initial_data:
 
141
  return self._initial_data
142
 
143
  @property
144
+ def streaming_data(self):
145
+ """Return streamingData from video info."""
146
+ if 'streamingData' in self.vid_info:
147
+ return self.vid_info['streamingData']
148
+ else:
149
+ self.bypass_age_gate()
150
+ return self.vid_info['streamingData']
 
 
 
 
 
 
 
 
151
 
152
  @property
153
  def fmt_streams(self):
 
161
  return self._fmt_streams
162
 
163
  self._fmt_streams = []
164
+
165
+ stream_manifest = extract.apply_descrambler(self.streaming_data)
166
+
167
+ # If the cached js doesn't work, try fetching a new js file
168
+ # https://github.com/pytube/pytube/issues/1054
169
+ try:
170
+ extract.apply_signature(stream_manifest, self.vid_info, self.js)
171
+ except exceptions.ExtractError:
172
+ # To force an update to the js file, we clear the cache and retry
173
+ self._js = None
174
+ self._js_url = None
175
+ pytube.__js__ = None
176
+ pytube.__js_url__ = None
177
+ extract.apply_signature(stream_manifest, self.vid_info, self.js)
178
+
179
+ # build instances of :class:`Stream <Stream>`
180
+ # Initialize stream objects
181
+ for stream in stream_manifest:
182
+ video = Stream(
183
+ stream=stream,
184
+ monostate=self.stream_monostate,
185
+ )
186
+ self._fmt_streams.append(video)
 
 
 
 
 
 
 
 
 
 
187
 
188
  self.stream_monostate.title = self.title
189
  self.stream_monostate.duration = self.length
 
208
  elif reason == 'This live stream recording is not available.':
209
  raise exceptions.RecordingUnavailable(video_id=self.video_id)
210
  else:
 
 
 
211
  raise exceptions.VideoUnavailable(video_id=self.video_id)
212
  elif status == 'LOGIN_REQUIRED':
213
  if reason == (
 
227
 
228
  :rtype: Dict[Any, Any]
229
  """
230
+ if self._vid_info:
231
+ return self._vid_info
232
+
233
+ innertube = InnerTube(use_oauth=self.use_oauth, allow_cache=self.allow_oauth_cache)
234
+
235
+ innertube_response = innertube.player(self.video_id)
236
+ self._vid_info = innertube_response
237
+ return self._vid_info
238
+
239
+ def bypass_age_gate(self):
240
+ """Attempt to update the vid_info by bypassing the age gate."""
241
+ innertube = InnerTube(
242
+ client='ANDROID_EMBED',
243
+ use_oauth=self.use_oauth,
244
+ allow_cache=self.allow_oauth_cache
245
+ )
246
+ innertube_response = innertube.player(self.video_id)
247
+
248
+ playability_status = innertube_response['playabilityStatus'].get('status', None)
249
+
250
+ # If we still can't access the video, raise an exception
251
+ # (tier 3 age restriction)
252
+ if playability_status == 'UNPLAYABLE':
253
+ raise exceptions.AgeRestrictedError(self.video_id)
254
+
255
+ self._vid_info = innertube_response
256
 
257
  @property
258
  def caption_tracks(self) -> List[pytube.Caption]:
 
261
  :rtype: List[Caption]
262
  """
263
  raw_tracks = (
264
+ self.vid_info.get("captions", {})
265
  .get("playerCaptionsTracklistRenderer", {})
266
  .get("captionTracks", [])
267
  )
 
291
  :rtype: str
292
  """
293
  thumbnail_details = (
294
+ self.vid_info.get("videoDetails", {})
295
  .get("thumbnail", {})
296
  .get("thumbnails")
297
  )
 
327
  return self._title
328
 
329
  try:
330
+ self._title = self.vid_info['videoDetails']['title']
331
  except KeyError:
332
  # Check_availability will raise the correct exception in most cases
333
  # if it doesn't, ask for a report.
 
352
 
353
  :rtype: str
354
  """
355
+ return self.vid_info.get("videoDetails", {}).get("shortDescription")
356
 
357
  @property
358
  def rating(self) -> float:
 
361
  :rtype: float
362
 
363
  """
364
+ return self.vid_info.get("videoDetails", {}).get("averageRating")
365
 
366
  @property
367
  def length(self) -> int:
 
369
 
370
  :rtype: int
371
  """
372
+ return int(self.vid_info.get('videoDetails', {}).get('lengthSeconds'))
 
 
 
 
 
 
 
373
 
374
  @property
375
  def views(self) -> int:
 
377
 
378
  :rtype: int
379
  """
380
+ return int(self.vid_info.get("videoDetails", {}).get("viewCount"))
 
 
381
 
382
  @property
383
  def author(self) -> str:
 
386
  """
387
  if self._author:
388
  return self._author
389
+ self._author = self.vid_info.get("videoDetails", {}).get(
390
  "author", "unknown"
391
  )
392
  return self._author
 
402
 
403
  :rtype: List[str]
404
  """
405
+ return self.vid_info.get('videoDetails', {}).get('keywords', [])
406
 
407
  @property
408
  def channel_id(self) -> str:
 
410
 
411
  :rtype: str
412
  """
413
+ return self.vid_info.get('videoDetails', {}).get('channelId', None)
414
 
415
  @property
416
  def channel_url(self) -> str:
pytube/captions.py CHANGED
@@ -19,7 +19,17 @@ class Caption:
19
  Caption track data extracted from ``watch_html``.
20
  """
21
  self.url = caption_track.get("baseUrl")
22
- self.name = caption_track["name"]["simpleText"]
 
 
 
 
 
 
 
 
 
 
23
  # Use "vssId" instead of "languageCode", fix issue #779
24
  self.code = caption_track["vssId"]
25
  # Remove preceding '.' for backwards compatibility, e.g.:
 
19
  Caption track data extracted from ``watch_html``.
20
  """
21
  self.url = caption_track.get("baseUrl")
22
+
23
+ # Certain videos have runs instead of simpleText
24
+ # this handles that edge case
25
+ name_dict = caption_track['name']
26
+ if 'simpleText' in name_dict:
27
+ self.name = name_dict['simpleText']
28
+ else:
29
+ for el in name_dict['runs']:
30
+ if 'text' in el:
31
+ self.name = el['text']
32
+
33
  # Use "vssId" instead of "languageCode", fix issue #779
34
  self.code = caption_track["vssId"]
35
  # Remove preceding '.' for backwards compatibility, e.g.:
pytube/contrib/search.py CHANGED
@@ -145,6 +145,10 @@ class Search:
145
  if 'didYouMeanRenderer' in video_details:
146
  continue
147
 
 
 
 
 
148
  if 'videoRenderer' not in video_details:
149
  logger.warn('Unexpected renderer encountered.')
150
  logger.warn(f'Renderer name: {video_details.keys()}')
 
145
  if 'didYouMeanRenderer' in video_details:
146
  continue
147
 
148
+ # Seems to be the renderer used for the image shown on a no results page
149
+ if 'backgroundPromoRenderer' in video_details:
150
+ continue
151
+
152
  if 'videoRenderer' not in video_details:
153
  logger.warn('Unexpected renderer encountered.')
154
  logger.warn(f'Renderer name: {video_details.keys()}')
pytube/exceptions.py CHANGED
@@ -53,9 +53,23 @@ class VideoUnavailable(PytubeError):
53
  return f'{self.video_id} is unavailable'
54
 
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  class LiveStreamError(VideoUnavailable):
57
  """Video is a live stream."""
58
-
59
  def __init__(self, video_id: str):
60
  """
61
  :param str video_id:
 
53
  return f'{self.video_id} is unavailable'
54
 
55
 
56
+ class AgeRestrictedError(VideoUnavailable):
57
+ """Video is age restricted, and cannot be accessed without OAuth."""
58
+ def __init__(self, video_id: str):
59
+ """
60
+ :param str video_id:
61
+ A YouTube video identifier.
62
+ """
63
+ self.video_id = video_id
64
+ super().__init__(self.video_id)
65
+
66
+ @property
67
+ def error_string(self):
68
+ return f"{self.video_id} is age restricted, and can't be accessed without logging in."
69
+
70
+
71
  class LiveStreamError(VideoUnavailable):
72
  """Video is a live stream."""
 
73
  def __init__(self, video_id: str):
74
  """
75
  :param str video_id:
pytube/extract.py CHANGED
@@ -1,12 +1,11 @@
1
  """This module contains all non-cipher related data extraction logic."""
2
- import json
3
  import logging
4
  import urllib.parse
5
  import re
6
  from collections import OrderedDict
7
  from datetime import datetime
8
  from typing import Any, Dict, List, Optional, Tuple
9
- from urllib.parse import parse_qs, parse_qsl, quote, unquote, urlencode, urlparse
10
 
11
  from pytube.cipher import Cipher
12
  from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
@@ -90,34 +89,6 @@ def is_age_restricted(watch_html: str) -> bool:
90
  return True
91
 
92
 
93
- def is_region_blocked(watch_html: str) -> bool:
94
- """Determine if a video is not available in the user's region.
95
-
96
- :param str watch_html:
97
- The html contents of the watch page.
98
- :rtype: bool
99
- :returns:
100
- True if the video is blocked in the users region.
101
- False if not, or if unknown.
102
- """
103
- player_response = initial_player_response(watch_html)
104
- country_code_patterns = [
105
- r"gl\s*=\s*['\"](\w{2})['\"]", # gl="US"
106
- r"['\"]gl['\"]\s*:\s*['\"](\w{2})['\"]" # "gl":"US"
107
- ]
108
- for pattern in country_code_patterns:
109
- try:
110
- yt_detected_country = regex_search(pattern, watch_html, 1)
111
- available_countries = player_response[
112
- 'microformat']['playerMicroformatRenderer']['availableCountries']
113
- except (KeyError, RegexMatchError):
114
- pass
115
- else:
116
- if yt_detected_country not in available_countries:
117
- return True
118
- return False
119
-
120
-
121
  def playability_status(watch_html: str) -> (str, str):
122
  """Return the playability status and status explanation of a video.
123
 
@@ -197,10 +168,10 @@ def channel_name(url: str) -> str:
197
  YouTube channel name.
198
  """
199
  patterns = [
200
- r"(?:\/(c)\/([\d\w_\-]+)(\/.*)?)",
201
- r"(?:\/(channel)\/([\w\d_\-]+)(\/.*)?)",
202
- r"(?:\/(u)\/([\d\w_\-]+)(\/.*)?)",
203
- r"(?:\/(user)\/([\w\d_\-]+)(\/.*)?)"
204
  ]
205
  for pattern in patterns:
206
  regex = re.compile(pattern)
@@ -426,29 +397,23 @@ def get_ytcfg(html: str) -> str:
426
  )
427
 
428
 
429
- def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
430
  """Apply the decrypted signature to the stream manifest.
431
 
432
- :param dict config_args:
433
  Details of the media streams available.
434
- :param str fmt:
435
- Key in stream manifests (``ytplayer_config``) containing progressive
436
- download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
437
- ``adaptive_fmts``).
438
  :param str js:
439
  The contents of the base.js asset file.
440
 
441
  """
442
  cipher = Cipher(js=js)
443
- stream_manifest = config_args[fmt]
444
 
445
  for i, stream in enumerate(stream_manifest):
446
  try:
447
  url: str = stream["url"]
448
  except KeyError:
449
  live_stream = (
450
- json.loads(config_args["player_response"])
451
- .get("playabilityStatus", {},)
452
  .get("liveStreamability")
453
  )
454
  if live_stream:
@@ -468,27 +433,28 @@ def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
468
  logger.debug(
469
  "finished descrambling signature for itag=%s", stream["itag"]
470
  )
 
 
 
471
  query_params = parse_qs(urlparse(url).query)
 
 
 
 
472
  if 'ratebypass' not in query_params.keys():
473
  # Cipher n to get the updated value
474
 
475
- initial_n = list(query_params['n'][0])
476
  new_n = cipher.calculate_n(initial_n)
477
- query_params['n'][0] = new_n
478
 
479
- # Update the value
480
- parsed = urlparse(url)
481
- # The parsed query params are lists of a single element, convert to proper dicts.
482
- query_params = {
483
- k: v[0] for k,v in query_params.items()
484
- }
485
- url = f'{parsed.scheme}://{parsed.netloc}{parsed.path}?{urlencode(query_params)}'
486
 
487
  # 403 forbidden fix
488
- stream_manifest[i]["url"] = url + "&sig=" + signature
489
 
490
 
491
- def apply_descrambler(stream_data: Dict, key: str) -> None:
492
  """Apply various in-place transforms to YouTube's media stream data.
493
 
494
  Creates a ``list`` of dictionaries by string splitting on commas, then
@@ -497,8 +463,6 @@ def apply_descrambler(stream_data: Dict, key: str) -> None:
497
 
498
  :param dict stream_data:
499
  Dictionary containing query string encoded values.
500
- :param str key:
501
- Name of the key in dictionary.
502
 
503
  **Example**:
504
 
@@ -508,64 +472,27 @@ def apply_descrambler(stream_data: Dict, key: str) -> None:
508
  {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
509
 
510
  """
511
- otf_type = "FORMAT_STREAM_TYPE_OTF"
512
-
513
- if key == "url_encoded_fmt_stream_map" and not stream_data.get(
514
- "url_encoded_fmt_stream_map"
515
- ):
516
- if isinstance(stream_data["player_response"], str):
517
- streaming_data = json.loads(stream_data["player_response"])["streamingData"]
518
- else:
519
- streaming_data = stream_data["player_response"]["streamingData"]
520
- formats = []
521
- if 'formats' in streaming_data.keys():
522
- formats.extend(streaming_data['formats'])
523
- if 'adaptiveFormats' in streaming_data.keys():
524
- formats.extend(streaming_data['adaptiveFormats'])
525
- try:
526
- stream_data[key] = [
527
- {
528
- "url": format_item["url"],
529
- "type": format_item["mimeType"],
530
- "quality": format_item["quality"],
531
- "itag": format_item["itag"],
532
- "fps": format_item["fps"] if 'video' in format_item["mimeType"] else None,
533
- "bitrate": format_item.get("bitrate"),
534
- "is_otf": (format_item.get("type") == otf_type),
535
- 'content_length': int(format_item.get('contentLength', 0)),
536
- }
537
- for format_item in formats
538
- ]
539
- except KeyError:
540
- cipher_url = [
541
- parse_qs(
542
- data[
543
- "cipher" if "cipher" in data.keys() else "signatureCipher"
544
- ]
545
- )
546
- for data in formats
547
- ]
548
- stream_data[key] = [
549
- {
550
- "url": cipher_url[i]["url"][0],
551
- "s": cipher_url[i]["s"][0],
552
- "type": format_item["mimeType"],
553
- "quality": format_item["quality"],
554
- "itag": format_item["itag"],
555
- "fps": format_item["fps"] if 'video' in format_item["mimeType"] else None,
556
- "bitrate": format_item.get("bitrate"),
557
- "is_otf": (format_item.get("type") == otf_type),
558
- 'content_length': int(format_item.get('contentLength', 0)),
559
- }
560
- for i, format_item in enumerate(formats)
561
- ]
562
- else:
563
- stream_data[key] = [
564
- {k: unquote(v) for k, v in parse_qsl(i)}
565
- for i in stream_data[key].split(",")
566
- ]
567
 
568
  logger.debug("applying descrambler")
 
569
 
570
 
571
  def initial_data(watch_html: str) -> str:
 
1
  """This module contains all non-cipher related data extraction logic."""
 
2
  import logging
3
  import urllib.parse
4
  import re
5
  from collections import OrderedDict
6
  from datetime import datetime
7
  from typing import Any, Dict, List, Optional, Tuple
8
+ from urllib.parse import parse_qs, quote, urlencode, urlparse
9
 
10
  from pytube.cipher import Cipher
11
  from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
 
89
  return True
90
 
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def playability_status(watch_html: str) -> (str, str):
93
  """Return the playability status and status explanation of a video.
94
 
 
168
  YouTube channel name.
169
  """
170
  patterns = [
171
+ r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)",
172
+ r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)",
173
+ r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)",
174
+ r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)"
175
  ]
176
  for pattern in patterns:
177
  regex = re.compile(pattern)
 
397
  )
398
 
399
 
400
+ def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str) -> None:
401
  """Apply the decrypted signature to the stream manifest.
402
 
403
+ :param dict stream_manifest:
404
  Details of the media streams available.
 
 
 
 
405
  :param str js:
406
  The contents of the base.js asset file.
407
 
408
  """
409
  cipher = Cipher(js=js)
 
410
 
411
  for i, stream in enumerate(stream_manifest):
412
  try:
413
  url: str = stream["url"]
414
  except KeyError:
415
  live_stream = (
416
+ vid_info.get("playabilityStatus", {},)
 
417
  .get("liveStreamability")
418
  )
419
  if live_stream:
 
433
  logger.debug(
434
  "finished descrambling signature for itag=%s", stream["itag"]
435
  )
436
+ parsed_url = urlparse(url)
437
+
438
+ # Convert query params off url to dict
439
  query_params = parse_qs(urlparse(url).query)
440
+ query_params = {
441
+ k: v[0] for k,v in query_params.items()
442
+ }
443
+ query_params['sig'] = signature
444
  if 'ratebypass' not in query_params.keys():
445
  # Cipher n to get the updated value
446
 
447
+ initial_n = list(query_params['n'])
448
  new_n = cipher.calculate_n(initial_n)
449
+ query_params['n'] = new_n
450
 
451
+ url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}' # noqa:E501
 
 
 
 
 
 
452
 
453
  # 403 forbidden fix
454
+ stream_manifest[i]["url"] = url
455
 
456
 
457
+ def apply_descrambler(stream_data: Dict) -> None:
458
  """Apply various in-place transforms to YouTube's media stream data.
459
 
460
  Creates a ``list`` of dictionaries by string splitting on commas, then
 
463
 
464
  :param dict stream_data:
465
  Dictionary containing query string encoded values.
 
 
466
 
467
  **Example**:
468
 
 
472
  {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
473
 
474
  """
475
+ if 'url' in stream_data:
476
+ return None
477
+
478
+ # Merge formats and adaptiveFormats into a single list
479
+ formats = []
480
+ if 'formats' in stream_data.keys():
481
+ formats.extend(stream_data['formats'])
482
+ if 'adaptiveFormats' in stream_data.keys():
483
+ formats.extend(stream_data['adaptiveFormats'])
484
+
485
+ # Extract url and s from signatureCiphers as necessary
486
+ for data in formats:
487
+ if 'url' not in data:
488
+ if 'signatureCipher' in data:
489
+ cipher_url = parse_qs(data['signatureCipher'])
490
+ data['url'] = cipher_url['url'][0]
491
+ data['s'] = cipher_url['s'][0]
492
+ data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
 
494
  logger.debug("applying descrambler")
495
+ return formats
496
 
497
 
498
  def initial_data(watch_html: str) -> str:
pytube/helpers.py CHANGED
@@ -288,7 +288,6 @@ def generate_all_html_json_mocks():
288
  test_vid_ids = [
289
  '2lAe1cqCOXo',
290
  '5YceQ8YqYMc',
291
- 'hZpzr8TbF08',
292
  'irauhITDrsE',
293
  'm8uHb5jIGN8',
294
  'QRS8MkLhQmM',
@@ -326,7 +325,7 @@ def create_mock_html_json(vid_id) -> Dict[str, Any]:
326
  'js': yt.js,
327
  'embed_html': yt.embed_html,
328
  'watch_html': yt.watch_html,
329
- 'vid_info_raw': yt.vid_info_raw
330
  }
331
 
332
  logger.info(f'Outputing json.gz file to {gzip_filepath}')
 
288
  test_vid_ids = [
289
  '2lAe1cqCOXo',
290
  '5YceQ8YqYMc',
 
291
  'irauhITDrsE',
292
  'm8uHb5jIGN8',
293
  'QRS8MkLhQmM',
 
325
  'js': yt.js,
326
  'embed_html': yt.embed_html,
327
  'watch_html': yt.watch_html,
328
+ 'vid_info': yt.vid_info
329
  }
330
 
331
  logger.info(f'Outputing json.gz file to {gzip_filepath}')
pytube/innertube.py CHANGED
@@ -5,13 +5,28 @@ interfaces returns raw results. These should instead be parsed to extract
5
  the useful information for the end user.
6
  """
7
  # Native python imports
8
- from datetime import datetime
9
  import json
 
 
 
10
  from urllib import parse
11
 
12
  # Local imports
13
  from pytube import request
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  _default_clients = {
17
  'WEB': {
@@ -31,34 +46,158 @@ _default_clients = {
31
  }
32
  },
33
  'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  }
36
  _token_timeout = 1800
 
 
37
 
38
 
39
  class InnerTube:
40
  """Object for interacting with the innertube API."""
41
- def __init__(self, client='WEB', bearer_token=None):
 
 
 
 
 
 
 
 
 
 
42
  self.context = _default_clients[client]['context']
43
  self.api_key = _default_clients[client]['api_key']
44
- self.bearer_token = bearer_token
45
- self.last_refresh = None
46
- self.refresh_bearer_token()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  def refresh_bearer_token(self, force=False):
49
- """Refreshes the OAuth token.
50
 
51
- This is skeleton code for potential future functionality, so it is incomplete.
 
52
  """
53
- # Skip refresh if it's been less than 30 minutes
54
- if self.last_refresh and not force:
55
- # Use a 30-minute timer.
56
- if (datetime.now() - self.last_refresh).total_seconds() < _token_timeout:
57
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
- # TODO: Refresh the token
 
 
60
 
61
- self.last_refresh = datetime.now()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  @property
64
  def base_url(self):
@@ -76,19 +215,29 @@ class InnerTube:
76
  def base_params(self):
77
  """Return the base query parameters to transmit to the innertube API."""
78
  return {
79
- 'key': self.api_key
 
 
80
  }
81
 
82
  def _call_api(self, endpoint, query, data):
83
  """Make a request to a given endpoint with the provided query parameters and data."""
 
 
 
 
84
  endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
85
  headers = {
86
  'Content-Type': 'application/json',
87
  }
88
  # Add the bearer token if applicable
89
- if self.bearer_token:
90
- self.refresh_bearer_token()
91
- headers['authorization'] = f'Bearer {self.bearer_token}'
 
 
 
 
92
 
93
  response = request._execute_request(
94
  endpoint_url,
 
5
  the useful information for the end user.
6
  """
7
  # Native python imports
 
8
  import json
9
+ import os
10
+ import pathlib
11
+ import time
12
  from urllib import parse
13
 
14
  # Local imports
15
  from pytube import request
16
 
17
+ # YouTube on TV client secrets
18
+ _client_id = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
19
+ _client_secret = 'SboVhoG9s0rNafixCSGGKXAT'
20
+
21
+ # Extracted API keys -- unclear what these are linked to.
22
+ _api_keys = [
23
+ 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
24
+ 'AIzaSyCtkvNIR1HCEwzsqK6JuE6KqpyjusIRI30',
25
+ 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
26
+ 'AIzaSyC8UYZpvA2eknNex0Pjid0_eTLJoDu6los',
27
+ 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
28
+ 'AIzaSyDHQ9ipnphqTzDqZsbtd8_Ru4_kiKVQe2k'
29
+ ]
30
 
31
  _default_clients = {
32
  'WEB': {
 
46
  }
47
  },
48
  'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
49
+ },
50
+ 'WEB_EMBED': {
51
+ 'context': {
52
+ 'client': {
53
+ 'clientName': 'WEB',
54
+ 'clientVersion': '2.20210721.00.00',
55
+ 'clientScreen': 'EMBED'
56
+ }
57
+ },
58
+ 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
59
+ },
60
+ 'ANDROID_EMBED': {
61
+ 'context': {
62
+ 'client': {
63
+ 'clientName': 'ANDROID',
64
+ 'clientVersion': '16.20',
65
+ 'clientScreen': 'EMBED'
66
+ }
67
+ },
68
+ 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
69
  }
70
  }
71
  _token_timeout = 1800
72
+ _cache_dir = pathlib.Path(__file__).parent.resolve() / '__cache__'
73
+ _token_file = os.path.join(_cache_dir, 'tokens.json')
74
 
75
 
76
  class InnerTube:
77
  """Object for interacting with the innertube API."""
78
+ def __init__(self, client='ANDROID', use_oauth=False, allow_cache=True):
79
+ """Initialize an InnerTube object.
80
+
81
+ :param str client:
82
+ Client to use for the object.
83
+ Defaults to 'ANDROID'.
84
+ :param bool use_oauth:
85
+ Whether or not to authenticate to YouTube.
86
+ :param bool allow_cache:
87
+ Allows caching of oauth tokens on the machine.
88
+ """
89
  self.context = _default_clients[client]['context']
90
  self.api_key = _default_clients[client]['api_key']
91
+ self.access_token = None
92
+ self.refresh_token = None
93
+ self.use_oauth = use_oauth
94
+ self.allow_cache = allow_cache
95
+
96
+ # Stored as epoch time
97
+ self.expires = None
98
+
99
+ # Try to load from file if specified
100
+ if self.use_oauth and self.allow_cache:
101
+ # Try to load from file if possible
102
+ if os.path.exists(_token_file):
103
+ with open(_token_file) as f:
104
+ data = json.load(f)
105
+ self.access_token = data['access_token']
106
+ self.refresh_token = data['refresh_token']
107
+ self.expires = data['expires']
108
+ self.refresh_bearer_token()
109
+
110
+ def cache_tokens(self):
111
+ """Cache tokens to file if allowed."""
112
+ if not self.allow_cache:
113
+ return
114
+
115
+ data = {
116
+ 'access_token': self.access_token,
117
+ 'refresh_token': self.refresh_token,
118
+ 'expires': self.expires
119
+ }
120
+ if not os.path.exists(_cache_dir):
121
+ os.mkdir(_cache_dir)
122
+ with open(_token_file, 'w') as f:
123
+ json.dump(data, f)
124
 
125
  def refresh_bearer_token(self, force=False):
126
+ """Refreshes the OAuth token if necessary.
127
 
128
+ :param bool force:
129
+ Force-refresh the bearer token.
130
  """
131
+ if not self.use_oauth:
132
+ return
133
+ # Skip refresh if it's not necessary and not forced
134
+ if self.expires > time.time() and not force:
135
+ return
136
+
137
+ # Subtracting 30 seconds is arbitrary to avoid potential time discrepancies
138
+ start_time = int(time.time() - 30)
139
+ data = {
140
+ 'client_id': _client_id,
141
+ 'client_secret': _client_secret,
142
+ 'grant_type': 'refresh_token',
143
+ 'refresh_token': self.refresh_token
144
+ }
145
+ response = request._execute_request(
146
+ 'https://oauth2.googleapis.com/token',
147
+ 'POST',
148
+ headers={
149
+ 'Content-Type': 'application/json'
150
+ },
151
+ data=data
152
+ )
153
+ response_data = json.loads(response.read())
154
 
155
+ self.access_token = response_data['access_token']
156
+ self.expires = start_time + response_data['expires_in']
157
+ self.cache_tokens()
158
 
159
+ def fetch_bearer_token(self):
160
+ """Fetch an OAuth token."""
161
+ # Subtracting 30 seconds is arbitrary to avoid potential time discrepancies
162
+ start_time = int(time.time() - 30)
163
+ data = {
164
+ 'client_id': _client_id,
165
+ 'scope': 'https://www.googleapis.com/auth/youtube'
166
+ }
167
+ response = request._execute_request(
168
+ 'https://oauth2.googleapis.com/device/code',
169
+ 'POST',
170
+ headers={
171
+ 'Content-Type': 'application/json'
172
+ },
173
+ data=data
174
+ )
175
+ response_data = json.loads(response.read())
176
+ verification_url = response_data['verification_url']
177
+ user_code = response_data['user_code']
178
+ print(f'Please open {verification_url} and input code {user_code}')
179
+ input('Press enter when you have completed this step.')
180
+
181
+ data = {
182
+ 'client_id': _client_id,
183
+ 'client_secret': _client_secret,
184
+ 'device_code': response_data['device_code'],
185
+ 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code'
186
+ }
187
+ response = request._execute_request(
188
+ 'https://oauth2.googleapis.com/token',
189
+ 'POST',
190
+ headers={
191
+ 'Content-Type': 'application/json'
192
+ },
193
+ data=data
194
+ )
195
+ response_data = json.loads(response.read())
196
+
197
+ self.access_token = response_data['access_token']
198
+ self.refresh_token = response_data['refresh_token']
199
+ self.expires = start_time + response_data['expires_in']
200
+ self.cache_tokens()
201
 
202
  @property
203
  def base_url(self):
 
215
  def base_params(self):
216
  """Return the base query parameters to transmit to the innertube API."""
217
  return {
218
+ 'key': self.api_key,
219
+ 'contentCheckOk': True,
220
+ 'racyCheckOk': True
221
  }
222
 
223
  def _call_api(self, endpoint, query, data):
224
  """Make a request to a given endpoint with the provided query parameters and data."""
225
+ # Remove the API key if oauth is being used.
226
+ if self.use_oauth:
227
+ del query['key']
228
+
229
  endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
230
  headers = {
231
  'Content-Type': 'application/json',
232
  }
233
  # Add the bearer token if applicable
234
+ if self.use_oauth:
235
+ if self.access_token:
236
+ self.refresh_bearer_token()
237
+ headers['Authorization'] = f'Bearer {self.access_token}'
238
+ else:
239
+ self.fetch_bearer_token()
240
+ headers['Authorization'] = f'Bearer {self.access_token}'
241
 
242
  response = request._execute_request(
243
  endpoint_url,
pytube/streams.py CHANGED
@@ -25,15 +25,12 @@ class Stream:
25
  """Container for stream manifest data."""
26
 
27
  def __init__(
28
- self, stream: Dict, player_config_args: Dict, monostate: Monostate
29
  ):
30
  """Construct a :class:`Stream <Stream>`.
31
 
32
  :param dict stream:
33
  The unscrambled data extracted from YouTube.
34
- :param dict player_config_args:
35
- The data object containing video media data like title and
36
- keywords.
37
  :param dict monostate:
38
  Dictionary of data shared across all instances of
39
  :class:`Stream <Stream>`.
@@ -50,7 +47,7 @@ class Stream:
50
  # set type and codec info
51
 
52
  # 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis']
53
- self.mime_type, self.codecs = extract.mime_type_codec(stream["type"])
54
 
55
  # 'video/webm' -> 'video', 'webm'
56
  self.type, self.subtype = self.mime_type.split("/")
@@ -62,16 +59,16 @@ class Stream:
62
  self.is_otf: bool = stream["is_otf"]
63
  self.bitrate: Optional[int] = stream["bitrate"]
64
 
65
- self._filesize: Optional[int] = stream['content_length'] # filesize in bytes
 
66
 
67
  # Additional information about the stream format, such as resolution,
68
  # frame rate, and whether the stream is live (HLS) or 3D.
69
  itag_profile = get_format_profile(self.itag)
70
  self.is_dash = itag_profile["is_dash"]
71
  self.abr = itag_profile["abr"] # average bitrate (audio streams only)
72
- self.fps = stream[
73
- "fps"
74
- ] # frames per second (video streams only)
75
  self.resolution = itag_profile[
76
  "resolution"
77
  ] # resolution (e.g.: "480p")
@@ -79,9 +76,6 @@ class Stream:
79
  self.is_hdr = itag_profile["is_hdr"]
80
  self.is_live = itag_profile["is_live"]
81
 
82
- # The player configuration, contains info like the video title.
83
- self.player_config_args = player_config_args
84
-
85
  @property
86
  def is_adaptive(self) -> bool:
87
  """Whether the stream is DASH.
 
25
  """Container for stream manifest data."""
26
 
27
  def __init__(
28
+ self, stream: Dict, monostate: Monostate
29
  ):
30
  """Construct a :class:`Stream <Stream>`.
31
 
32
  :param dict stream:
33
  The unscrambled data extracted from YouTube.
 
 
 
34
  :param dict monostate:
35
  Dictionary of data shared across all instances of
36
  :class:`Stream <Stream>`.
 
47
  # set type and codec info
48
 
49
  # 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis']
50
+ self.mime_type, self.codecs = extract.mime_type_codec(stream["mimeType"])
51
 
52
  # 'video/webm' -> 'video', 'webm'
53
  self.type, self.subtype = self.mime_type.split("/")
 
59
  self.is_otf: bool = stream["is_otf"]
60
  self.bitrate: Optional[int] = stream["bitrate"]
61
 
62
+ # filesize in bytes
63
+ self._filesize: Optional[int] = int(stream.get('contentLength', 0))
64
 
65
  # Additional information about the stream format, such as resolution,
66
  # frame rate, and whether the stream is live (HLS) or 3D.
67
  itag_profile = get_format_profile(self.itag)
68
  self.is_dash = itag_profile["is_dash"]
69
  self.abr = itag_profile["abr"] # average bitrate (audio streams only)
70
+ if 'fps' in stream:
71
+ self.fps = stream['fps'] # Video streams only
 
72
  self.resolution = itag_profile[
73
  "resolution"
74
  ] # resolution (e.g.: "480p")
 
76
  self.is_hdr = itag_profile["is_hdr"]
77
  self.is_live = itag_profile["is_live"]
78
 
 
 
 
79
  @property
80
  def is_adaptive(self) -> bool:
81
  """Whether the stream is DASH.
tests/conftest.py CHANGED
@@ -27,7 +27,6 @@ def load_and_init_from_playback_file(filename, mock_urlopen):
27
  mock_url_open_object = mock.Mock()
28
  mock_url_open_object.read.side_effect = [
29
  pb['watch_html'].encode('utf-8'),
30
- pb['vid_info_raw'].encode('utf-8'),
31
  pb['js'].encode('utf-8')
32
  ]
33
  mock_urlopen.return_value = mock_url_open_object
@@ -39,10 +38,9 @@ def load_and_init_from_playback_file(filename, mock_urlopen):
39
  # deferred
40
  v = YouTube(pb["url"])
41
  v.watch_html
42
- v.vid_info_raw
43
  v.js
44
  v.fmt_streams
45
- v.player_response
46
  return v
47
 
48
 
@@ -81,13 +79,6 @@ def missing_recording():
81
  return load_playback_file(filename)
82
 
83
 
84
- @pytest.fixture
85
- def region_blocked():
86
- """Youtube instance initialized with video id hZpzr8TbF08."""
87
- filename = "yt-video-hZpzr8TbF08-html.json.gz"
88
- return load_playback_file(filename)
89
-
90
-
91
  @pytest.fixture
92
  def playlist_html():
93
  """Youtube playlist HTML loaded on 2020-01-25 from
 
27
  mock_url_open_object = mock.Mock()
28
  mock_url_open_object.read.side_effect = [
29
  pb['watch_html'].encode('utf-8'),
 
30
  pb['js'].encode('utf-8')
31
  ]
32
  mock_urlopen.return_value = mock_url_open_object
 
38
  # deferred
39
  v = YouTube(pb["url"])
40
  v.watch_html
41
+ v._vid_info = pb['vid_info']
42
  v.js
43
  v.fmt_streams
 
44
  return v
45
 
46
 
 
79
  return load_playback_file(filename)
80
 
81
 
 
 
 
 
 
 
 
82
  @pytest.fixture
83
  def playlist_html():
84
  """Youtube playlist HTML loaded on 2020-01-25 from
tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz and b/tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz differ
 
tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz and b/tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz differ
 
tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz and b/tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz differ
 
tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz and b/tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz differ
 
tests/mocks/yt-video-hZpzr8TbF08-html.json.gz DELETED
Binary file (642 kB)
 
tests/mocks/yt-video-irauhITDrsE-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-irauhITDrsE-html.json.gz and b/tests/mocks/yt-video-irauhITDrsE-html.json.gz differ
 
tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz and b/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz differ
 
tests/test_exceptions.py CHANGED
@@ -86,15 +86,3 @@ def test_raises_recording_unavailable(missing_recording):
86
  mock_url_open.return_value = mock_url_open_object
87
  with pytest.raises(exceptions.RecordingUnavailable):
88
  YouTube('https://youtube.com/watch?v=5YceQ8YqYMc').streams
89
-
90
-
91
- def test_raises_video_region_blocked(region_blocked):
92
- with mock.patch('pytube.request.urlopen') as mock_url_open:
93
- # Mock the responses to YouTube
94
- mock_url_open_object = mock.Mock()
95
- mock_url_open_object.read.side_effect = [
96
- region_blocked['watch_html'].encode('utf-8')
97
- ]
98
- mock_url_open.return_value = mock_url_open_object
99
- with pytest.raises(exceptions.VideoRegionBlocked):
100
- YouTube('https://youtube.com/watch?v=hZpzr8TbF08').streams
 
86
  mock_url_open.return_value = mock_url_open_object
87
  with pytest.raises(exceptions.RecordingUnavailable):
88
  YouTube('https://youtube.com/watch?v=5YceQ8YqYMc').streams
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_helpers.py CHANGED
@@ -120,7 +120,7 @@ def test_create_mock_html_json(mock_url_open, mock_open):
120
  b'"jsUrl":"/s/player/13371337/player_ias.vflset/en_US/base.js"'),
121
  b'embed_html',
122
  b'watch_html',
123
- b'vid_info_raw',
124
  ]
125
  mock_url_open.return_value = mock_url_open_object
126
 
 
120
  b'"jsUrl":"/s/player/13371337/player_ias.vflset/en_US/base.js"'),
121
  b'embed_html',
122
  b'watch_html',
123
+ b'{\"responseContext\":{}}',
124
  ]
125
  mock_url_open.return_value = mock_url_open_object
126
 
tests/test_query.py CHANGED
@@ -5,18 +5,18 @@ import pytest
5
  @pytest.mark.parametrize(
6
  ("test_input", "expected"),
7
  [
8
- ({"progressive": True}, [18, 22]),
9
  ({"resolution": "720p"}, [22, 136, 247, 398]),
10
  ({"res": "720p"}, [22, 136, 247, 398]),
11
  ({"fps": 24, "resolution": "480p"}, [135, 244, 397]),
12
- ({"mime_type": "audio/mp4"}, [140]),
13
- ({"type": "audio"}, [140, 249, 250, 251]),
14
- ({"subtype": "3gpp"}, []),
15
  ({"abr": "128kbps"}, [140]),
16
  ({"bitrate": "128kbps"}, [140]),
17
  ({"audio_codec": "opus"}, [249, 250, 251]),
18
  ({"video_codec": "vp9"}, [248, 247, 244, 243, 242, 278]),
19
- ({"only_audio": True}, [140, 249, 250, 251]),
20
  ({"only_video": True, "video_codec": "avc1.4d4015"}, [133]),
21
  ({"adaptive": True, "resolution": "1080p"}, [137, 248, 399]),
22
  ({"custom_filter_functions": [lambda s: s.itag == 18]}, [18]),
@@ -50,7 +50,7 @@ def test_get_first(cipher_signature):
50
  """Ensure :meth:`~pytube.StreamQuery.first` returns the expected
51
  :class:`Stream <Stream>`.
52
  """
53
- assert cipher_signature.streams[0].itag == 18
54
 
55
 
56
  def test_order_by(cipher_signature):
@@ -61,7 +61,13 @@ def test_order_by(cipher_signature):
61
  s.itag
62
  for s in cipher_signature.streams.filter(type="audio").order_by("itag")
63
  ]
64
- assert itags == [140, 249, 250, 251]
 
 
 
 
 
 
65
 
66
 
67
  def test_order_by_descending(cipher_signature):
@@ -75,7 +81,12 @@ def test_order_by_descending(cipher_signature):
75
  .order_by("itag")
76
  .desc()
77
  ]
78
- assert itags == [251, 250, 249, 140]
 
 
 
 
 
79
 
80
 
81
  def test_order_by_non_numerical(cipher_signature):
@@ -99,7 +110,11 @@ def test_order_by_ascending(cipher_signature):
99
  .order_by("itag")
100
  .asc()
101
  ]
102
- assert itags == [140, 249, 250, 251]
 
 
 
 
103
 
104
 
105
  def test_order_by_non_numerical_ascending(cipher_signature):
@@ -114,7 +129,16 @@ def test_order_by_non_numerical_ascending(cipher_signature):
114
 
115
  def test_order_by_with_none_values(cipher_signature):
116
  abrs = [s.abr for s in cipher_signature.streams.order_by("abr").asc()]
117
- assert abrs == ["50kbps", "70kbps", "96kbps", "128kbps", "160kbps", "192kbps"]
 
 
 
 
 
 
 
 
 
118
 
119
 
120
  def test_get_by_itag(cipher_signature):
@@ -143,7 +167,7 @@ def test_get_highest_resolution(cipher_signature):
143
  def test_filter_is_dash(cipher_signature):
144
  streams = cipher_signature.streams.filter(is_dash=False)
145
  itags = [s.itag for s in streams]
146
- assert itags == [18, 22]
147
 
148
 
149
  def test_get_audio_only(cipher_signature):
@@ -155,13 +179,13 @@ def test_get_audio_only_with_subtype(cipher_signature):
155
 
156
 
157
  def test_sequence(cipher_signature):
158
- assert len(cipher_signature.streams) == 24
159
  assert cipher_signature.streams[0] is not None
160
 
161
 
162
  def test_otf(cipher_signature):
163
  non_otf = cipher_signature.streams.otf()
164
- assert len(non_otf) == 24
165
 
166
  otf = cipher_signature.streams.otf(True)
167
  assert len(otf) == 0
 
5
  @pytest.mark.parametrize(
6
  ("test_input", "expected"),
7
  [
8
+ ({"progressive": True}, [17, 18, 22]),
9
  ({"resolution": "720p"}, [22, 136, 247, 398]),
10
  ({"res": "720p"}, [22, 136, 247, 398]),
11
  ({"fps": 24, "resolution": "480p"}, [135, 244, 397]),
12
+ ({"mime_type": "audio/mp4"}, [139, 140]),
13
+ ({"type": "audio"}, [139, 140, 249, 250, 251]),
14
+ ({"subtype": "3gpp"}, [17]),
15
  ({"abr": "128kbps"}, [140]),
16
  ({"bitrate": "128kbps"}, [140]),
17
  ({"audio_codec": "opus"}, [249, 250, 251]),
18
  ({"video_codec": "vp9"}, [248, 247, 244, 243, 242, 278]),
19
+ ({"only_audio": True}, [139, 140, 249, 250, 251]),
20
  ({"only_video": True, "video_codec": "avc1.4d4015"}, [133]),
21
  ({"adaptive": True, "resolution": "1080p"}, [137, 248, 399]),
22
  ({"custom_filter_functions": [lambda s: s.itag == 18]}, [18]),
 
50
  """Ensure :meth:`~pytube.StreamQuery.first` returns the expected
51
  :class:`Stream <Stream>`.
52
  """
53
+ assert cipher_signature.streams.first().itag == cipher_signature.streams[0].itag
54
 
55
 
56
  def test_order_by(cipher_signature):
 
61
  s.itag
62
  for s in cipher_signature.streams.filter(type="audio").order_by("itag")
63
  ]
64
+ expected_itags = [
65
+ s.itag
66
+ for s in cipher_signature.streams.filter(type="audio")
67
+ ]
68
+ expected_itags.sort()
69
+
70
+ assert itags == expected_itags
71
 
72
 
73
  def test_order_by_descending(cipher_signature):
 
81
  .order_by("itag")
82
  .desc()
83
  ]
84
+ expected_itags = [
85
+ s.itag
86
+ for s in cipher_signature.streams.filter(type="audio")
87
+ ]
88
+ expected_itags.sort(reverse=True)
89
+ assert itags == expected_itags
90
 
91
 
92
  def test_order_by_non_numerical(cipher_signature):
 
110
  .order_by("itag")
111
  .asc()
112
  ]
113
+ expected_itags = [
114
+ s.itag
115
+ for s in cipher_signature.streams.filter(type="audio")
116
+ ]
117
+ assert itags == expected_itags
118
 
119
 
120
  def test_order_by_non_numerical_ascending(cipher_signature):
 
129
 
130
  def test_order_by_with_none_values(cipher_signature):
131
  abrs = [s.abr for s in cipher_signature.streams.order_by("abr").asc()]
132
+ assert abrs == [
133
+ "24kbps",
134
+ "48kbps",
135
+ "50kbps",
136
+ "70kbps",
137
+ "96kbps",
138
+ "128kbps",
139
+ "160kbps",
140
+ "192kbps"
141
+ ]
142
 
143
 
144
  def test_get_by_itag(cipher_signature):
 
167
  def test_filter_is_dash(cipher_signature):
168
  streams = cipher_signature.streams.filter(is_dash=False)
169
  itags = [s.itag for s in streams]
170
+ assert itags == [17, 18, 22]
171
 
172
 
173
  def test_get_audio_only(cipher_signature):
 
179
 
180
 
181
  def test_sequence(cipher_signature):
182
+ assert len(cipher_signature.streams) == 26
183
  assert cipher_signature.streams[0] is not None
184
 
185
 
186
  def test_otf(cipher_signature):
187
  non_otf = cipher_signature.streams.otf()
188
+ assert len(non_otf) == 26
189
 
190
  otf = cipher_signature.streams.otf(True)
191
  assert len(otf) == 0
tests/test_streams.py CHANGED
@@ -28,19 +28,19 @@ def test_stream_to_buffer(mock_request, cipher_signature):
28
 
29
 
30
  def test_filesize(cipher_signature):
31
- assert cipher_signature.streams[0].filesize == 28282013
32
 
33
 
34
  def test_filesize_approx(cipher_signature):
35
  stream = cipher_signature.streams[0]
36
 
37
- assert stream.filesize_approx == 28309811
38
  stream.bitrate = None
39
- assert stream.filesize_approx == 28282013
40
 
41
 
42
  def test_default_filename(cipher_signature):
43
- expected = "YouTube Rewind 2019 For the Record YouTubeRewind.mp4"
44
  stream = cipher_signature.streams[0]
45
  assert stream.default_filename == expected
46
 
@@ -137,7 +137,7 @@ def test_download_with_prefix(cipher_signature):
137
  file_path = stream.download(filename_prefix="prefix")
138
  assert file_path == os.path.join(
139
  "/target",
140
- "prefixYouTube Rewind 2019 For the Record YouTubeRewind.mp4"
141
  )
142
 
143
 
@@ -175,7 +175,7 @@ def test_download_with_existing(cipher_signature):
175
  file_path = stream.download()
176
  assert file_path == os.path.join(
177
  "/target",
178
- "YouTube Rewind 2019 For the Record YouTubeRewind.mp4"
179
  )
180
  assert not request.stream.called
181
 
@@ -196,7 +196,7 @@ def test_download_with_existing_no_skip(cipher_signature):
196
  file_path = stream.download(skip_existing=False)
197
  assert file_path == os.path.join(
198
  "/target",
199
- "YouTube Rewind 2019 For the Record YouTubeRewind.mp4"
200
  )
201
  assert request.stream.called
202
 
@@ -250,32 +250,19 @@ def test_on_complete_hook(cipher_signature):
250
 
251
 
252
  def test_author(cipher_signature):
253
- expected = "Test author"
254
- cipher_signature._player_response = {"videoDetails": {"author": expected}}
255
- assert cipher_signature.author == expected
256
-
257
- expected = "unknown"
258
- cipher_signature.author = None
259
- cipher_signature._player_response = {'key': 'value'}
260
- assert cipher_signature.author == expected
261
 
262
 
263
  def test_thumbnail_when_in_details(cipher_signature):
264
- expected = "some url"
265
  cipher_signature._player_response = {
266
  "videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
267
  }
268
  assert cipher_signature.thumbnail_url == expected
269
 
270
 
271
- def test_thumbnail_when_not_in_details(cipher_signature):
272
- expected = "https://img.youtube.com/vi/2lAe1cqCOXo/maxresdefault.jpg"
273
- cipher_signature._player_response = {'key': 'value'}
274
- assert cipher_signature.thumbnail_url == expected
275
-
276
-
277
  def test_repr_for_audio_streams(cipher_signature):
278
- stream = str(cipher_signature.streams.filter(only_audio=True)[0])
279
  expected = (
280
  '<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" '
281
  'acodec="mp4a.40.2" progressive="False" type="audio">'
@@ -293,13 +280,16 @@ def test_repr_for_video_streams(cipher_signature):
293
 
294
 
295
  def test_repr_for_progressive_streams(cipher_signature):
296
- stream = str(cipher_signature.streams.filter(progressive=True)[0])
 
 
 
297
  expected = (
298
  '<Stream: itag="18" mime_type="video/mp4" res="360p" fps="24fps" '
299
  'vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" '
300
  'type="video">'
301
  )
302
- assert stream == expected
303
 
304
 
305
  def test_repr_for_adaptive_streams(cipher_signature):
 
28
 
29
 
30
  def test_filesize(cipher_signature):
31
+ assert cipher_signature.streams[0].filesize == 3399554
32
 
33
 
34
  def test_filesize_approx(cipher_signature):
35
  stream = cipher_signature.streams[0]
36
 
37
+ assert stream.filesize_approx == 3403320
38
  stream.bitrate = None
39
+ assert stream.filesize_approx == 3399554
40
 
41
 
42
  def test_default_filename(cipher_signature):
43
+ expected = "YouTube Rewind 2019 For the Record YouTubeRewind.3gpp"
44
  stream = cipher_signature.streams[0]
45
  assert stream.default_filename == expected
46
 
 
137
  file_path = stream.download(filename_prefix="prefix")
138
  assert file_path == os.path.join(
139
  "/target",
140
+ "prefixYouTube Rewind 2019 For the Record YouTubeRewind.3gpp"
141
  )
142
 
143
 
 
175
  file_path = stream.download()
176
  assert file_path == os.path.join(
177
  "/target",
178
+ "YouTube Rewind 2019 For the Record YouTubeRewind.3gpp"
179
  )
180
  assert not request.stream.called
181
 
 
196
  file_path = stream.download(skip_existing=False)
197
  assert file_path == os.path.join(
198
  "/target",
199
+ "YouTube Rewind 2019 For the Record YouTubeRewind.3gpp"
200
  )
201
  assert request.stream.called
202
 
 
250
 
251
 
252
  def test_author(cipher_signature):
253
+ assert cipher_signature.author == 'YouTube'
 
 
 
 
 
 
 
254
 
255
 
256
  def test_thumbnail_when_in_details(cipher_signature):
257
+ expected = f"https://i.ytimg.com/vi/{cipher_signature.video_id}/sddefault.jpg"
258
  cipher_signature._player_response = {
259
  "videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
260
  }
261
  assert cipher_signature.thumbnail_url == expected
262
 
263
 
 
 
 
 
 
 
264
  def test_repr_for_audio_streams(cipher_signature):
265
+ stream = str(cipher_signature.streams.filter(only_audio=True)[1])
266
  expected = (
267
  '<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" '
268
  'acodec="mp4a.40.2" progressive="False" type="audio">'
 
280
 
281
 
282
  def test_repr_for_progressive_streams(cipher_signature):
283
+ stream_reprs = [
284
+ str(s)
285
+ for s in cipher_signature.streams.filter(progressive=True)
286
+ ]
287
  expected = (
288
  '<Stream: itag="18" mime_type="video/mp4" res="360p" fps="24fps" '
289
  'vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" '
290
  'type="video">'
291
  )
292
+ assert expected in stream_reprs
293
 
294
 
295
  def test_repr_for_adaptive_streams(cipher_signature):