Ferdowsi
/

pytube

Model card Files Files and versions Community

Alex G committed on Aug 3, 2021

Commit

fc9aec5

unverified ·

1 Parent(s): 10989d2

Fix #1060 (#1067)

* Use InnerTube in place of `get_video_info` URL

* Added some additional base parameters for innertube requests.

* Added OAuth support for the InnerTube client

* Add exception for age-restricted videos which can no longer be accessed without using auth.

* Carved out and simplified code where possible due to API changes.

* Added renderer catch -- fixes #1068

* Additional channel name support for URL-encoded names.

* Updated test mocks, removed region-locked test because that functionality no longer works.

Files changed (21) hide show

.gitignore +3 -0
pytube/__main__.py +73 -118
pytube/captions.py +11 -1
pytube/contrib/search.py +4 -0
pytube/exceptions.py +15 -1
pytube/extract.py +39 -112
pytube/helpers.py +1 -2
pytube/innertube.py +167 -18
pytube/streams.py +6 -12
tests/conftest.py +1 -10
tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz +0 -0
tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz +0 -0
tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz +0 -0
tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz +0 -0
tests/mocks/yt-video-hZpzr8TbF08-html.json.gz +0 -0
tests/mocks/yt-video-irauhITDrsE-html.json.gz +0 -0
tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz +0 -0
tests/test_exceptions.py +0 -12
tests/test_helpers.py +1 -1
tests/test_query.py +37 -13
tests/test_streams.py +15 -25

.gitignore CHANGED Viewed

@@ -138,3 +138,6 @@ test/**/*.xml
 # Common virtual environments
 venv/
 env/

 # Common virtual environments
 venv/
 env/
+# Token cache location
+__cache__/

pytube/__main__.py CHANGED Viewed

@@ -6,16 +6,15 @@ exclusively on the developer interface. Pytube offloads the heavy lifting to
 smaller peripheral modules and functions.
 """
-import json
 import logging
 from typing import Any, Callable, Dict, List, Optional
-from urllib.parse import parse_qsl
 import pytube
 import pytube.exceptions as exceptions
 from pytube import extract, request
 from pytube import Stream, StreamQuery
 from pytube.helpers import install_proxy
 from pytube.metadata import YouTubeMetadata
 from pytube.monostate import Monostate
@@ -31,6 +30,8 @@ class YouTube:
         on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
         on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
         proxies: Dict[str, str] = None,
     ):
         """Construct a :class:`YouTube <YouTube>`.
@@ -47,19 +48,11 @@ class YouTube:
         self._js: Optional[str] = None  # js fetched by js_url
         self._js_url: Optional[str] = None  # the url to the js, parsed from watch html
-        # note: vid_info may eventually be removed. It sounds like it once had
-        # additional formats, but that doesn't appear to still be the case.
-        # the url to vid info, parsed from watch html
-        self._vid_info_url: Optional[str] = None
-        self._vid_info_raw: Optional[str] = None  # content fetched by vid_info_url
-        self._vid_info: Optional[Dict] = None  # parsed content of vid_info_raw
         self._watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
         self._embed_html: Optional[str] = None
         self._player_config_args: Optional[Dict] = None  # inline js in the html containing
-        self._player_response: Optional[Dict] = None
-        # streams
         self._age_restricted: Optional[bool] = None
         self._fmt_streams: Optional[List[Stream]] = None
@@ -85,6 +78,9 @@ class YouTube:
         self._title = None
         self._publish_date = None
     def __repr__(self):
         return f'<pytube.__main__.YouTube object: videoId={self.video_id}>'
@@ -102,13 +98,6 @@ class YouTube:
         self._embed_html = request.get(url=self.embed_url)
         return self._embed_html
-    @property
-    def vid_info_raw(self):
-        if self._vid_info_raw:
-            return self._vid_info_raw
-        self._vid_info_raw = request.get(self.vid_info_url)
-        return self._vid_info_raw
     @property
     def age_restricted(self):
         if self._age_restricted:
@@ -116,21 +105,6 @@ class YouTube:
         self._age_restricted = extract.is_age_restricted(self.watch_html)
         return self._age_restricted
-    @property
-    def vid_info_url(self):
-        if self._vid_info_url:
-            return self._vid_info_url
-        if self.age_restricted:
-            self._vid_info_url = extract.video_info_url_age_restricted(
-                self.video_id, self.watch_url
-            )
-        else:
-            self._vid_info_url = extract.video_info_url(
-                video_id=self.video_id, watch_url=self.watch_url
-            )
-        return self._vid_info_url
     @property
     def js_url(self):
         if self._js_url:
@@ -159,20 +133,6 @@ class YouTube:
         return self._js
-    @property
-    def player_response(self):
-        """The player response contains subtitle information and video details."""
-        if self._player_response:
-            return self._player_response
-        if isinstance(self.player_config_args["player_response"], str):
-            self._player_response = json.loads(
-                self.player_config_args["player_response"]
-            )
-        else:
-            self._player_response = self.player_config_args["player_response"]
-        return self._player_response
     @property
     def initial_data(self):
         if self._initial_data:
@@ -181,21 +141,13 @@ class YouTube:
         return self._initial_data
     @property
-    def player_config_args(self):
-        if self._player_config_args:
-            return self._player_config_args
-        self._player_config_args = self.vid_info
-        # On pre-signed videos, we need to use get_ytplayer_config to fix
-        #  the player_response item
-        if 'streamingData' not in self.player_config_args['player_response']:
-            config_response = extract.get_ytplayer_config(self.watch_html)
-            if 'args' in config_response:
-                self.player_config_args['player_response'] = config_response['args']['player_response']  # noqa: E501
-            else:
-                self.player_config_args['player_response'] = config_response
-        return self._player_config_args
     @property
     def fmt_streams(self):
@@ -209,39 +161,29 @@ class YouTube:
             return self._fmt_streams
         self._fmt_streams = []
-        # https://github.com/pytube/pytube/issues/165
-        stream_maps = ["url_encoded_fmt_stream_map"]
-        if "adaptive_fmts" in self.player_config_args:
-            stream_maps.append("adaptive_fmts")
-        # unscramble the progressive and adaptive stream manifests.
-        for fmt in stream_maps:
-            if not self.age_restricted and fmt in self.vid_info:
-                extract.apply_descrambler(self.vid_info, fmt)
-            extract.apply_descrambler(self.player_config_args, fmt)
-            # If the cached js doesn't work, try fetching a new js file
-            # https://github.com/pytube/pytube/issues/1054
-            try:
-                extract.apply_signature(self.player_config_args, fmt, self.js)
-            except exceptions.ExtractError:
-                # To force an update to the js file, we clear the cache and retry
-                self._js = None
-                self._js_url = None
-                pytube.__js__ = None
-                pytube.__js_url__ = None
-                extract.apply_signature(self.player_config_args, fmt, self.js)
-            # build instances of :class:`Stream <Stream>`
-            # Initialize stream objects
-            stream_manifest = self.player_config_args[fmt]
-            for stream in stream_manifest:
-                video = Stream(
-                    stream=stream,
-                    player_config_args=self.player_config_args,
-                    monostate=self.stream_monostate,
-                )
-                self._fmt_streams.append(video)
         self.stream_monostate.title = self.title
         self.stream_monostate.duration = self.length
@@ -266,9 +208,6 @@ class YouTube:
                 elif reason == 'This live stream recording is not available.':
                     raise exceptions.RecordingUnavailable(video_id=self.video_id)
                 else:
-                    if reason == 'Video unavailable':
-                        if extract.is_region_blocked(self.watch_html):
-                            raise exceptions.VideoRegionBlocked(video_id=self.video_id)
                     raise exceptions.VideoUnavailable(video_id=self.video_id)
             elif status == 'LOGIN_REQUIRED':
                 if reason == (
@@ -288,7 +227,32 @@ class YouTube:
         :rtype: Dict[Any, Any]
         """
-        return dict(parse_qsl(self.vid_info_raw))
     @property
     def caption_tracks(self) -> List[pytube.Caption]:
@@ -297,7 +261,7 @@ class YouTube:
         :rtype: List[Caption]
         """
         raw_tracks = (
-            self.player_response.get("captions", {})
             .get("playerCaptionsTracklistRenderer", {})
             .get("captionTracks", [])
         )
@@ -327,7 +291,7 @@ class YouTube:
         :rtype: str
         """
         thumbnail_details = (
-            self.player_response.get("videoDetails", {})
             .get("thumbnail", {})
             .get("thumbnails")
         )
@@ -363,7 +327,7 @@ class YouTube:
             return self._title
         try:
-            self._title = self.player_response['videoDetails']['title']
         except KeyError:
             # Check_availability will raise the correct exception in most cases
             #  if it doesn't, ask for a report.
@@ -388,7 +352,7 @@ class YouTube:
         :rtype: str
         """
-        return self.player_response.get("videoDetails", {}).get("shortDescription")
     @property
     def rating(self) -> float:
@@ -397,7 +361,7 @@ class YouTube:
         :rtype: float
         """
-        return self.player_response.get("videoDetails", {}).get("averageRating")
     @property
     def length(self) -> int:
@@ -405,14 +369,7 @@ class YouTube:
         :rtype: int
         """
-        return int(
-            self.player_config_args.get("length_seconds")
-            or (
-                self.player_response.get("videoDetails", {}).get(
-                    "lengthSeconds"
-                )
-            )
-        )
     @property
     def views(self) -> int:
@@ -420,9 +377,7 @@ class YouTube:
         :rtype: int
         """
-        return int(
-            self.player_response.get("videoDetails", {}).get("viewCount")
-        )
     @property
     def author(self) -> str:
@@ -431,7 +386,7 @@ class YouTube:
         """
         if self._author:
             return self._author
-        self._author = self.player_response.get("videoDetails", {}).get(
             "author", "unknown"
         )
         return self._author
@@ -447,7 +402,7 @@ class YouTube:
         :rtype: List[str]
         """
-        return self.player_response.get('videoDetails', {}).get('keywords', [])
     @property
     def channel_id(self) -> str:
@@ -455,7 +410,7 @@ class YouTube:
         :rtype: str
         """
-        return self.player_response.get('videoDetails', {}).get('channelId', None)
     @property
     def channel_url(self) -> str:

 smaller peripheral modules and functions.
 """
 import logging
 from typing import Any, Callable, Dict, List, Optional
 import pytube
 import pytube.exceptions as exceptions
 from pytube import extract, request
 from pytube import Stream, StreamQuery
 from pytube.helpers import install_proxy
+from pytube.innertube import InnerTube
 from pytube.metadata import YouTubeMetadata
 from pytube.monostate import Monostate
         on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
         on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
         proxies: Dict[str, str] = None,
+        use_oauth: bool = False,
+        allow_oauth_cache: bool = True
     ):
         """Construct a :class:`YouTube <YouTube>`.
         self._js: Optional[str] = None  # js fetched by js_url
         self._js_url: Optional[str] = None  # the url to the js, parsed from watch html
+        self._vid_info: Optional[Dict] = None  # content fetched from innertube/player
         self._watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
         self._embed_html: Optional[str] = None
         self._player_config_args: Optional[Dict] = None  # inline js in the html containing
         self._age_restricted: Optional[bool] = None
         self._fmt_streams: Optional[List[Stream]] = None
         self._title = None
         self._publish_date = None
+        self.use_oauth = use_oauth
+        self.allow_oauth_cache = allow_oauth_cache
     def __repr__(self):
         return f'<pytube.__main__.YouTube object: videoId={self.video_id}>'
         self._embed_html = request.get(url=self.embed_url)
         return self._embed_html
     @property
     def age_restricted(self):
         if self._age_restricted:
         self._age_restricted = extract.is_age_restricted(self.watch_html)
         return self._age_restricted
     @property
     def js_url(self):
         if self._js_url:
         return self._js
     @property
     def initial_data(self):
         if self._initial_data:
         return self._initial_data
     @property
+    def streaming_data(self):
+        """Return streamingData from video info."""
+        if 'streamingData' in self.vid_info:
+            return self.vid_info['streamingData']
+        else:
+            self.bypass_age_gate()
+            return self.vid_info['streamingData']
     @property
     def fmt_streams(self):
             return self._fmt_streams
         self._fmt_streams = []
+        stream_manifest = extract.apply_descrambler(self.streaming_data)
+        # If the cached js doesn't work, try fetching a new js file
+        # https://github.com/pytube/pytube/issues/1054
+        try:
+            extract.apply_signature(stream_manifest, self.vid_info, self.js)
+        except exceptions.ExtractError:
+            # To force an update to the js file, we clear the cache and retry
+            self._js = None
+            self._js_url = None
+            pytube.__js__ = None
+            pytube.__js_url__ = None
+            extract.apply_signature(stream_manifest, self.vid_info, self.js)
+        # build instances of :class:`Stream <Stream>`
+        # Initialize stream objects
+        for stream in stream_manifest:
+            video = Stream(
+                stream=stream,
+                monostate=self.stream_monostate,
+            )
+            self._fmt_streams.append(video)
         self.stream_monostate.title = self.title
         self.stream_monostate.duration = self.length
                 elif reason == 'This live stream recording is not available.':
                     raise exceptions.RecordingUnavailable(video_id=self.video_id)
                 else:
                     raise exceptions.VideoUnavailable(video_id=self.video_id)
             elif status == 'LOGIN_REQUIRED':
                 if reason == (
         :rtype: Dict[Any, Any]
         """
+        if self._vid_info:
+            return self._vid_info
+        innertube = InnerTube(use_oauth=self.use_oauth, allow_cache=self.allow_oauth_cache)
+        innertube_response = innertube.player(self.video_id)
+        self._vid_info = innertube_response
+        return self._vid_info
+    def bypass_age_gate(self):
+        """Attempt to update the vid_info by bypassing the age gate."""
+        innertube = InnerTube(
+            client='ANDROID_EMBED',
+            use_oauth=self.use_oauth,
+            allow_cache=self.allow_oauth_cache
+        )
+        innertube_response = innertube.player(self.video_id)
+        playability_status = innertube_response['playabilityStatus'].get('status', None)
+        # If we still can't access the video, raise an exception
+        # (tier 3 age restriction)
+        if playability_status == 'UNPLAYABLE':
+            raise exceptions.AgeRestrictedError(self.video_id)
+        self._vid_info = innertube_response
     @property
     def caption_tracks(self) -> List[pytube.Caption]:
         :rtype: List[Caption]
         """
         raw_tracks = (
+            self.vid_info.get("captions", {})
             .get("playerCaptionsTracklistRenderer", {})
             .get("captionTracks", [])
         )
         :rtype: str
         """
         thumbnail_details = (
+            self.vid_info.get("videoDetails", {})
             .get("thumbnail", {})
             .get("thumbnails")
         )
             return self._title
         try:
+            self._title = self.vid_info['videoDetails']['title']
         except KeyError:
             # Check_availability will raise the correct exception in most cases
             #  if it doesn't, ask for a report.
         :rtype: str
         """
+        return self.vid_info.get("videoDetails", {}).get("shortDescription")
     @property
     def rating(self) -> float:
         :rtype: float
         """
+        return self.vid_info.get("videoDetails", {}).get("averageRating")
     @property
     def length(self) -> int:
         :rtype: int
         """
+        return int(self.vid_info.get('videoDetails', {}).get('lengthSeconds'))
     @property
     def views(self) -> int:
         :rtype: int
         """
+        return int(self.vid_info.get("videoDetails", {}).get("viewCount"))
     @property
     def author(self) -> str:
         """
         if self._author:
             return self._author
+        self._author = self.vid_info.get("videoDetails", {}).get(
             "author", "unknown"
         )
         return self._author
         :rtype: List[str]
         """
+        return self.vid_info.get('videoDetails', {}).get('keywords', [])
     @property
     def channel_id(self) -> str:
         :rtype: str
         """
+        return self.vid_info.get('videoDetails', {}).get('channelId', None)
     @property
     def channel_url(self) -> str:

pytube/captions.py CHANGED Viewed

@@ -19,7 +19,17 @@ class Caption:
             Caption track data extracted from ``watch_html``.
         """
         self.url = caption_track.get("baseUrl")
-        self.name = caption_track["name"]["simpleText"]
         # Use "vssId" instead of "languageCode", fix issue #779
         self.code = caption_track["vssId"]
         # Remove preceding '.' for backwards compatibility, e.g.:

             Caption track data extracted from ``watch_html``.
         """
         self.url = caption_track.get("baseUrl")
+        # Certain videos have runs instead of simpleText
+        #  this handles that edge case
+        name_dict = caption_track['name']
+        if 'simpleText' in name_dict:
+            self.name = name_dict['simpleText']
+        else:
+            for el in name_dict['runs']:
+                if 'text' in el:
+                    self.name = el['text']
         # Use "vssId" instead of "languageCode", fix issue #779
         self.code = caption_track["vssId"]
         # Remove preceding '.' for backwards compatibility, e.g.:

pytube/contrib/search.py CHANGED Viewed

@@ -145,6 +145,10 @@ class Search:
                 if 'didYouMeanRenderer' in video_details:
                     continue
                 if 'videoRenderer' not in video_details:
                     logger.warn('Unexpected renderer encountered.')
                     logger.warn(f'Renderer name: {video_details.keys()}')

                 if 'didYouMeanRenderer' in video_details:
                     continue
+                # Seems to be the renderer used for the image shown on a no results page
+                if 'backgroundPromoRenderer' in video_details:
+                    continue
                 if 'videoRenderer' not in video_details:
                     logger.warn('Unexpected renderer encountered.')
                     logger.warn(f'Renderer name: {video_details.keys()}')

pytube/exceptions.py CHANGED Viewed

@@ -53,9 +53,23 @@ class VideoUnavailable(PytubeError):
         return f'{self.video_id} is unavailable'
 class LiveStreamError(VideoUnavailable):
     """Video is a live stream."""
     def __init__(self, video_id: str):
         """
         :param str video_id:

         return f'{self.video_id} is unavailable'
+class AgeRestrictedError(VideoUnavailable):
+    """Video is age restricted, and cannot be accessed without OAuth."""
+    def __init__(self, video_id: str):
+        """
+        :param str video_id:
+            A YouTube video identifier.
+        """
+        self.video_id = video_id
+        super().__init__(self.video_id)
+    @property
+    def error_string(self):
+        return f"{self.video_id} is age restricted, and can't be accessed without logging in."
 class LiveStreamError(VideoUnavailable):
     """Video is a live stream."""
     def __init__(self, video_id: str):
         """
         :param str video_id:

pytube/extract.py CHANGED Viewed

@@ -1,12 +1,11 @@
 """This module contains all non-cipher related data extraction logic."""
-import json
 import logging
 import urllib.parse
 import re
 from collections import OrderedDict
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Tuple
-from urllib.parse import parse_qs, parse_qsl, quote, unquote, urlencode, urlparse
 from pytube.cipher import Cipher
 from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
@@ -90,34 +89,6 @@ def is_age_restricted(watch_html: str) -> bool:
     return True
-def is_region_blocked(watch_html: str) -> bool:
-    """Determine if a video is not available in the user's region.
-    :param str watch_html:
-        The html contents of the watch page.
-    :rtype: bool
-    :returns:
-        True if the video is blocked in the users region.
-        False if not, or if unknown.
-    """
-    player_response = initial_player_response(watch_html)
-    country_code_patterns = [
-        r"gl\s*=\s*['\"](\w{2})['\"]",  # gl="US"
-        r"['\"]gl['\"]\s*:\s*['\"](\w{2})['\"]"  # "gl":"US"
-    ]
-    for pattern in country_code_patterns:
-        try:
-            yt_detected_country = regex_search(pattern, watch_html, 1)
-            available_countries = player_response[
-                'microformat']['playerMicroformatRenderer']['availableCountries']
-        except (KeyError, RegexMatchError):
-            pass
-        else:
-            if yt_detected_country not in available_countries:
-                return True
-    return False
 def playability_status(watch_html: str) -> (str, str):
     """Return the playability status and status explanation of a video.
@@ -197,10 +168,10 @@ def channel_name(url: str) -> str:
         YouTube channel name.
     """
     patterns = [
-        r"(?:\/(c)\/([\d\w_\-]+)(\/.*)?)",
-        r"(?:\/(channel)\/([\w\d_\-]+)(\/.*)?)",
-        r"(?:\/(u)\/([\d\w_\-]+)(\/.*)?)",
-        r"(?:\/(user)\/([\w\d_\-]+)(\/.*)?)"
     ]
     for pattern in patterns:
         regex = re.compile(pattern)
@@ -426,29 +397,23 @@ def get_ytcfg(html: str) -> str:
     )
-def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
     """Apply the decrypted signature to the stream manifest.
-    :param dict config_args:
         Details of the media streams available.
-    :param str fmt:
-        Key in stream manifests (``ytplayer_config``) containing progressive
-        download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
-        ``adaptive_fmts``).
     :param str js:
         The contents of the base.js asset file.
     """
     cipher = Cipher(js=js)
-    stream_manifest = config_args[fmt]
     for i, stream in enumerate(stream_manifest):
         try:
             url: str = stream["url"]
         except KeyError:
             live_stream = (
-                json.loads(config_args["player_response"])
-                .get("playabilityStatus", {},)
                 .get("liveStreamability")
             )
             if live_stream:
@@ -468,27 +433,28 @@ def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
         logger.debug(
             "finished descrambling signature for itag=%s", stream["itag"]
         )
         query_params = parse_qs(urlparse(url).query)
         if 'ratebypass' not in query_params.keys():
             # Cipher n to get the updated value
-            initial_n = list(query_params['n'][0])
             new_n = cipher.calculate_n(initial_n)
-            query_params['n'][0] = new_n
-            # Update the value
-            parsed = urlparse(url)
-            # The parsed query params are lists of a single element, convert to proper dicts.
-            query_params = {
-                k: v[0] for k,v in query_params.items()
-            }
-            url = f'{parsed.scheme}://{parsed.netloc}{parsed.path}?{urlencode(query_params)}'
         # 403 forbidden fix
-        stream_manifest[i]["url"] = url + "&sig=" + signature
-def apply_descrambler(stream_data: Dict, key: str) -> None:
     """Apply various in-place transforms to YouTube's media stream data.
     Creates a ``list`` of dictionaries by string splitting on commas, then
@@ -497,8 +463,6 @@ def apply_descrambler(stream_data: Dict, key: str) -> None:
     :param dict stream_data:
         Dictionary containing query string encoded values.
-    :param str key:
-        Name of the key in dictionary.
     **Example**:
@@ -508,64 +472,27 @@ def apply_descrambler(stream_data: Dict, key: str) -> None:
     {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
     """
-    otf_type = "FORMAT_STREAM_TYPE_OTF"
-    if key == "url_encoded_fmt_stream_map" and not stream_data.get(
-        "url_encoded_fmt_stream_map"
-    ):
-        if isinstance(stream_data["player_response"], str):
-            streaming_data = json.loads(stream_data["player_response"])["streamingData"]
-        else:
-            streaming_data = stream_data["player_response"]["streamingData"]
-        formats = []
-        if 'formats' in streaming_data.keys():
-            formats.extend(streaming_data['formats'])
-        if 'adaptiveFormats' in streaming_data.keys():
-            formats.extend(streaming_data['adaptiveFormats'])
-        try:
-            stream_data[key] = [
-                {
-                    "url": format_item["url"],
-                    "type": format_item["mimeType"],
-                    "quality": format_item["quality"],
-                    "itag": format_item["itag"],
-                    "fps": format_item["fps"] if 'video' in format_item["mimeType"] else None,
-                    "bitrate": format_item.get("bitrate"),
-                    "is_otf": (format_item.get("type") == otf_type),
-                    'content_length': int(format_item.get('contentLength', 0)),
-                }
-                for format_item in formats
-            ]
-        except KeyError:
-            cipher_url = [
-                parse_qs(
-                    data[
-                        "cipher" if "cipher" in data.keys() else "signatureCipher"
-                    ]
-                )
-                for data in formats
-            ]
-            stream_data[key] = [
-                {
-                    "url": cipher_url[i]["url"][0],
-                    "s": cipher_url[i]["s"][0],
-                    "type": format_item["mimeType"],
-                    "quality": format_item["quality"],
-                    "itag": format_item["itag"],
-                    "fps": format_item["fps"] if 'video' in format_item["mimeType"] else None,
-                    "bitrate": format_item.get("bitrate"),
-                    "is_otf": (format_item.get("type") == otf_type),
-                    'content_length': int(format_item.get('contentLength', 0)),
-                }
-                for i, format_item in enumerate(formats)
-            ]
-    else:
-        stream_data[key] = [
-            {k: unquote(v) for k, v in parse_qsl(i)}
-            for i in stream_data[key].split(",")
-        ]
     logger.debug("applying descrambler")
 def initial_data(watch_html: str) -> str:

 """This module contains all non-cipher related data extraction logic."""
 import logging
 import urllib.parse
 import re
 from collections import OrderedDict
 from datetime import datetime
 from typing import Any, Dict, List, Optional, Tuple
+from urllib.parse import parse_qs, quote, urlencode, urlparse
 from pytube.cipher import Cipher
 from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
     return True
 def playability_status(watch_html: str) -> (str, str):
     """Return the playability status and status explanation of a video.
         YouTube channel name.
     """
     patterns = [
+        r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)",
+        r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)",
+        r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)",
+        r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)"
     ]
     for pattern in patterns:
         regex = re.compile(pattern)
     )
+def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str) -> None:
     """Apply the decrypted signature to the stream manifest.
+    :param dict stream_manifest:
         Details of the media streams available.
     :param str js:
         The contents of the base.js asset file.
     """
     cipher = Cipher(js=js)
     for i, stream in enumerate(stream_manifest):
         try:
             url: str = stream["url"]
         except KeyError:
             live_stream = (
+                vid_info.get("playabilityStatus", {},)
                 .get("liveStreamability")
             )
             if live_stream:
         logger.debug(
             "finished descrambling signature for itag=%s", stream["itag"]
         )
+        parsed_url = urlparse(url)
+        # Convert query params off url to dict
         query_params = parse_qs(urlparse(url).query)
+        query_params = {
+            k: v[0] for k,v in query_params.items()
+        }
+        query_params['sig'] = signature
         if 'ratebypass' not in query_params.keys():
             # Cipher n to get the updated value
+            initial_n = list(query_params['n'])
             new_n = cipher.calculate_n(initial_n)
+            query_params['n'] = new_n
+        url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}'  # noqa:E501
         # 403 forbidden fix
+        stream_manifest[i]["url"] = url
+def apply_descrambler(stream_data: Dict) -> None:
     """Apply various in-place transforms to YouTube's media stream data.
     Creates a ``list`` of dictionaries by string splitting on commas, then
     :param dict stream_data:
         Dictionary containing query string encoded values.
     **Example**:
     {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
     """
+    if 'url' in stream_data:
+        return None
+    # Merge formats and adaptiveFormats into a single list
+    formats = []
+    if 'formats' in stream_data.keys():
+        formats.extend(stream_data['formats'])
+    if 'adaptiveFormats' in stream_data.keys():
+        formats.extend(stream_data['adaptiveFormats'])
+    # Extract url and s from signatureCiphers as necessary
+    for data in formats:
+        if 'url' not in data:
+            if 'signatureCipher' in data:
+                cipher_url = parse_qs(data['signatureCipher'])
+                data['url'] = cipher_url['url'][0]
+                data['s'] = cipher_url['s'][0]
+        data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF'
     logger.debug("applying descrambler")
+    return formats
 def initial_data(watch_html: str) -> str:

pytube/helpers.py CHANGED Viewed

@@ -288,7 +288,6 @@ def generate_all_html_json_mocks():
     test_vid_ids = [
         '2lAe1cqCOXo',
         '5YceQ8YqYMc',
-        'hZpzr8TbF08',
         'irauhITDrsE',
         'm8uHb5jIGN8',
         'QRS8MkLhQmM',
@@ -326,7 +325,7 @@ def create_mock_html_json(vid_id) -> Dict[str, Any]:
         'js': yt.js,
         'embed_html': yt.embed_html,
         'watch_html': yt.watch_html,
-        'vid_info_raw': yt.vid_info_raw
     }
     logger.info(f'Outputing json.gz file to {gzip_filepath}')

     test_vid_ids = [
         '2lAe1cqCOXo',
         '5YceQ8YqYMc',
         'irauhITDrsE',
         'm8uHb5jIGN8',
         'QRS8MkLhQmM',
         'js': yt.js,
         'embed_html': yt.embed_html,
         'watch_html': yt.watch_html,
+        'vid_info': yt.vid_info
     }
     logger.info(f'Outputing json.gz file to {gzip_filepath}')

pytube/innertube.py CHANGED Viewed

@@ -5,13 +5,28 @@ interfaces returns raw results. These should instead be parsed to extract
 the useful information for the end user.
 """
 # Native python imports
-from datetime import datetime
 import json
 from urllib import parse
 # Local imports
 from pytube import request
 _default_clients = {
     'WEB': {
@@ -31,34 +46,158 @@ _default_clients = {
             }
         },
         'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
     }
 }
 _token_timeout = 1800
 class InnerTube:
     """Object for interacting with the innertube API."""
-    def __init__(self, client='WEB', bearer_token=None):
         self.context = _default_clients[client]['context']
         self.api_key = _default_clients[client]['api_key']
-        self.bearer_token = bearer_token
-        self.last_refresh = None
-        self.refresh_bearer_token()
     def refresh_bearer_token(self, force=False):
-        """Refreshes the OAuth token.
-        This is skeleton code for potential future functionality, so it is incomplete.
         """
-        # Skip refresh if it's been less than 30 minutes
-        if self.last_refresh and not force:
-            # Use a 30-minute timer.
-            if (datetime.now() - self.last_refresh).total_seconds() < _token_timeout:
-                return
-        # TODO: Refresh the token
-        self.last_refresh = datetime.now()
     @property
     def base_url(self):
@@ -76,19 +215,29 @@ class InnerTube:
     def base_params(self):
         """Return the base query parameters to transmit to the innertube API."""
         return {
-            'key': self.api_key
         }
     def _call_api(self, endpoint, query, data):
         """Make a request to a given endpoint with the provided query parameters and data."""
         endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
         headers = {
             'Content-Type': 'application/json',
         }
         # Add the bearer token if applicable
-        if self.bearer_token:
-            self.refresh_bearer_token()
-            headers['authorization'] = f'Bearer {self.bearer_token}'
         response = request._execute_request(
             endpoint_url,

 the useful information for the end user.
 """
 # Native python imports
 import json
+import os
+import pathlib
+import time
 from urllib import parse
 # Local imports
 from pytube import request
+# YouTube on TV client secrets
+_client_id = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
+_client_secret = 'SboVhoG9s0rNafixCSGGKXAT'
+# Extracted API keys -- unclear what these are linked to.
+_api_keys = [
+    'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+    'AIzaSyCtkvNIR1HCEwzsqK6JuE6KqpyjusIRI30',
+    'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
+    'AIzaSyC8UYZpvA2eknNex0Pjid0_eTLJoDu6los',
+    'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
+    'AIzaSyDHQ9ipnphqTzDqZsbtd8_Ru4_kiKVQe2k'
+]
 _default_clients = {
     'WEB': {
             }
         },
         'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
+    },
+    'WEB_EMBED': {
+        'context': {
+            'client': {
+                'clientName': 'WEB',
+                'clientVersion': '2.20210721.00.00',
+                'clientScreen': 'EMBED'
+            }
+        },
+        'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
+    },
+    'ANDROID_EMBED': {
+        'context': {
+            'client': {
+                'clientName': 'ANDROID',
+                'clientVersion': '16.20',
+                'clientScreen': 'EMBED'
+            }
+        },
+        'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
     }
 }
 _token_timeout = 1800
+_cache_dir = pathlib.Path(__file__).parent.resolve() / '__cache__'
+_token_file = os.path.join(_cache_dir, 'tokens.json')
 class InnerTube:
     """Object for interacting with the innertube API."""
+    def __init__(self, client='ANDROID', use_oauth=False, allow_cache=True):
+        """Initialize an InnerTube object.
+        :param str client:
+            Client to use for the object.
+            Defaults to ``ANDROID``; must be one of the keys in ``_default_clients``.
+        :param bool use_oauth:
+            Whether or not to authenticate to YouTube.
+        :param bool allow_cache:
+            Allows caching of oauth tokens on the machine.
+        """
         self.context = _default_clients[client]['context']
         self.api_key = _default_clients[client]['api_key']
+        self.access_token = None
+        self.refresh_token = None
+        self.use_oauth = use_oauth
+        self.allow_cache = allow_cache
+        # Stored as epoch time
+        self.expires = None
+        # Try to load from file if specified
+        if self.use_oauth and self.allow_cache:
+            # Try to load from file if possible
+            if os.path.exists(_token_file):
+                with open(_token_file) as f:
+                    data = json.load(f)
+                    self.access_token = data['access_token']
+                    self.refresh_token = data['refresh_token']
+                    self.expires = data['expires']
+                    self.refresh_bearer_token()
+    def cache_tokens(self):
+        """Cache tokens to file if allowed."""
+        if not self.allow_cache:
+            return
+        data = {
+            'access_token': self.access_token,
+            'refresh_token': self.refresh_token,
+            'expires': self.expires
+        }
+        if not os.path.exists(_cache_dir):
+            os.mkdir(_cache_dir)
+        with open(_token_file, 'w') as f:
+            json.dump(data, f)
     def refresh_bearer_token(self, force=False):
+        """Refreshes the OAuth token if necessary.
+        :param bool force:
+            Force-refresh the bearer token.
         """
+        if not self.use_oauth:
+            return
+        # Skip refresh if it's not necessary and not forced
+        if self.expires > time.time() and not force:
+            return
+        # Subtracting 30 seconds is arbitrary to avoid potential time discrepancies
+        start_time = int(time.time() - 30)
+        data = {
+            'client_id': _client_id,
+            'client_secret': _client_secret,
+            'grant_type': 'refresh_token',
+            'refresh_token': self.refresh_token
+        }
+        response = request._execute_request(
+            'https://oauth2.googleapis.com/token',
+            'POST',
+            headers={
+                'Content-Type': 'application/json'
+            },
+            data=data
+        )
+        response_data = json.loads(response.read())
+        self.access_token = response_data['access_token']
+        self.expires = start_time + response_data['expires_in']
+        self.cache_tokens()
+    def fetch_bearer_token(self):
+        """Fetch an OAuth token."""
+        # Subtracting 30 seconds is arbitrary to avoid potential time discrepancies
+        start_time = int(time.time() - 30)
+        data = {
+            'client_id': _client_id,
+            'scope': 'https://www.googleapis.com/auth/youtube'
+        }
+        response = request._execute_request(
+            'https://oauth2.googleapis.com/device/code',
+            'POST',
+            headers={
+                'Content-Type': 'application/json'
+            },
+            data=data
+        )
+        response_data = json.loads(response.read())
+        verification_url = response_data['verification_url']
+        user_code = response_data['user_code']
+        print(f'Please open {verification_url} and input code {user_code}')
+        input('Press enter when you have completed this step.')
+        data = {
+            'client_id': _client_id,
+            'client_secret': _client_secret,
+            'device_code': response_data['device_code'],
+            'grant_type': 'urn:ietf:params:oauth:grant-type:device_code'
+        }
+        response = request._execute_request(
+            'https://oauth2.googleapis.com/token',
+            'POST',
+            headers={
+                'Content-Type': 'application/json'
+            },
+            data=data
+        )
+        response_data = json.loads(response.read())
+        self.access_token = response_data['access_token']
+        self.refresh_token = response_data['refresh_token']
+        self.expires = start_time + response_data['expires_in']
+        self.cache_tokens()
     @property
     def base_url(self):
     def base_params(self):
         """Return the base query parameters to transmit to the innertube API."""
         return {
+            'key': self.api_key,
+            'contentCheckOk': True,
+            'racyCheckOk': True
         }
     def _call_api(self, endpoint, query, data):
         """Make a request to a given endpoint with the provided query parameters and data."""
+        # Remove the API key if oauth is being used.
+        if self.use_oauth:
+            del query['key']
         endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
         headers = {
             'Content-Type': 'application/json',
         }
         # Add the bearer token if applicable
+        if self.use_oauth:
+            if self.access_token:
+                self.refresh_bearer_token()
+                headers['Authorization'] = f'Bearer {self.access_token}'
+            else:
+                self.fetch_bearer_token()
+                headers['Authorization'] = f'Bearer {self.access_token}'
         response = request._execute_request(
             endpoint_url,

pytube/streams.py CHANGED Viewed

@@ -25,15 +25,12 @@ class Stream:
     """Container for stream manifest data."""
     def __init__(
-        self, stream: Dict, player_config_args: Dict, monostate: Monostate
     ):
         """Construct a :class:`Stream <Stream>`.
         :param dict stream:
             The unscrambled data extracted from YouTube.
-        :param dict player_config_args:
-            The data object containing video media data like title and
-            keywords.
         :param dict monostate:
             Dictionary of data shared across all instances of
             :class:`Stream <Stream>`.
@@ -50,7 +47,7 @@ class Stream:
         # set type and codec info
         # 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis']
-        self.mime_type, self.codecs = extract.mime_type_codec(stream["type"])
         # 'video/webm' -> 'video', 'webm'
         self.type, self.subtype = self.mime_type.split("/")
@@ -62,16 +59,16 @@ class Stream:
         self.is_otf: bool = stream["is_otf"]
         self.bitrate: Optional[int] = stream["bitrate"]
-        self._filesize: Optional[int] = stream['content_length']  # filesize in bytes
         # Additional information about the stream format, such as resolution,
         # frame rate, and whether the stream is live (HLS) or 3D.
         itag_profile = get_format_profile(self.itag)
         self.is_dash = itag_profile["is_dash"]
         self.abr = itag_profile["abr"]  # average bitrate (audio streams only)
-        self.fps = stream[
-            "fps"
-        ]  # frames per second (video streams only)
         self.resolution = itag_profile[
             "resolution"
         ]  # resolution (e.g.: "480p")
@@ -79,9 +76,6 @@ class Stream:
         self.is_hdr = itag_profile["is_hdr"]
         self.is_live = itag_profile["is_live"]
-        # The player configuration, contains info like the video title.
-        self.player_config_args = player_config_args
     @property
     def is_adaptive(self) -> bool:
         """Whether the stream is DASH.

     """Container for stream manifest data."""
     def __init__(
+        self, stream: Dict, monostate: Monostate
     ):
         """Construct a :class:`Stream <Stream>`.
         :param dict stream:
             The unscrambled data extracted from YouTube.
         :param dict monostate:
             Dictionary of data shared across all instances of
             :class:`Stream <Stream>`.
         # set type and codec info
         # 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis']
+        self.mime_type, self.codecs = extract.mime_type_codec(stream["mimeType"])
         # 'video/webm' -> 'video', 'webm'
         self.type, self.subtype = self.mime_type.split("/")
         self.is_otf: bool = stream["is_otf"]
         self.bitrate: Optional[int] = stream["bitrate"]
+        # filesize in bytes
+        self._filesize: Optional[int] = int(stream.get('contentLength', 0))
         # Additional information about the stream format, such as resolution,
         # frame rate, and whether the stream is live (HLS) or 3D.
         itag_profile = get_format_profile(self.itag)
         self.is_dash = itag_profile["is_dash"]
         self.abr = itag_profile["abr"]  # average bitrate (audio streams only)
+        if 'fps' in stream:
+            self.fps = stream['fps']  # Video streams only
         self.resolution = itag_profile[
             "resolution"
         ]  # resolution (e.g.: "480p")
         self.is_hdr = itag_profile["is_hdr"]
         self.is_live = itag_profile["is_live"]
     @property
     def is_adaptive(self) -> bool:
         """Whether the stream is DASH.

tests/conftest.py CHANGED Viewed

@@ -27,7 +27,6 @@ def load_and_init_from_playback_file(filename, mock_urlopen):
     mock_url_open_object = mock.Mock()
     mock_url_open_object.read.side_effect = [
         pb['watch_html'].encode('utf-8'),
-        pb['vid_info_raw'].encode('utf-8'),
         pb['js'].encode('utf-8')
     ]
     mock_urlopen.return_value = mock_url_open_object
@@ -39,10 +38,9 @@ def load_and_init_from_playback_file(filename, mock_urlopen):
     #  deferred
     v = YouTube(pb["url"])
     v.watch_html
-    v.vid_info_raw
     v.js
     v.fmt_streams
-    v.player_response
     return v
@@ -81,13 +79,6 @@ def missing_recording():
     return load_playback_file(filename)
-@pytest.fixture
-def region_blocked():
-    """Youtube instance initialized with video id hZpzr8TbF08."""
-    filename = "yt-video-hZpzr8TbF08-html.json.gz"
-    return load_playback_file(filename)
 @pytest.fixture
 def playlist_html():
     """Youtube playlist HTML loaded on 2020-01-25 from

     mock_url_open_object = mock.Mock()
     mock_url_open_object.read.side_effect = [
         pb['watch_html'].encode('utf-8'),
         pb['js'].encode('utf-8')
     ]
     mock_urlopen.return_value = mock_url_open_object
     #  deferred
     v = YouTube(pb["url"])
     v.watch_html
+    v._vid_info = pb['vid_info']
     v.js
     v.fmt_streams
     return v
     return load_playback_file(filename)
 @pytest.fixture
 def playlist_html():
     """Youtube playlist HTML loaded on 2020-01-25 from

tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz CHANGED Viewed

Binary files a/tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz and b/tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz differ

tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz CHANGED Viewed

Binary files a/tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz and b/tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz differ

tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz CHANGED Viewed

Binary files a/tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz and b/tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz differ

tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz CHANGED Viewed

Binary files a/tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz and b/tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz differ

tests/mocks/yt-video-hZpzr8TbF08-html.json.gz DELETED Viewed

Binary file (642 kB)

tests/mocks/yt-video-irauhITDrsE-html.json.gz CHANGED Viewed

Binary files a/tests/mocks/yt-video-irauhITDrsE-html.json.gz and b/tests/mocks/yt-video-irauhITDrsE-html.json.gz differ

tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz CHANGED Viewed

Binary files a/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz and b/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz differ

tests/test_exceptions.py CHANGED Viewed

@@ -86,15 +86,3 @@ def test_raises_recording_unavailable(missing_recording):
         mock_url_open.return_value = mock_url_open_object
         with pytest.raises(exceptions.RecordingUnavailable):
             YouTube('https://youtube.com/watch?v=5YceQ8YqYMc').streams
-def test_raises_video_region_blocked(region_blocked):
-    with mock.patch('pytube.request.urlopen') as mock_url_open:
-        # Mock the responses to YouTube
-        mock_url_open_object = mock.Mock()
-        mock_url_open_object.read.side_effect = [
-            region_blocked['watch_html'].encode('utf-8')
-        ]
-        mock_url_open.return_value = mock_url_open_object
-        with pytest.raises(exceptions.VideoRegionBlocked):
-            YouTube('https://youtube.com/watch?v=hZpzr8TbF08').streams

         mock_url_open.return_value = mock_url_open_object
         with pytest.raises(exceptions.RecordingUnavailable):
             YouTube('https://youtube.com/watch?v=5YceQ8YqYMc').streams

tests/test_helpers.py CHANGED Viewed

@@ -120,7 +120,7 @@ def test_create_mock_html_json(mock_url_open, mock_open):
          b'"jsUrl":"/s/player/13371337/player_ias.vflset/en_US/base.js"'),
         b'embed_html',
         b'watch_html',
-        b'vid_info_raw',
     ]
     mock_url_open.return_value = mock_url_open_object

          b'"jsUrl":"/s/player/13371337/player_ias.vflset/en_US/base.js"'),
         b'embed_html',
         b'watch_html',
+        b'{\"responseContext\":{}}',
     ]
     mock_url_open.return_value = mock_url_open_object

tests/test_query.py CHANGED Viewed

@@ -5,18 +5,18 @@ import pytest
 @pytest.mark.parametrize(
     ("test_input", "expected"),
     [
-        ({"progressive": True}, [18, 22]),
         ({"resolution": "720p"}, [22, 136, 247, 398]),
         ({"res": "720p"}, [22, 136, 247, 398]),
         ({"fps": 24, "resolution": "480p"}, [135, 244, 397]),
-        ({"mime_type": "audio/mp4"}, [140]),
-        ({"type": "audio"}, [140, 249, 250, 251]),
-        ({"subtype": "3gpp"}, []),
         ({"abr": "128kbps"}, [140]),
         ({"bitrate": "128kbps"}, [140]),
         ({"audio_codec": "opus"}, [249, 250, 251]),
         ({"video_codec": "vp9"}, [248, 247, 244, 243, 242, 278]),
-        ({"only_audio": True}, [140, 249, 250, 251]),
         ({"only_video": True, "video_codec": "avc1.4d4015"}, [133]),
         ({"adaptive": True, "resolution": "1080p"}, [137, 248, 399]),
         ({"custom_filter_functions": [lambda s: s.itag == 18]}, [18]),
@@ -50,7 +50,7 @@ def test_get_first(cipher_signature):
     """Ensure :meth:`~pytube.StreamQuery.first` returns the expected
     :class:`Stream <Stream>`.
     """
-    assert cipher_signature.streams[0].itag == 18
 def test_order_by(cipher_signature):
@@ -61,7 +61,13 @@ def test_order_by(cipher_signature):
         s.itag
         for s in cipher_signature.streams.filter(type="audio").order_by("itag")
     ]
-    assert itags == [140, 249, 250, 251]
 def test_order_by_descending(cipher_signature):
@@ -75,7 +81,12 @@ def test_order_by_descending(cipher_signature):
         .order_by("itag")
         .desc()
     ]
-    assert itags == [251, 250, 249, 140]
 def test_order_by_non_numerical(cipher_signature):
@@ -99,7 +110,11 @@ def test_order_by_ascending(cipher_signature):
         .order_by("itag")
         .asc()
     ]
-    assert itags == [140, 249, 250, 251]
 def test_order_by_non_numerical_ascending(cipher_signature):
@@ -114,7 +129,16 @@ def test_order_by_non_numerical_ascending(cipher_signature):
 def test_order_by_with_none_values(cipher_signature):
     abrs = [s.abr for s in cipher_signature.streams.order_by("abr").asc()]
-    assert abrs == ["50kbps", "70kbps", "96kbps", "128kbps", "160kbps", "192kbps"]
 def test_get_by_itag(cipher_signature):
@@ -143,7 +167,7 @@ def test_get_highest_resolution(cipher_signature):
 def test_filter_is_dash(cipher_signature):
     streams = cipher_signature.streams.filter(is_dash=False)
     itags = [s.itag for s in streams]
-    assert itags == [18, 22]
 def test_get_audio_only(cipher_signature):
@@ -155,13 +179,13 @@ def test_get_audio_only_with_subtype(cipher_signature):
 def test_sequence(cipher_signature):
-    assert len(cipher_signature.streams) == 24
     assert cipher_signature.streams[0] is not None
 def test_otf(cipher_signature):
     non_otf = cipher_signature.streams.otf()
-    assert len(non_otf) == 24
     otf = cipher_signature.streams.otf(True)
     assert len(otf) == 0

 @pytest.mark.parametrize(
     ("test_input", "expected"),
     [
+        ({"progressive": True}, [17, 18, 22]),
         ({"resolution": "720p"}, [22, 136, 247, 398]),
         ({"res": "720p"}, [22, 136, 247, 398]),
         ({"fps": 24, "resolution": "480p"}, [135, 244, 397]),
+        ({"mime_type": "audio/mp4"}, [139, 140]),
+        ({"type": "audio"}, [139, 140, 249, 250, 251]),
+        ({"subtype": "3gpp"}, [17]),
         ({"abr": "128kbps"}, [140]),
         ({"bitrate": "128kbps"}, [140]),
         ({"audio_codec": "opus"}, [249, 250, 251]),
         ({"video_codec": "vp9"}, [248, 247, 244, 243, 242, 278]),
+        ({"only_audio": True}, [139, 140, 249, 250, 251]),
         ({"only_video": True, "video_codec": "avc1.4d4015"}, [133]),
         ({"adaptive": True, "resolution": "1080p"}, [137, 248, 399]),
         ({"custom_filter_functions": [lambda s: s.itag == 18]}, [18]),
     """Ensure :meth:`~pytube.StreamQuery.first` returns the expected
     :class:`Stream <Stream>`.
     """
+    assert cipher_signature.streams.first().itag == cipher_signature.streams[0].itag
 def test_order_by(cipher_signature):
         s.itag
         for s in cipher_signature.streams.filter(type="audio").order_by("itag")
     ]
+    expected_itags = [
+        s.itag
+        for s in cipher_signature.streams.filter(type="audio")
+    ]
+    expected_itags.sort()
+    assert itags == expected_itags
 def test_order_by_descending(cipher_signature):
         .order_by("itag")
         .desc()
     ]
+    expected_itags = [
+        s.itag
+        for s in cipher_signature.streams.filter(type="audio")
+    ]
+    expected_itags.sort(reverse=True)
+    assert itags == expected_itags
 def test_order_by_non_numerical(cipher_signature):
         .order_by("itag")
         .asc()
     ]
+    expected_itags = [
+        s.itag
+        for s in cipher_signature.streams.filter(type="audio")
+    ]
+    assert itags == expected_itags
 def test_order_by_non_numerical_ascending(cipher_signature):
 def test_order_by_with_none_values(cipher_signature):
     abrs = [s.abr for s in cipher_signature.streams.order_by("abr").asc()]
+    assert abrs == [
+        "24kbps",
+        "48kbps",
+        "50kbps",
+        "70kbps",
+        "96kbps",
+        "128kbps",
+        "160kbps",
+        "192kbps"
+    ]
 def test_get_by_itag(cipher_signature):
 def test_filter_is_dash(cipher_signature):
     streams = cipher_signature.streams.filter(is_dash=False)
     itags = [s.itag for s in streams]
+    assert itags == [17, 18, 22]
 def test_get_audio_only(cipher_signature):
 def test_sequence(cipher_signature):
+    assert len(cipher_signature.streams) == 26
     assert cipher_signature.streams[0] is not None
 def test_otf(cipher_signature):
     non_otf = cipher_signature.streams.otf()
+    assert len(non_otf) == 26
     otf = cipher_signature.streams.otf(True)
     assert len(otf) == 0

tests/test_streams.py CHANGED Viewed

@@ -28,19 +28,19 @@ def test_stream_to_buffer(mock_request, cipher_signature):
 def test_filesize(cipher_signature):
-    assert cipher_signature.streams[0].filesize == 28282013
 def test_filesize_approx(cipher_signature):
     stream = cipher_signature.streams[0]
-    assert stream.filesize_approx == 28309811
     stream.bitrate = None
-    assert stream.filesize_approx == 28282013
 def test_default_filename(cipher_signature):
-    expected = "YouTube Rewind 2019 For the Record  YouTubeRewind.mp4"
     stream = cipher_signature.streams[0]
     assert stream.default_filename == expected
@@ -137,7 +137,7 @@ def test_download_with_prefix(cipher_signature):
         file_path = stream.download(filename_prefix="prefix")
         assert file_path == os.path.join(
             "/target",
-            "prefixYouTube Rewind 2019 For the Record  YouTubeRewind.mp4"
         )
@@ -175,7 +175,7 @@ def test_download_with_existing(cipher_signature):
         file_path = stream.download()
         assert file_path == os.path.join(
             "/target",
-            "YouTube Rewind 2019 For the Record  YouTubeRewind.mp4"
         )
         assert not request.stream.called
@@ -196,7 +196,7 @@ def test_download_with_existing_no_skip(cipher_signature):
         file_path = stream.download(skip_existing=False)
         assert file_path == os.path.join(
             "/target",
-            "YouTube Rewind 2019 For the Record  YouTubeRewind.mp4"
         )
         assert request.stream.called
@@ -250,32 +250,19 @@ def test_on_complete_hook(cipher_signature):
 def test_author(cipher_signature):
-    expected = "Test author"
-    cipher_signature._player_response = {"videoDetails": {"author": expected}}
-    assert cipher_signature.author == expected
-    expected = "unknown"
-    cipher_signature.author = None
-    cipher_signature._player_response = {'key': 'value'}
-    assert cipher_signature.author == expected
 def test_thumbnail_when_in_details(cipher_signature):
-    expected = "some url"
     cipher_signature._player_response = {
         "videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
     }
     assert cipher_signature.thumbnail_url == expected
-def test_thumbnail_when_not_in_details(cipher_signature):
-    expected = "https://img.youtube.com/vi/2lAe1cqCOXo/maxresdefault.jpg"
-    cipher_signature._player_response = {'key': 'value'}
-    assert cipher_signature.thumbnail_url == expected
 def test_repr_for_audio_streams(cipher_signature):
-    stream = str(cipher_signature.streams.filter(only_audio=True)[0])
     expected = (
         '<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" '
         'acodec="mp4a.40.2" progressive="False" type="audio">'
@@ -293,13 +280,16 @@ def test_repr_for_video_streams(cipher_signature):
 def test_repr_for_progressive_streams(cipher_signature):
-    stream = str(cipher_signature.streams.filter(progressive=True)[0])
     expected = (
         '<Stream: itag="18" mime_type="video/mp4" res="360p" fps="24fps" '
         'vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" '
         'type="video">'
     )
-    assert stream == expected
 def test_repr_for_adaptive_streams(cipher_signature):

 def test_filesize(cipher_signature):
+    assert cipher_signature.streams[0].filesize == 3399554
 def test_filesize_approx(cipher_signature):
     stream = cipher_signature.streams[0]
+    assert stream.filesize_approx == 3403320
     stream.bitrate = None
+    assert stream.filesize_approx == 3399554
 def test_default_filename(cipher_signature):
+    expected = "YouTube Rewind 2019 For the Record  YouTubeRewind.3gpp"
     stream = cipher_signature.streams[0]
     assert stream.default_filename == expected
         file_path = stream.download(filename_prefix="prefix")
         assert file_path == os.path.join(
             "/target",
+            "prefixYouTube Rewind 2019 For the Record  YouTubeRewind.3gpp"
         )
         file_path = stream.download()
         assert file_path == os.path.join(
             "/target",
+            "YouTube Rewind 2019 For the Record  YouTubeRewind.3gpp"
         )
         assert not request.stream.called
         file_path = stream.download(skip_existing=False)
         assert file_path == os.path.join(
             "/target",
+            "YouTube Rewind 2019 For the Record  YouTubeRewind.3gpp"
         )
         assert request.stream.called
 def test_author(cipher_signature):
+    assert cipher_signature.author == 'YouTube'
 def test_thumbnail_when_in_details(cipher_signature):
+    expected = f"https://i.ytimg.com/vi/{cipher_signature.video_id}/sddefault.jpg"
     cipher_signature._player_response = {
         "videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
     }
     assert cipher_signature.thumbnail_url == expected
 def test_repr_for_audio_streams(cipher_signature):
+    stream = str(cipher_signature.streams.filter(only_audio=True)[1])
     expected = (
         '<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" '
         'acodec="mp4a.40.2" progressive="False" type="audio">'
 def test_repr_for_progressive_streams(cipher_signature):
+    stream_reprs = [
+        str(s)
+        for s in cipher_signature.streams.filter(progressive=True)
+    ]
     expected = (
         '<Stream: itag="18" mime_type="video/mp4" res="360p" fps="24fps" '
         'vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" '
         'type="video">'
     )
+    assert expected in stream_reprs
 def test_repr_for_adaptive_streams(cipher_signature):