Ferdowsi
/

pytube

Model card Files Files and versions Community

hbmartin committed on Feb 6, 2020

Commit

099e410

unverified ·

2 Parent(s): 2ff0295 c45419e

Merge pull request #35 from hbmartin/martin.playlist-pagination

Browse files

Files changed (10) hide show

.flake8 +1 -1
pytube/__main__.py +6 -6
pytube/cipher.py +1 -1
pytube/contrib/playlist.py +46 -23
pytube/extract.py +121 -4
pytube/helpers.py +19 -1
pytube/mixins.py +0 -137
tests/conftest.py +11 -0
tests/contrib/test_playlist.py +61 -26
tests/mocks/playlist_long.html.gz +0 -0

.flake8 CHANGED Viewed

@@ -1,3 +1,3 @@
 [flake8]
 ignore = E231,E203,W503
-max-line-length = 88

 [flake8]
 ignore = E231,E203,W503
+max-line-length = 89

pytube/__main__.py CHANGED Viewed

@@ -17,11 +17,11 @@ from html import unescape
 from pytube import Caption
 from pytube import CaptionQuery
 from pytube import extract
-from pytube import mixins
 from pytube import request
 from pytube import Stream
 from pytube import StreamQuery
-from pytube.mixins import install_proxy
 from pytube.exceptions import VideoUnavailable
 from pytube.monostate import OnProgress, OnComplete, Monostate
@@ -135,11 +135,11 @@ class YouTube:
         # unscramble the progressive and adaptive stream manifests.
         for fmt in stream_maps:
             if not self.age_restricted and fmt in self.vid_info:
-                mixins.apply_descrambler(self.vid_info, fmt)
-            mixins.apply_descrambler(self.player_config_args, fmt)
             try:
-                mixins.apply_signature(
                     self.player_config_args, fmt, self.js  # type: ignore
                 )
             except TypeError:
@@ -147,7 +147,7 @@ class YouTube:
                 self.js_url = extract.js_url(self.embed_html, self.age_restricted)
                 self.js = request.get(self.js_url)
                 assert self.js is not None
-                mixins.apply_signature(self.player_config_args, fmt, self.js)
             # build instances of :class:`Stream <Stream>`
             self.initialize_stream_objects(fmt)

 from pytube import Caption
 from pytube import CaptionQuery
 from pytube import extract
 from pytube import request
 from pytube import Stream
 from pytube import StreamQuery
+from pytube.extract import apply_descrambler, apply_signature
+from pytube.helpers import install_proxy
 from pytube.exceptions import VideoUnavailable
 from pytube.monostate import OnProgress, OnComplete, Monostate
         # unscramble the progressive and adaptive stream manifests.
         for fmt in stream_maps:
             if not self.age_restricted and fmt in self.vid_info:
+                apply_descrambler(self.vid_info, fmt)
+            apply_descrambler(self.player_config_args, fmt)
             try:
+                apply_signature(
                     self.player_config_args, fmt, self.js  # type: ignore
                 )
             except TypeError:
                 self.js_url = extract.js_url(self.embed_html, self.age_restricted)
                 self.js = request.get(self.js_url)
                 assert self.js is not None
+                apply_signature(self.player_config_args, fmt, self.js)
             # build instances of :class:`Stream <Stream>`
             self.initialize_stream_objects(fmt)

pytube/cipher.py CHANGED Viewed

@@ -54,7 +54,7 @@ def get_initial_function_name(js: str) -> str:
         regex = re.compile(pattern)
         results = regex.search(js)
         if results:
-            logger.debug(f"finished regex search, matched: {pattern}")
             return results.group(1)
     raise RegexMatchError(caller="get_initial_function_name", pattern="multiple")

         regex = re.compile(pattern)
         results = regex.search(js)
         if results:
+            logger.debug("finished regex search, matched: %s", pattern)
             return results.group(1)
     raise RegexMatchError(caller="get_initial_function_name", pattern="multiple")

pytube/contrib/playlist.py CHANGED Viewed

@@ -4,14 +4,12 @@
 import json
 import logging
 import re
-from collections import OrderedDict
 from datetime import date, datetime
 from typing import List, Optional, Iterable, Dict
 from urllib.parse import parse_qs
 from pytube import request, YouTube
-from pytube.helpers import cache, deprecated
-from pytube.mixins import install_proxy
 logger = logging.getLogger(__name__)
@@ -46,6 +44,8 @@ class Playlist:
                 f"{month} {day:0>2} {year}", "%b %d %Y"
             ).date()
     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
         """Given an html page or a fragment thereof, looks for
@@ -60,41 +60,58 @@ class Playlist:
         return None
-    def parse_links(self, until_watch_id: Optional[str] = None) -> List[str]:
         """Parse the video links from the page source, extracts and
         returns the /watch?v= part from video link href
         """
         req = self.html
-        # split the page source by line and process each line
-        content = [x for x in req.split("\n") if "pl-video-title-link" in x]
-        link_list = [x.split('href="', 1)[1].split("&", 1)[0] for x in content]
         # The above only returns 100 or fewer links
         # Simulating a browser request for the load more link
         load_more_url = self._find_load_more_url(req)
         while load_more_url:  # there is an url found
             if until_watch_id:
                 try:
-                    trim_index = link_list.index(f"/watch?v={until_watch_id}")
-                    return link_list[:trim_index]
                 except ValueError:
                     pass
-            logger.debug("load more url: %s", load_more_url)
-            req = request.get(load_more_url)
-            load_more = json.loads(req)
-            videos = re.findall(
-                r"href=\"(/watch\?v=[\w-]*)", load_more["content_html"],
-            )
-            # remove duplicates
-            link_list.extend(list(OrderedDict.fromkeys(videos)))
             load_more_url = self._find_load_more_url(
                 load_more["load_more_widget_html"],
             )
-        return link_list
-    def trimmed(self, video_id: str) -> List[str]:
         """Retrieve a list of YouTube video URLs trimmed at the given video ID
         i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
         :type video_id: str
@@ -103,8 +120,9 @@ class Playlist:
         :returns:
             List of video URLs from the playlist trimmed at the given ID
         """
-        trimmed_watch = self.parse_links(until_watch_id=video_id)
-        return [self._video_url(watch_path) for watch_path in trimmed_watch]
     @property  # type: ignore
     @cache
@@ -114,10 +132,15 @@ class Playlist:
         :returns:
             List of video URLs
         """
-        return [self._video_url(watch_path) for watch_path in self.parse_links()]
     @property
     def videos(self) -> Iterable[YouTube]:
         for url in self.video_urls:
             yield YouTube(url)

 import json
 import logging
 import re
 from datetime import date, datetime
 from typing import List, Optional, Iterable, Dict
 from urllib.parse import parse_qs
 from pytube import request, YouTube
+from pytube.helpers import cache, deprecated, install_proxy, uniqueify
 logger = logging.getLogger(__name__)
                 f"{month} {day:0>2} {year}", "%b %d %Y"
             ).date()
+        self._video_regex = re.compile(r"href=\"(/watch\?v=[\w-]*)")
     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
         """Given an html page or a fragment thereof, looks for
         return None
+    @deprecated("This function will be removed in the future, please use .video_urls")
+    def parse_links(self) -> List[str]:  # pragma: no cover
+        return self.video_urls
+    def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
         """Parse the video links from the page source, extracts and
         returns the /watch?v= part from video link href
         """
         req = self.html
+        videos_urls = self._extract_videos(req)
+        if until_watch_id:
+            try:
+                trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
+                yield videos_urls[:trim_index]
+                return
+            except ValueError:
+                pass
+        yield videos_urls
         # The above only returns 100 or fewer links
         # Simulating a browser request for the load more link
         load_more_url = self._find_load_more_url(req)
         while load_more_url:  # there is an url found
+            logger.debug("load more url: %s", load_more_url)
+            req = request.get(load_more_url)
+            load_more = json.loads(req)
+            try:
+                html = load_more["content_html"]
+            except KeyError:
+                logger.debug("Could not find content_html")
+                return
+            videos_urls = self._extract_videos(html)
             if until_watch_id:
                 try:
+                    trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
+                    yield videos_urls[:trim_index]
+                    return
                 except ValueError:
                     pass
+            yield videos_urls
             load_more_url = self._find_load_more_url(
                 load_more["load_more_widget_html"],
             )
+        return
+    def _extract_videos(self, html: str) -> List[str]:
+        return uniqueify(self._video_regex.findall(html))
+    def trimmed(self, video_id: str) -> Iterable[str]:
         """Retrieve a list of YouTube video URLs trimmed at the given video ID
         i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
         :type video_id: str
         :returns:
             List of video URLs from the playlist trimmed at the given ID
         """
+        for page in self._paginate(until_watch_id=video_id):
+            for watch_path in page:
+                yield self._video_url(watch_path)
     @property  # type: ignore
     @cache
         :returns:
             List of video URLs
         """
+        return [
+            self._video_url(video) for page in list(self._paginate()) for video in page
+        ]
     @property
     def videos(self) -> Iterable[YouTube]:
+        """Iterable of YouTube objects representing videos in this playlist
+        :rtype: Iterable[YouTube]
+        """
         for url in self.video_urls:
             yield YouTube(url)

pytube/extract.py CHANGED Viewed

@@ -1,15 +1,18 @@
 # -*- coding: utf-8 -*-
 """This module contains all non-cipher related data extraction logic."""
 import json
 import re
 from collections import OrderedDict
 from html.parser import HTMLParser
-from typing import Any, Optional, Tuple, List
-from urllib.parse import quote
 from urllib.parse import urlencode
-from pytube.exceptions import RegexMatchError, HTMLParseError
-from pytube.helpers import regex_search
 class PytubeHTMLParser(HTMLParser):
@@ -206,3 +209,117 @@ def get_vid_descr(html: str) -> str:
     html_parser = PytubeHTMLParser()
     html_parser.feed(html)
     return html_parser.vid_descr

 # -*- coding: utf-8 -*-
 """This module contains all non-cipher related data extraction logic."""
 import json
+import pprint
 import re
 from collections import OrderedDict
 from html.parser import HTMLParser
+from typing import Any, Optional, Tuple, List, Dict
+from urllib.parse import quote, parse_qs, unquote, parse_qsl
 from urllib.parse import urlencode
+from pytube import cipher
+from pytube.exceptions import RegexMatchError, HTMLParseError, LiveStreamError
+from pytube.helpers import regex_search, logger
 class PytubeHTMLParser(HTMLParser):
     html_parser = PytubeHTMLParser()
     html_parser.feed(html)
     return html_parser.vid_descr
+def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
+    """Apply the decrypted signature to the stream manifest.
+    Streams whose URL is already signed are left untouched; every other
+    stream gets ``&sig=<signature>`` appended to its ``url`` in place.
+    :param dict config_args:
+        Details of the media streams available.
+    :param str fmt:
+        Key in stream manifests (``ytplayer_config``) containing progressive
+        download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
+        ``adaptive_fmts``).
+    :param str js:
+        The contents of the base.js asset file.
+    :raises LiveStreamError:
+        If a stream has no ``url`` and the player response reports
+        ``liveStreamability``.
+    :raises KeyError:
+        If a stream has no ``url`` and the video is not a live stream.
+    :raises TypeError:
+        If a signature must be deciphered but ``js`` is ``None``.
+    """
+    stream_manifest = config_args[fmt]
+    live_stream = (
+        json.loads(config_args["player_response"])
+        .get("playabilityStatus", {},)
+        .get("liveStreamability")
+    )
+    for stream in stream_manifest:
+        try:
+            url: str = stream["url"]
+        except KeyError:
+            if live_stream:
+                raise LiveStreamError("Video is currently being streamed live")
+            # Not a live stream: surface the missing key instead of falling
+            # through with ``url`` unbound (first stream) or still holding
+            # the previous stream's URL.
+            raise
+        # 403 Forbidden fix.
+        if "signature" in url or (
+            "s" not in stream and ("&sig=" in url or "&lsig=" in url)
+        ):
+            # For certain videos, YouTube will just provide them pre-signed, in
+            # which case there's no real magic to download them and we can skip
+            # the whole signature descrambling entirely.
+            logger.debug("signature found, skip decipher")
+            continue
+        if js is None:
+            # signature not present in url, need js to descramble
+            # TypeError caught in __main__
+            raise TypeError("JS is None")
+        signature = cipher.get_signature(js, stream["s"])
+        logger.debug(
+            "finished descrambling signature for itag=%s\n%s",
+            stream["itag"],
+            pprint.pformat({"s": stream["s"], "signature": signature,}, indent=2,),
+        )
+        # 403 forbidden fix
+        stream["url"] = url + "&sig=" + signature
+def apply_descrambler(stream_data: Dict, key: str) -> None:
+    """Apply various in-place transforms to YouTube's media stream data.
+    Creates a ``list`` of dictionaries by string splitting on commas, then
+    taking each list item, parsing it as a query string, converting it to a
+    ``dict`` and unquoting the value.
+    When ``key`` is ``url_encoded_fmt_stream_map`` and that entry is missing
+    or empty, the list is instead built from the ``formats`` and
+    ``adaptiveFormats`` found under ``streamingData`` in the JSON-encoded
+    ``player_response``; if any format lacks a plain ``url``, every format's
+    ``cipher`` query string is parsed for ``url`` and ``s`` instead.
+    :param dict stream_data:
+        Dictionary containing query string encoded values.
+    :param str key:
+        Name of the key in dictionary.
+    **Example**:
+    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
+    >>> apply_descrambler(d, 'foo')
+    >>> print(d)
+    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
+    """
+    if key == "url_encoded_fmt_stream_map" and not stream_data.get(key):
+        # Parse the player response once and reuse it for both format lists.
+        streaming_data = json.loads(stream_data["player_response"])["streamingData"]
+        formats = streaming_data["formats"]
+        formats.extend(streaming_data["adaptiveFormats"])
+        try:
+            stream_data[key] = [
+                {
+                    "url": format_item["url"],
+                    "type": format_item["mimeType"],
+                    "quality": format_item["quality"],
+                    "itag": format_item["itag"],
+                }
+                for format_item in formats
+            ]
+        except KeyError:
+            # Parse every cipher first so a malformed entry fails before
+            # ``stream_data[key]`` is assigned.
+            cipher_urls = [parse_qs(format_item["cipher"]) for format_item in formats]
+            stream_data[key] = [
+                {
+                    "url": cipher_url["url"][0],
+                    "s": cipher_url["s"][0],
+                    "type": format_item["mimeType"],
+                    "quality": format_item["quality"],
+                    "itag": format_item["itag"],
+                }
+                for cipher_url, format_item in zip(cipher_urls, formats)
+            ]
+    else:
+        stream_data[key] = [
+            {k: unquote(v) for k, v in parse_qsl(i)}
+            for i in stream_data[key].split(",")
+        ]
+    logger.debug(
+        "applying descrambler\n%s", pprint.pformat(stream_data[key], indent=2),
+    )

pytube/helpers.py CHANGED Viewed

@@ -6,7 +6,8 @@ import os
 import pprint
 import re
 import warnings
-from typing import TypeVar, Callable, Optional
 from pytube.exceptions import RegexMatchError
@@ -156,3 +157,20 @@ def target_directory(output_path: Optional[str] = None) -> str:
         output_path = os.getcwd()
     os.makedirs(output_path, exist_ok=True)
     return output_path

 import pprint
 import re
 import warnings
+from typing import TypeVar, Callable, Optional, Dict, List, Any
+from urllib import request
 from pytube.exceptions import RegexMatchError
         output_path = os.getcwd()
     os.makedirs(output_path, exist_ok=True)
     return output_path
+def install_proxy(proxy_handler: Dict[str, str]) -> None:
+    """Install a ``urllib`` opener that uses the given proxy mapping.
+    :param proxy_handler:
+        Mapping handed unchanged to ``urllib.request.ProxyHandler``.
+    """
+    opener = request.build_opener(request.ProxyHandler(proxy_handler))
+    # install_opener makes this opener the default for later urlopen calls.
+    request.install_opener(opener)
+def uniqueify(duped_list: List) -> List:
+    """Return the items of ``duped_list`` with later duplicates removed.
+    The first occurrence of each item is kept and the original order is
+    preserved. Items must be hashable.
+    """
+    already_seen = set()
+    unique_items = []
+    for candidate in duped_list:
+        if candidate not in already_seen:
+            already_seen.add(candidate)
+            unique_items.append(candidate)
+    return unique_items

pytube/mixins.py DELETED Viewed

@@ -1,137 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Applies in-place data mutations."""
-import json
-import logging
-import pprint
-from typing import Dict
-from pytube import cipher
-from urllib import request
-from urllib.parse import parse_qsl
-from urllib.parse import parse_qs
-from urllib.parse import unquote
-from pytube.exceptions import LiveStreamError
-logger = logging.getLogger(__name__)
-def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
-    """Apply the decrypted signature to the stream manifest.
-    :param dict config_args:
-        Details of the media streams available.
-    :param str fmt:
-        Key in stream manifests (``ytplayer_config``) containing progressive
-        download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
-        ``adaptive_fmts``).
-    :param str js:
-        The contents of the base.js asset file.
-    """
-    stream_manifest = config_args[fmt]
-    live_stream = (
-        json.loads(config_args["player_response"])
-        .get("playabilityStatus", {},)
-        .get("liveStreamability")
-    )
-    for i, stream in enumerate(stream_manifest):
-        try:
-            url: str = stream["url"]
-        except KeyError:
-            if live_stream:
-                raise LiveStreamError("Video is currently being streamed live")
-        # 403 Forbidden fix.
-        if "signature" in url or (
-            "s" not in stream and ("&sig=" in url or "&lsig=" in url)
-        ):
-            # For certain videos, YouTube will just provide them pre-signed, in
-            # which case there's no real magic to download them and we can skip
-            # the whole signature descrambling entirely.
-            logger.debug("signature found, skip decipher")
-            continue
-        if js is not None:
-            signature = cipher.get_signature(js, stream["s"])
-        else:
-            # signature not present in url (line 33), need js to descramble
-            # TypeError caught in __main__
-            raise TypeError("JS is None")
-        logger.debug(
-            "finished descrambling signature for itag=%s\n%s",
-            stream["itag"],
-            pprint.pformat({"s": stream["s"], "signature": signature,}, indent=2,),
-        )
-        # 403 forbidden fix
-        stream_manifest[i]["url"] = url + "&sig=" + signature
-def apply_descrambler(stream_data: Dict, key: str) -> None:
-    """Apply various in-place transforms to YouTube's media stream data.
-    Creates a ``list`` of dictionaries by string splitting on commas, then
-    taking each list item, parsing it as a query string, converting it to a
-    ``dict`` and unquoting the value.
-    :param dict stream_data:
-        Dictionary containing query string encoded values.
-    :param str key:
-        Name of the key in dictionary.
-    **Example**:
-    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
-    >>> apply_descrambler(d, 'foo')
-    >>> print(d)
-    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
-    """
-    if key == "url_encoded_fmt_stream_map" and not stream_data.get(
-        "url_encoded_fmt_stream_map"
-    ):
-        formats = json.loads(stream_data["player_response"])["streamingData"]["formats"]
-        formats.extend(
-            json.loads(stream_data["player_response"])["streamingData"][
-                "adaptiveFormats"
-            ]
-        )
-        try:
-            stream_data[key] = [
-                {
-                    "url": format_item["url"],
-                    "type": format_item["mimeType"],
-                    "quality": format_item["quality"],
-                    "itag": format_item["itag"],
-                }
-                for format_item in formats
-            ]
-        except KeyError:
-            cipher_url = [
-                parse_qs(formats[i]["cipher"]) for i, data in enumerate(formats)
-            ]
-            stream_data[key] = [
-                {
-                    "url": cipher_url[i]["url"][0],
-                    "s": cipher_url[i]["s"][0],
-                    "type": format_item["mimeType"],
-                    "quality": format_item["quality"],
-                    "itag": format_item["itag"],
-                }
-                for i, format_item in enumerate(formats)
-            ]
-    else:
-        stream_data[key] = [
-            {k: unquote(v) for k, v in parse_qsl(i)}
-            for i in stream_data[key].split(",")
-        ]
-    logger.debug(
-        "applying descrambler\n%s", pprint.pformat(stream_data[key], indent=2),
-    )
-def install_proxy(proxy_handler: Dict[str, str]) -> None:
-    proxy_support = request.ProxyHandler(proxy_handler)
-    opener = request.build_opener(proxy_support)
-    request.install_opener(opener)

tests/conftest.py CHANGED Viewed

@@ -61,3 +61,14 @@ def playlist_html():
     )
     with gzip.open(file_path, "rb") as f:
         return f.read().decode("utf-8")

     )
     with gzip.open(file_path, "rb") as f:
         return f.read().decode("utf-8")
+@pytest.fixture
+def playlist_long_html():
+    """Youtube playlist HTML loaded on 2020-01-25 from
+    https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr"""
+    # The mock lives next to this conftest under mocks/ and is stored gzipped.
+    file_path = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)), "mocks", "playlist_long.html.gz"
+    )
+    # Read the raw bytes and decode them so the fixture yields a str.
+    with gzip.open(file_path, "rb") as f:
+        return f.read().decode("utf-8")

tests/contrib/test_playlist.py CHANGED Viewed

@@ -58,30 +58,6 @@ def test_init_with_watch_id(request_get):
     )
-@mock.patch("pytube.contrib.playlist.request.get")
-def test_parse_links(request_get, playlist_html):
-    url = "https://www.fakeurl.com/playlist?list=whatever"
-    request_get.return_value = playlist_html
-    playlist = Playlist(url)
-    playlist._find_load_more_url = MagicMock(return_value=None)
-    links = playlist.parse_links()
-    request_get.assert_called()
-    assert links == [
-        "/watch?v=ujTCoH21GlA",
-        "/watch?v=45ryDIPHdGg",
-        "/watch?v=1BYu65vLKdA",
-        "/watch?v=3AQ_74xrch8",
-        "/watch?v=ddqQUz9mZaM",
-        "/watch?v=vwLT6bZrHEE",
-        "/watch?v=TQKI0KE-JYY",
-        "/watch?v=dNBvQ38MlT8",
-        "/watch?v=JHxyrMgOUWI",
-        "/watch?v=l2I8NycJMCY",
-        "/watch?v=g1Zbuk1gAfk",
-        "/watch?v=zixd-si9Q-o",
-    ]
 @mock.patch("pytube.contrib.playlist.request.get")
 def test_video_urls(request_get, playlist_html):
     url = "https://www.fakeurl.com/playlist?list=whatever"
@@ -144,9 +120,68 @@ def test_trimmed(request_get, playlist_html):
     url = "https://www.fakeurl.com/playlist?list=whatever"
     request_get.return_value = playlist_html
     playlist = Playlist(url)
-    playlist._find_load_more_url = MagicMock(return_value="dummy")
     assert request_get.call_count == 1
-    assert playlist.trimmed("1BYu65vLKdA") == [
         "https://www.youtube.com/watch?v=ujTCoH21GlA",
         "https://www.youtube.com/watch?v=45ryDIPHdGg",
     ]

     )
 @mock.patch("pytube.contrib.playlist.request.get")
 def test_video_urls(request_get, playlist_html):
     url = "https://www.fakeurl.com/playlist?list=whatever"
     url = "https://www.fakeurl.com/playlist?list=whatever"
     request_get.return_value = playlist_html
     playlist = Playlist(url)
+    playlist._find_load_more_url = MagicMock(return_value=None)
     assert request_get.call_count == 1
+    trimmed = list(playlist.trimmed("1BYu65vLKdA"))
+    assert trimmed == [
         "https://www.youtube.com/watch?v=ujTCoH21GlA",
         "https://www.youtube.com/watch?v=45ryDIPHdGg",
     ]
+@mock.patch("pytube.contrib.playlist.request.get")
+def test_playlist_failed_pagination(request_get, playlist_long_html):
+    """A load-more response without ``content_html`` keeps only the first page."""
+    url = "https://www.fakeurl.com/playlist?list=whatever"
+    # First response is the playlist page; the second is an empty JSON object
+    # standing in for a load-more reply that carries no content.
+    request_get.side_effect = [
+        playlist_long_html,
+        "{}",
+    ]
+    playlist = Playlist(url)
+    video_urls = playlist.video_urls
+    assert len(video_urls) == 100
+    assert request_get.call_count == 2
+    # The last request must have gone to the load-more URL found in the page.
+    request_get.assert_called_with(
+        "https://www.youtube.com/browse_ajax?action_continuation=1&amp;continuation"
+        "=4qmFsgIsEhpWTFVVYS12aW9HaGUyYnRCY1puZWFQb25LQRoOZWdaUVZEcERSMUUlM0Q%253D"
+    )
+@mock.patch("pytube.contrib.playlist.request.get")
+def test_playlist_pagination(request_get, playlist_html, playlist_long_html):
+    """One extra page with a single video is appended to the first page."""
+    # NOTE(review): the playlist_html fixture is requested but not used here.
+    url = "https://www.fakeurl.com/playlist?list=whatever"
+    # Responses: playlist page, then a load-more reply holding one video link
+    # and an empty load_more_widget_html. The trailing "{}" is never consumed,
+    # as the call_count assertion below confirms.
+    request_get.side_effect = [
+        playlist_long_html,
+        '{"content_html":"<a href=\\"/watch?v=BcWz41-4cDk&amp;feature=plpp_video&amp;ved'
+        '=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", "load_more_widget_html":""}',
+        "{}",
+    ]
+    playlist = Playlist(url)
+    assert len(playlist.video_urls) == 101
+    assert request_get.call_count == 2
+@mock.patch("pytube.contrib.playlist.request.get")
+def test_trimmed_pagination(request_get, playlist_html, playlist_long_html):
+    """Trimming at an ID on the first page makes no load-more request."""
+    # NOTE(review): the playlist_html fixture is requested but not used here.
+    url = "https://www.fakeurl.com/playlist?list=whatever"
+    request_get.side_effect = [
+        playlist_long_html,
+        '{"content_html":"<a href=\\"/watch?v=BcWz41-4cDk&amp;feature=plpp_video&amp;ved'
+        '=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", "load_more_widget_html":""}',
+        "{}",
+    ]
+    playlist = Playlist(url)
+    # Three URLs precede the trim ID, and only the initial page fetch happened.
+    assert len(list(playlist.trimmed("FN9vC8aR7Yk"))) == 3
+    assert request_get.call_count == 1
+@mock.patch("pytube.contrib.playlist.request.get")
+def test_trimmed_pagination_not_found(request_get, playlist_html, playlist_long_html):
+    """Trimming at an ID that never appears yields every video on every page."""
+    # NOTE(review): the playlist_html fixture is requested but not used here.
+    url = "https://www.fakeurl.com/playlist?list=whatever"
+    request_get.side_effect = [
+        playlist_long_html,
+        '{"content_html":"<a href=\\"/watch?v=BcWz41-4cDk&amp;feature=plpp_video&amp;ved'
+        '=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", "load_more_widget_html":""}',
+        "{}",
+    ]
+    playlist = Playlist(url)
+    assert len(list(playlist.trimmed("wont-be-found"))) == 101

tests/mocks/playlist_long.html.gz ADDED Viewed

Binary file (47.7 kB). View file