Taylor Fox Dahlin
committed on
Initial implementation of Channel object (#932)
Browse files
* Implements a Channel object for downloading videos from a YouTube channel.
* Minor changes to the Playlist class to make it easier to subclass.
* `.videos` and `.video_urls` now behave just like iterable lists, but defer web requests.
* Implements DeferredGeneratorList which converts generators to lazy list-like objects.
- pytube/__init__.py +1 -0
- pytube/contrib/channel.py +137 -0
- pytube/contrib/playlist.py +34 -15
- pytube/extract.py +31 -0
- pytube/helpers.py +95 -0
- tests/conftest.py +20 -3
- tests/contrib/test_channel.py +54 -0
- tests/mocks/channel-videos.html.gz +0 -0
pytube/__init__.py
CHANGED
@@ -15,3 +15,4 @@ from pytube.captions import Caption
|
|
15 |
from pytube.query import CaptionQuery, StreamQuery
|
16 |
from pytube.__main__ import YouTube
|
17 |
from pytube.contrib.playlist import Playlist
|
|
|
|
15 |
from pytube.query import CaptionQuery, StreamQuery
|
16 |
from pytube.__main__ import YouTube
|
17 |
from pytube.contrib.playlist import Playlist
|
18 |
+
from pytube.contrib.channel import Channel
|
pytube/contrib/channel.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""Module for interacting with a user's youtube channel."""
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
from typing import Dict, List, Optional, Tuple
|
6 |
+
|
7 |
+
from pytube import extract, Playlist, request
|
8 |
+
from pytube.helpers import uniqueify
|
9 |
+
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
|
13 |
+
class Channel(Playlist):
    """A :class:`Playlist` subclass built from a YouTube channel url.

    The channel identifier is taken from the input url with
    ``extract.channel_name`` and used to build the urls of the channel's
    sub-pages (videos, playlists, community, featured channels, about).
    The ``html`` property is overridden so that it serves the channel's
    videos page; the other sub-pages are only fetched and cached on demand.
    """

    def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
        """Construct a :class:`Channel <Channel>`.

        :param str url:
            A YouTube channel url, either ``/c/<channel_name>`` or
            ``/channel/<channel_id>`` (the two patterns accepted by
            ``extract.channel_name``).
        :param proxies:
            (Optional) Proxy mapping forwarded to ``Playlist.__init__``.
        """
        super().__init__(url, proxies)

        self.channel_name = extract.channel_name(url)

        # ``extract.channel_name`` returns only the trailing identifier for
        # both ``/c/<name>`` and ``/channel/<id>`` urls. Rebuild the channel
        # url with the prefix that was actually present in the input, since
        # ``/c/<channel_id>`` is not the same page as ``/channel/<channel_id>``.
        prefix = "c" if f"/c/{self.channel_name}" in url else "channel"
        self.channel_url = (
            f"https://www.youtube.com/{prefix}/{self.channel_name}"
        )
        self.videos_url = self.channel_url + '/videos'
        self.playlists_url = self.channel_url + '/playlists'
        self.community_url = self.channel_url + '/community'
        self.featured_channels_url = self.channel_url + '/channels'
        self.about_url = self.channel_url + '/about'

        # Possible future additions
        self._playlists_html = None
        self._community_html = None
        self._featured_channels_html = None
        self._about_html = None

    @property
    def html(self):
        """Html of the channel's videos page, fetched once and then cached."""
        if not self._html:
            self._html = request.get(self.videos_url)
        return self._html

    @property
    def playlists_html(self):
        """Html of the channel's playlists page, fetched once and cached."""
        if not self._playlists_html:
            self._playlists_html = request.get(self.playlists_url)
        return self._playlists_html

    @property
    def community_html(self):
        """Html of the channel's community page, fetched once and cached."""
        if not self._community_html:
            self._community_html = request.get(self.community_url)
        return self._community_html

    @property
    def featured_channels_html(self):
        """Html of the featured channels page, fetched once and cached."""
        if not self._featured_channels_html:
            self._featured_channels_html = request.get(
                self.featured_channels_url
            )
        return self._featured_channels_html

    @property
    def about_html(self):
        """Html of the channel's about page, fetched once and cached."""
        if not self._about_html:
            self._about_html = request.get(self.about_url)
        return self._about_html

    @staticmethod
    def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]:
        """Extracts videos from a raw json page

        :param str raw_json: Input json extracted from the page or the last
            server response
        :rtype: Tuple[List[str], Optional[str]]
        :returns: Tuple containing a de-duplicated list of ``/watch?v=<id>``
            paths and a continuation token, if more videos are available.
            ``([], None)`` is returned when none of the known json layouts
            matches.
        """
        initial_data = json.loads(raw_json)

        # Known locations of the video items, tried in order.
        layouts = (
            # json extracted from the channel's html page
            lambda data: data["contents"][
                "twoColumnBrowseResultsRenderer"][
                "tabs"][1]["tabRenderer"]["content"][
                "sectionListRenderer"]["contents"][0][
                "itemSectionRenderer"]["contents"][0][
                "gridRenderer"]["items"],
            # json sent directly by the server in a continuation response
            lambda data: data[1]['response']['onResponseReceivedActions'][
                0
            ]['appendContinuationItemsAction']['continuationItems'],
            # continuation response that is no longer a list and no longer
            # has the "response" key
            lambda data: data['onResponseReceivedActions'][0][
                'appendContinuationItemsAction']['continuationItems'],
        )

        last_error = None
        for locate_items in layouts:
            try:
                videos = locate_items(initial_data)
            except (KeyError, IndexError, TypeError) as err:
                # Keep a reference: ``err`` is unbound when the clause ends.
                last_error = err
            else:
                break
        else:
            # No layout matched; report the failure of the last attempt.
            logger.info(last_error)
            return [], None

        try:
            # When more videos exist, the last item is a continuation marker
            # rather than a video.
            continuation = videos[-1]['continuationItemRenderer'][
                'continuationEndpoint'
            ]['continuationCommand']['token']
            videos = videos[:-1]
        except (KeyError, IndexError):
            # if there is an error, no continuation is available
            continuation = None

        # only extract the video ids from the video data
        watch_paths = [
            f"/watch?v={item['gridVideoRenderer']['videoId']}"
            for item in videos
        ]

        # remove duplicates
        return uniqueify(watch_paths), continuation
|
pytube/contrib/playlist.py
CHANGED
@@ -7,7 +7,7 @@ from datetime import date, datetime
|
|
7 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
8 |
|
9 |
from pytube import extract, request, YouTube
|
10 |
-
from pytube.helpers import cache, install_proxy, regex_search, uniqueify
|
11 |
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
@@ -19,15 +19,24 @@ class Playlist(Sequence):
|
|
19 |
if proxies:
|
20 |
install_proxy(proxies)
|
21 |
|
|
|
|
|
22 |
# These need to be initialized as None for the properties.
|
23 |
self._html = None
|
24 |
self._ytcfg = None
|
25 |
|
26 |
-
self.
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
@property
|
33 |
def html(self):
|
@@ -175,7 +184,7 @@ class Playlist(Sequence):
|
|
175 |
'appendContinuationItemsAction']['continuationItems']
|
176 |
videos = important_content
|
177 |
except (KeyError, IndexError, TypeError) as p:
|
178 |
-
|
179 |
return [], None
|
180 |
|
181 |
try:
|
@@ -218,27 +227,37 @@ class Playlist(Sequence):
|
|
218 |
for page in self._paginate(until_watch_id=video_id):
|
219 |
yield from (self._video_url(watch_path) for watch_path in page)
|
220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
@property # type: ignore
|
222 |
@cache
|
223 |
-
def video_urls(self) ->
|
224 |
"""Complete links of all the videos in playlist
|
225 |
|
226 |
:rtype: List[str]
|
227 |
:returns: List of video URLs
|
228 |
"""
|
229 |
-
return
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
|
235 |
@property
|
236 |
def videos(self) -> Iterable[YouTube]:
|
237 |
"""Yields YouTube objects of videos in this playlist
|
238 |
|
239 |
-
:
|
|
|
240 |
"""
|
241 |
-
|
242 |
|
243 |
def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
|
244 |
return self.video_urls[i]
|
@@ -247,7 +266,7 @@ class Playlist(Sequence):
|
|
247 |
return len(self.video_urls)
|
248 |
|
249 |
def __repr__(self) -> str:
|
250 |
-
return f"{self.video_urls}"
|
251 |
|
252 |
@property
|
253 |
@cache
|
|
|
7 |
from typing import Dict, Iterable, List, Optional, Tuple, Union
|
8 |
|
9 |
from pytube import extract, request, YouTube
|
10 |
+
from pytube.helpers import cache, DeferredGeneratorList, install_proxy, regex_search, uniqueify
|
11 |
|
12 |
logger = logging.getLogger(__name__)
|
13 |
|
|
|
19 |
if proxies:
|
20 |
install_proxy(proxies)
|
21 |
|
22 |
+
self._input_url = url
|
23 |
+
|
24 |
# These need to be initialized as None for the properties.
|
25 |
self._html = None
|
26 |
self._ytcfg = None
|
27 |
|
28 |
+
self._playlist_id = None
|
29 |
|
30 |
+
@property
def playlist_id(self):
    """Return the playlist id, parsing it from the input url on first use.

    The id is obtained with ``extract.playlist_id`` from the url given to
    the constructor and memoized in ``self._playlist_id``.
    """
    if not self._playlist_id:
        self._playlist_id = extract.playlist_id(self._input_url)
    return self._playlist_id
|
36 |
+
|
37 |
+
@property
def playlist_url(self):
    """Return the playlist page url built from :attr:`playlist_id`."""
    list_id = self.playlist_id
    return f"https://www.youtube.com/playlist?list={list_id}"
|
40 |
|
41 |
@property
|
42 |
def html(self):
|
|
|
184 |
'appendContinuationItemsAction']['continuationItems']
|
185 |
videos = important_content
|
186 |
except (KeyError, IndexError, TypeError) as p:
|
187 |
+
logger.info(p)
|
188 |
return [], None
|
189 |
|
190 |
try:
|
|
|
227 |
for page in self._paginate(until_watch_id=video_id):
|
228 |
yield from (self._video_url(watch_path) for watch_path in page)
|
229 |
|
230 |
+
def url_generator(self):
    """Lazily produce the url of every video, one result page at a time.

    :Yields: Video URLs
    """
    for page in self._paginate():
        yield from (self._video_url(watch_path) for watch_path in page)
|
238 |
+
|
239 |
@property  # type: ignore
@cache
def video_urls(self) -> DeferredGeneratorList:
    """Complete links of all the videos in playlist

    :rtype: DeferredGeneratorList
    :returns: List-like object wrapping :meth:`url_generator`
    """
    url_stream = self.url_generator()
    return DeferredGeneratorList(url_stream)
|
248 |
+
|
249 |
+
def videos_generator(self):
    """Lazily produce a ``YouTube`` object for each url in ``video_urls``.

    :Yields: YouTube objects
    """
    yield from (YouTube(video_url) for video_url in self.video_urls)
|
252 |
|
253 |
@property
def videos(self) -> Iterable[YouTube]:
    """YouTube objects of the videos in this playlist

    :rtype: DeferredGeneratorList
    :returns: List-like object wrapping :meth:`videos_generator`
    """
    video_stream = self.videos_generator()
    return DeferredGeneratorList(video_stream)
|
261 |
|
262 |
def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
    """Index or slice the playlist; delegates to ``video_urls``."""
    urls = self.video_urls
    return urls[i]
|
|
|
266 |
return len(self.video_urls)
|
267 |
|
268 |
def __repr__(self) -> str:
    """Return the ``repr`` of the ``video_urls`` container."""
    return repr(self.video_urls)
|
270 |
|
271 |
@property
|
272 |
@cache
|
pytube/extract.py
CHANGED
@@ -178,6 +178,37 @@ def playlist_id(url: str) -> str:
|
|
178 |
return parse_qs(parsed.query)['list'][0]
|
179 |
|
180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
def video_info_url(video_id: str, watch_url: str) -> str:
|
182 |
"""Construct the video_info url.
|
183 |
|
|
|
178 |
return parse_qs(parsed.query)['list'][0]
|
179 |
|
180 |
|
181 |
+
def channel_name(url: str) -> str:
    """Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*`

    The patterns are tried in the order listed and the first one found in
    the url wins.

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        YouTube channel name.
    :raises RegexMatchError:
        If neither pattern is found in ``url``.
    """
    patterns = [
        r"(?:\/c\/([\d\w_\-]+)(\/.*)?)",
        r"(?:\/channel\/([\w\d_\-]+)(\/.*)?)"
    ]
    for pattern in patterns:
        found = re.search(pattern, url)
        if not found:
            continue
        logger.debug("finished regex search, matched: %s", pattern)
        # Group 1 is the name/id segment that follows the prefix.
        return found.group(1)

    raise RegexMatchError(
        caller="channel_name", pattern="patterns"
    )
|
210 |
+
|
211 |
+
|
212 |
def video_info_url(video_id: str, watch_url: str) -> str:
|
213 |
"""Construct the video_info url.
|
214 |
|
pytube/helpers.py
CHANGED
@@ -14,6 +14,101 @@ from pytube.exceptions import RegexMatchError
|
|
14 |
logger = logging.getLogger(__name__)
|
15 |
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def regex_search(pattern: str, string: str, group: int) -> str:
|
18 |
"""Shortcut method to search a string for a given pattern.
|
19 |
|
|
|
14 |
logger = logging.getLogger(__name__)
|
15 |
|
16 |
|
17 |
+
class DeferredGeneratorList:
    """A wrapper class for deferring list generation.

    Pytube has some continuation generators that create web calls, which means
    that any time a full list is requested, all of those web calls must be
    made at once, which could lead to slowdowns. This will allow individual
    elements to be queried, so that slowdowns only happen as necessary. For
    example, you can iterate over elements in the list without accessing them
    all simultaneously. This should allow for speed improvements for playlist
    and channel interactions.

    Items pulled from the generator are stored in ``_elements`` so each one
    is produced at most once.
    """

    def __init__(self, generator):
        """Construct a :class:`DeferredGeneratorList <DeferredGeneratorList>`.

        :param generator generator:
            The deferrable generator to create a wrapper for.
        """
        self.gen = generator
        self._elements = []
        # Cursor for ``__next__``; it has to exist before the first
        # ``next()`` call, otherwise ``__next__`` raises AttributeError.
        self.iter_index = 0

    def __eq__(self, other):
        """We want to mimic list behavior for comparison."""
        return list(self) == other

    @staticmethod
    def _required_length(key):
        """Return how many leading items ``key`` needs, or None for all.

        Only a non-negative index, or a forward slice with a non-negative
        ``stop`` (and a non-negative or omitted ``start``), can be answered
        from a prefix of the sequence. Negative indices, open-ended slices
        and negative steps are relative to the end, so everything is needed.
        """
        if isinstance(key, int):
            return key + 1 if key >= 0 else None

        start, stop, step = key.start, key.stop, key.step
        forward = step is None or (isinstance(step, int) and step > 0)
        start_ok = start is None or (isinstance(start, int) and start >= 0)
        stop_ok = isinstance(stop, int) and stop >= 0
        if forward and start_ok and stop_ok:
            return stop
        return None

    def __getitem__(self, key) -> Any:
        """Only generate items as they're asked for.

        :param key:
            An ``int`` index or a ``slice``.
        :returns:
            The item (for an index) or a list of items (for a slice).
        :raises TypeError:
            If ``key`` is neither an ``int`` nor a ``slice``.
        :raises IndexError:
            If the generator runs out before a non-negative index, or the
            non-negative ``stop`` of a forward slice, is reached. Unlike a
            plain list, such a slice is not silently truncated.
        """
        # We only allow querying with indexes.
        if not isinstance(key, (int, slice)):
            raise TypeError('Key must be either a slice or int.')

        needed = self._required_length(key)
        if needed is None:
            # The key is relative to the end of the sequence.
            self.generate_all()
        else:
            # Generate all elements up to the final item
            while len(self._elements) < needed:
                try:
                    next_item = next(self.gen)
                except StopIteration:
                    # Not enough elements for the index/slice.
                    raise IndexError(
                        'DeferredGeneratorList index out of range'
                    ) from None
                self._elements.append(next_item)

        return self._elements[key]

    def __iter__(self):
        """Custom iterator for dynamically generated list."""
        iter_index = 0
        while True:
            try:
                curr_item = self[iter_index]
            except IndexError:
                # The generator is exhausted; end the iteration.
                return
            yield curr_item
            iter_index += 1

    def __next__(self) -> Any:
        """Fetch next element in iterator."""
        try:
            curr_element = self[self.iter_index]
        except IndexError:
            raise StopIteration
        self.iter_index += 1
        return curr_element  # noqa:R504

    def __len__(self) -> int:
        """Return length of list of all items."""
        self.generate_all()
        return len(self._elements)

    def __repr__(self) -> str:
        """String representation of all items."""
        self.generate_all()
        return str(self._elements)

    def __reversed__(self):
        """Return a new list holding all items in reverse order."""
        self.generate_all()
        return self._elements[::-1]

    def generate_all(self):
        """Generate all items."""
        while True:
            try:
                next_item = next(self.gen)
            except StopIteration:
                break
            else:
                self._elements.append(next_item)
|
110 |
+
|
111 |
+
|
112 |
def regex_search(pattern: str, string: str, group: int) -> str:
|
113 |
"""Shortcut method to search a string for a given pattern.
|
114 |
|
tests/conftest.py
CHANGED
@@ -91,7 +91,8 @@ def region_blocked():
|
|
91 |
@pytest.fixture
|
92 |
def playlist_html():
|
93 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
94 |
-
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
|
|
95 |
file_path = os.path.join(
|
96 |
os.path.dirname(os.path.realpath(__file__)),
|
97 |
"mocks",
|
@@ -104,7 +105,8 @@ def playlist_html():
|
|
104 |
@pytest.fixture
|
105 |
def playlist_long_html():
|
106 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
107 |
-
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
|
|
108 |
file_path = os.path.join(
|
109 |
os.path.dirname(os.path.realpath(__file__)),
|
110 |
"mocks",
|
@@ -117,7 +119,8 @@ def playlist_long_html():
|
|
117 |
@pytest.fixture
|
118 |
def playlist_submenu_html():
|
119 |
"""Youtube playlist HTML loaded on 2020-01-24 from
|
120 |
-
https://www.youtube.com/playlist?list=PLZHQObOWTQDMsr9K-rj53DwVRMYO3t5Yr
|
|
|
121 |
file_path = os.path.join(
|
122 |
os.path.dirname(os.path.realpath(__file__)),
|
123 |
"mocks",
|
@@ -138,3 +141,17 @@ def stream_dict():
|
|
138 |
with gzip.open(file_path, "rb") as f:
|
139 |
content = json.loads(f.read().decode("utf-8"))
|
140 |
return content['watch_html']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
@pytest.fixture
|
92 |
def playlist_html():
|
93 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
94 |
+
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
95 |
+
"""
|
96 |
file_path = os.path.join(
|
97 |
os.path.dirname(os.path.realpath(__file__)),
|
98 |
"mocks",
|
|
|
105 |
@pytest.fixture
|
106 |
def playlist_long_html():
|
107 |
"""Youtube playlist HTML loaded on 2020-01-25 from
|
108 |
+
https://www.youtube.com/playlist?list=PLzMcBGfZo4-mP7qA9cagf68V06sko5otr
|
109 |
+
"""
|
110 |
file_path = os.path.join(
|
111 |
os.path.dirname(os.path.realpath(__file__)),
|
112 |
"mocks",
|
|
|
119 |
@pytest.fixture
|
120 |
def playlist_submenu_html():
|
121 |
"""Youtube playlist HTML loaded on 2020-01-24 from
|
122 |
+
https://www.youtube.com/playlist?list=PLZHQObOWTQDMsr9K-rj53DwVRMYO3t5Yr
|
123 |
+
"""
|
124 |
file_path = os.path.join(
|
125 |
os.path.dirname(os.path.realpath(__file__)),
|
126 |
"mocks",
|
|
|
141 |
with gzip.open(file_path, "rb") as f:
|
142 |
content = json.loads(f.read().decode("utf-8"))
|
143 |
return content['watch_html']
|
144 |
+
|
145 |
+
|
146 |
+
@pytest.fixture
def channel_videos_html():
    """Youtube channel HTML loaded on 2021-05-05 from
    https://www.youtube.com/c/ProgrammingKnowledge/videos
    """
    # The gzipped mock lives in the "mocks" directory next to this file.
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(tests_dir, "mocks", "channel-videos.html.gz")
    with gzip.open(file_path, 'rb') as f:
        raw_bytes = f.read()
    return raw_bytes.decode('utf-8')
|
tests/contrib/test_channel.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from unittest import mock
|
2 |
+
|
3 |
+
from pytube import Channel
|
4 |
+
|
5 |
+
|
6 |
+
@mock.patch('pytube.request.get')
def test_init_with_url(request_get, channel_videos_html):
    """Every sub-page url is derived from the channel url."""
    request_get.return_value = channel_videos_html
    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge/videos')
    base_url = channel.channel_url
    assert base_url == 'https://www.youtube.com/c/ProgrammingKnowledge'
    assert channel.videos_url == f'{base_url}/videos'
    assert channel.playlists_url == f'{base_url}/playlists'
    assert channel.community_url == f'{base_url}/community'
    assert channel.featured_channels_url == f'{base_url}/channels'
    assert channel.about_url == f'{base_url}/about'
|
16 |
+
|
17 |
+
|
18 |
+
@mock.patch('pytube.request.get')
def test_channel_name(request_get, channel_videos_html):
    """The channel name is parsed out of a ``/c/<name>/videos`` url."""
    request_get.return_value = channel_videos_html

    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge/videos')
    expected_name = 'ProgrammingKnowledge'
    assert channel.channel_name == expected_name
|
24 |
+
|
25 |
+
|
26 |
+
@mock.patch('pytube.request.get')
def test_channel_video_list(request_get, channel_videos_html):
    """The first ten video urls match those in the mocked videos page."""
    request_get.return_value = channel_videos_html

    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge/videos')
    expected_urls = [
        'https://www.youtube.com/watch?v=t_xLpJo_35k',
        'https://www.youtube.com/watch?v=ccbh5YhxouQ',
        'https://www.youtube.com/watch?v=wDnFjDjxW_0',
        'https://www.youtube.com/watch?v=F3W_p_4XftA',
        'https://www.youtube.com/watch?v=_fxm0xGGEi4',
        'https://www.youtube.com/watch?v=cRbKZzcuIsg',
        'https://www.youtube.com/watch?v=sdDu3dfIuow',
        'https://www.youtube.com/watch?v=10KIbp-gJCE',
        'https://www.youtube.com/watch?v=wZIT-cRtd6s',
        'https://www.youtube.com/watch?v=KucCvEbTj0w',
    ]
    leading_urls = channel.video_urls[:10]
    assert leading_urls == expected_urls
|
44 |
+
|
45 |
+
|
46 |
+
@mock.patch('pytube.request.get')
def test_videos_html(request_get, channel_videos_html):
    """``Channel.html`` returns whatever ``request.get`` delivered."""
    request_get.return_value = channel_videos_html

    channel = Channel('https://www.youtube.com/c/ProgrammingKnowledge')
    fetched_html = channel.html
    assert fetched_html == channel_videos_html
|
52 |
+
|
53 |
+
# Because the Channel object subclasses the Playlist object, most of the tests
|
54 |
+
# are already taken care of by the Playlist test suite.
|
tests/mocks/channel-videos.html.gz
ADDED
Binary file (48.6 kB). View file
|
|