Merge pull request #40 from hbmartin/reformatting
Browse files
- .flake8 +4 -2
- Makefile +2 -1
- Pipfile +7 -0
- pytube/__main__.py +19 -51
- pytube/captions.py +0 -3
- pytube/cipher.py +11 -11
- pytube/contrib/playlist.py +34 -45
- pytube/exceptions.py +1 -0
- pytube/helpers.py +1 -0
- pytube/monostate.py +2 -2
- pytube/query.py +2 -0
- pytube/request.py +3 -4
- pytube/streams.py +1 -1
- pytube/version.py +2 -0
- tests/test_streams.py +5 -9
.flake8
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
[flake8]
|
2 |
-
ignore = E231,E203,W503
|
3 |
-
max-line-length = 89
|
|
|
|
|
|
1 |
[flake8]
|
2 |
+
ignore = E231,E203,W503,Q000,WPS111,WPS305,WPS348,WPS602,D400,DAR201,S101,DAR101,C812,D104,I001,WPS306,WPS214,D401,WPS229,WPS420,WPS230,WPS414,WPS114,WPS226,WPS442,C819,WPS601,T001,RST304,WPS410,WPS428,A003,A002,I003,WPS221,WPS326,WPS201,S405,DAR301,WPS210,WPS202,WPS213,WPS301,P103
|
3 |
+
max-line-length = 89
|
4 |
+
|
5 |
+
[isort]
|
Makefile
CHANGED
@@ -9,7 +9,8 @@ pipenv:
|
|
9 |
pipenv install --dev
|
10 |
|
11 |
test:
|
12 |
-
pipenv run flake8
|
|
|
13 |
pipenv run black pytube --check
|
14 |
pipenv run black tests --check
|
15 |
pipenv run mypy pytube
|
|
|
9 |
pipenv install --dev
|
10 |
|
11 |
test:
|
12 |
+
pipenv run flake8 pytube/
|
13 |
+
pipenv run flake8 tests/
|
14 |
pipenv run black pytube --check
|
15 |
pipenv run black tests --check
|
16 |
pipenv run mypy pytube
|
Pipfile
CHANGED
@@ -17,3 +17,10 @@ sphinx_rtd_theme = "*"
|
|
17 |
mypy = "*"
|
18 |
black = "==19.10b0"
|
19 |
codecov = "*"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
mypy = "*"
|
18 |
black = "==19.10b0"
|
19 |
codecov = "*"
|
20 |
+
flake8-bugbear ="*"
|
21 |
+
flake8-comprehensions ="*"
|
22 |
+
flake8-eradicate = "*"
|
23 |
+
flake8-broken-line = "*"
|
24 |
+
pep8-naming = "*"
|
25 |
+
flake8-string-format = "*"
|
26 |
+
flake8-quotes = "*"
|
pytube/__main__.py
CHANGED
@@ -67,12 +67,12 @@ class YouTube:
|
|
67 |
self.watch_html: Optional[str] = None # the html of /watch?v=<video_id>
|
68 |
self.embed_html: Optional[str] = None
|
69 |
self.player_config_args: Dict = {} # inline js in the html containing
|
|
|
70 |
# streams
|
71 |
self.age_restricted: Optional[bool] = None
|
72 |
self.vid_descr: Optional[str] = None
|
73 |
|
74 |
self.fmt_streams: List[Stream] = []
|
75 |
-
self.caption_tracks: List[Caption] = []
|
76 |
|
77 |
# video_id part of /watch?v=<video_id>
|
78 |
self.video_id = extract.video_id(url)
|
@@ -153,11 +153,9 @@ class YouTube:
|
|
153 |
self.initialize_stream_objects(fmt)
|
154 |
|
155 |
# load the player_response object (contains subtitle information)
|
156 |
-
self.
|
157 |
-
|
158 |
-
)
|
159 |
|
160 |
-
self.initialize_caption_objects()
|
161 |
logger.info("init finished successfully")
|
162 |
|
163 |
def prefetch(self) -> None:
|
@@ -173,8 +171,7 @@ class YouTube:
|
|
173 |
self.watch_html = request.get(url=self.watch_url)
|
174 |
if (
|
175 |
self.watch_html is None
|
176 |
-
or '<img class="icon meh" src="/yts/img'
|
177 |
-
not in self.watch_html # noqa: W503
|
178 |
):
|
179 |
raise VideoUnavailable(video_id=self.video_id)
|
180 |
|
@@ -214,26 +211,18 @@ class YouTube:
|
|
214 |
)
|
215 |
self.fmt_streams.append(video)
|
216 |
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
Take the unscrambled player response data, and use it to initialize
|
221 |
-
instances of :class:`Caption <Caption>`.
|
222 |
-
|
223 |
-
:rtype: None
|
224 |
|
|
|
225 |
"""
|
226 |
-
|
227 |
-
|
228 |
-
# https://github.com/nficano/pytube/issues/167
|
229 |
-
caption_tracks = (
|
230 |
-
self.player_config_args.get("player_response", {})
|
231 |
-
.get("captions", {})
|
232 |
.get("playerCaptionsTracklistRenderer", {})
|
233 |
.get("captionTracks", [])
|
234 |
)
|
235 |
-
for
|
236 |
-
self.caption_tracks.append(Caption(caption_track))
|
237 |
|
238 |
@property
|
239 |
def captions(self) -> CaptionQuery:
|
@@ -258,9 +247,8 @@ class YouTube:
|
|
258 |
:rtype: str
|
259 |
|
260 |
"""
|
261 |
-
player_response = self.player_config_args.get("player_response", {})
|
262 |
thumbnail_details = (
|
263 |
-
player_response.get("videoDetails", {})
|
264 |
.get("thumbnail", {})
|
265 |
.get("thumbnails")
|
266 |
)
|
@@ -268,7 +256,7 @@ class YouTube:
|
|
268 |
thumbnail_details = thumbnail_details[-1] # last item has max size
|
269 |
return thumbnail_details["url"]
|
270 |
|
271 |
-
return "https://img.youtube.com/vi/
|
272 |
|
273 |
@property
|
274 |
def title(self) -> str:
|
@@ -278,9 +266,7 @@ class YouTube:
|
|
278 |
|
279 |
"""
|
280 |
return self.player_config_args.get("title") or (
|
281 |
-
self.
|
282 |
-
.get("videoDetails", {})
|
283 |
-
.get("title")
|
284 |
)
|
285 |
|
286 |
@property
|
@@ -291,9 +277,7 @@ class YouTube:
|
|
291 |
|
292 |
"""
|
293 |
return self.vid_descr or (
|
294 |
-
self.
|
295 |
-
.get("videoDetails", {})
|
296 |
-
.get("shortDescription")
|
297 |
)
|
298 |
|
299 |
@property
|
@@ -303,11 +287,7 @@ class YouTube:
|
|
303 |
:rtype: float
|
304 |
|
305 |
"""
|
306 |
-
return (
|
307 |
-
self.player_config_args.get("player_response", {})
|
308 |
-
.get("videoDetails", {})
|
309 |
-
.get("averageRating")
|
310 |
-
)
|
311 |
|
312 |
@property
|
313 |
def length(self) -> int:
|
@@ -318,11 +298,7 @@ class YouTube:
|
|
318 |
"""
|
319 |
return int(
|
320 |
self.player_config_args.get("length_seconds")
|
321 |
-
or (
|
322 |
-
self.player_config_args.get("player_response", {})
|
323 |
-
.get("videoDetails", {})
|
324 |
-
.get("lengthSeconds")
|
325 |
-
)
|
326 |
)
|
327 |
|
328 |
@property
|
@@ -332,22 +308,14 @@ class YouTube:
|
|
332 |
:rtype: str
|
333 |
|
334 |
"""
|
335 |
-
return int(
|
336 |
-
self.player_config_args.get("player_response", {})
|
337 |
-
.get("videoDetails", {})
|
338 |
-
.get("viewCount")
|
339 |
-
)
|
340 |
|
341 |
@property
|
342 |
def author(self) -> str:
|
343 |
"""Get the video author.
|
344 |
:rtype: str
|
345 |
"""
|
346 |
-
return (
|
347 |
-
self.player_config_args.get("player_response", {})
|
348 |
-
.get("videoDetails", {})
|
349 |
-
.get("author", "unknown")
|
350 |
-
)
|
351 |
|
352 |
def register_on_progress_callback(self, func: OnProgress):
|
353 |
"""Register a download progress callback function post initialization.
|
|
|
67 |
self.watch_html: Optional[str] = None # the html of /watch?v=<video_id>
|
68 |
self.embed_html: Optional[str] = None
|
69 |
self.player_config_args: Dict = {} # inline js in the html containing
|
70 |
+
self.player_response: Dict = {}
|
71 |
# streams
|
72 |
self.age_restricted: Optional[bool] = None
|
73 |
self.vid_descr: Optional[str] = None
|
74 |
|
75 |
self.fmt_streams: List[Stream] = []
|
|
|
76 |
|
77 |
# video_id part of /watch?v=<video_id>
|
78 |
self.video_id = extract.video_id(url)
|
|
|
153 |
self.initialize_stream_objects(fmt)
|
154 |
|
155 |
# load the player_response object (contains subtitle information)
|
156 |
+
self.player_response = json.loads(self.player_config_args["player_response"])
|
157 |
+
del self.player_config_args["player_response"]
|
|
|
158 |
|
|
|
159 |
logger.info("init finished successfully")
|
160 |
|
161 |
def prefetch(self) -> None:
|
|
|
171 |
self.watch_html = request.get(url=self.watch_url)
|
172 |
if (
|
173 |
self.watch_html is None
|
174 |
+
or '<img class="icon meh" src="/yts/img' not in self.watch_html
|
|
|
175 |
):
|
176 |
raise VideoUnavailable(video_id=self.video_id)
|
177 |
|
|
|
211 |
)
|
212 |
self.fmt_streams.append(video)
|
213 |
|
214 |
+
@property
|
215 |
+
def caption_tracks(self) -> List[Caption]:
|
216 |
+
"""Get a list of :class:`Caption <Caption>`.
|
|
|
|
|
|
|
|
|
217 |
|
218 |
+
:rtype: List[Caption]
|
219 |
"""
|
220 |
+
raw_tracks = (
|
221 |
+
self.player_response.get("captions", {})
|
|
|
|
|
|
|
|
|
222 |
.get("playerCaptionsTracklistRenderer", {})
|
223 |
.get("captionTracks", [])
|
224 |
)
|
225 |
+
return [Caption(track) for track in raw_tracks]
|
|
|
226 |
|
227 |
@property
|
228 |
def captions(self) -> CaptionQuery:
|
|
|
247 |
:rtype: str
|
248 |
|
249 |
"""
|
|
|
250 |
thumbnail_details = (
|
251 |
+
self.player_response.get("videoDetails", {})
|
252 |
.get("thumbnail", {})
|
253 |
.get("thumbnails")
|
254 |
)
|
|
|
256 |
thumbnail_details = thumbnail_details[-1] # last item has max size
|
257 |
return thumbnail_details["url"]
|
258 |
|
259 |
+
return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"
|
260 |
|
261 |
@property
|
262 |
def title(self) -> str:
|
|
|
266 |
|
267 |
"""
|
268 |
return self.player_config_args.get("title") or (
|
269 |
+
self.player_response.get("videoDetails", {}).get("title")
|
|
|
|
|
270 |
)
|
271 |
|
272 |
@property
|
|
|
277 |
|
278 |
"""
|
279 |
return self.vid_descr or (
|
280 |
+
self.player_response.get("videoDetails", {}).get("shortDescription")
|
|
|
|
|
281 |
)
|
282 |
|
283 |
@property
|
|
|
287 |
:rtype: float
|
288 |
|
289 |
"""
|
290 |
+
return self.player_response.get("videoDetails", {}).get("averageRating")
|
|
|
|
|
|
|
|
|
291 |
|
292 |
@property
|
293 |
def length(self) -> int:
|
|
|
298 |
"""
|
299 |
return int(
|
300 |
self.player_config_args.get("length_seconds")
|
301 |
+
or (self.player_response.get("videoDetails", {}).get("lengthSeconds"))
|
|
|
|
|
|
|
|
|
302 |
)
|
303 |
|
304 |
@property
|
|
|
308 |
:rtype: str
|
309 |
|
310 |
"""
|
311 |
+
return int(self.player_response.get("videoDetails", {}).get("viewCount"))
|
|
|
|
|
|
|
|
|
312 |
|
313 |
@property
|
314 |
def author(self) -> str:
|
315 |
"""Get the video author.
|
316 |
:rtype: str
|
317 |
"""
|
318 |
+
return self.player_response.get("videoDetails", {}).get("author", "unknown")
|
|
|
|
|
|
|
|
|
319 |
|
320 |
def register_on_progress_callback(self, func: OnProgress):
|
321 |
"""Register a download progress callback function post initialization.
|
pytube/captions.py
CHANGED
@@ -4,10 +4,8 @@ import os
|
|
4 |
import time
|
5 |
import xml.etree.ElementTree as ElementTree
|
6 |
from typing import Dict, Optional
|
7 |
-
|
8 |
from pytube import request
|
9 |
from html import unescape
|
10 |
-
|
11 |
from pytube.helpers import safe_filename, target_directory
|
12 |
|
13 |
|
@@ -105,7 +103,6 @@ class Caption:
|
|
105 |
:type filename_prefix: str or None
|
106 |
|
107 |
:rtype: str
|
108 |
-
|
109 |
"""
|
110 |
if title.endswith(".srt") or title.endswith(".xml"):
|
111 |
filename = ".".join(title.split(".")[:-1])
|
|
|
4 |
import time
|
5 |
import xml.etree.ElementTree as ElementTree
|
6 |
from typing import Dict, Optional
|
|
|
7 |
from pytube import request
|
8 |
from html import unescape
|
|
|
9 |
from pytube.helpers import safe_filename, target_directory
|
10 |
|
11 |
|
|
|
103 |
:type filename_prefix: str or None
|
104 |
|
105 |
:rtype: str
|
|
|
106 |
"""
|
107 |
if title.endswith(".srt") or title.endswith(".xml"):
|
108 |
filename = ".".join(title.split(".")[:-1])
|
pytube/cipher.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
2 |
"""
|
3 |
This module contains all logic necessary to decipher the signature.
|
4 |
|
@@ -26,7 +27,6 @@ logger = create_logger()
|
|
26 |
|
27 |
def get_initial_function_name(js: str) -> str:
|
28 |
"""Extract the name of the function responsible for computing the signature.
|
29 |
-
|
30 |
:param str js:
|
31 |
The contents of the base.js asset file.
|
32 |
:rtype: str
|
@@ -52,10 +52,10 @@ def get_initial_function_name(js: str) -> str:
|
|
52 |
logger.debug("finding initial function name")
|
53 |
for pattern in function_patterns:
|
54 |
regex = re.compile(pattern)
|
55 |
-
|
56 |
-
if
|
57 |
logger.debug("finished regex search, matched: %s", pattern)
|
58 |
-
return
|
59 |
|
60 |
raise RegexMatchError(caller="get_initial_function_name", pattern="multiple")
|
61 |
|
@@ -112,11 +112,11 @@ def get_transform_object(js: str, var: str) -> List[str]:
|
|
112 |
pattern = r"var %s={(.*?)};" % re.escape(var)
|
113 |
logger.debug("getting transform object")
|
114 |
regex = re.compile(pattern, flags=re.DOTALL)
|
115 |
-
|
116 |
-
if not
|
117 |
raise RegexMatchError(caller="get_transform_object", pattern=pattern)
|
118 |
|
119 |
-
return
|
120 |
|
121 |
|
122 |
def get_transform_map(js: str, var: str) -> Dict:
|
@@ -245,10 +245,10 @@ def parse_function(js_func: str) -> Tuple[str, int]:
|
|
245 |
logger.debug("parsing transform function")
|
246 |
pattern = r"\w+\.(\w+)\(\w,(\d+)\)"
|
247 |
regex = re.compile(pattern)
|
248 |
-
|
249 |
-
if not
|
250 |
raise RegexMatchError(caller="parse_function", pattern=pattern)
|
251 |
-
fn_name, fn_arg =
|
252 |
return fn_name, int(fn_arg)
|
253 |
|
254 |
|
@@ -269,7 +269,7 @@ def get_signature(js: str, ciphered_signature: str) -> str:
|
|
269 |
transform_plan = get_transform_plan(js)
|
270 |
var, _ = transform_plan[0].split(".")
|
271 |
transform_map = get_transform_map(js, var)
|
272 |
-
signature =
|
273 |
|
274 |
for js_func in transform_plan:
|
275 |
name, argument = parse_function(js_func)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
"""
|
4 |
This module contains all logic necessary to decipher the signature.
|
5 |
|
|
|
27 |
|
28 |
def get_initial_function_name(js: str) -> str:
|
29 |
"""Extract the name of the function responsible for computing the signature.
|
|
|
30 |
:param str js:
|
31 |
The contents of the base.js asset file.
|
32 |
:rtype: str
|
|
|
52 |
logger.debug("finding initial function name")
|
53 |
for pattern in function_patterns:
|
54 |
regex = re.compile(pattern)
|
55 |
+
function_match = regex.search(js)
|
56 |
+
if function_match:
|
57 |
logger.debug("finished regex search, matched: %s", pattern)
|
58 |
+
return function_match.group(1)
|
59 |
|
60 |
raise RegexMatchError(caller="get_initial_function_name", pattern="multiple")
|
61 |
|
|
|
112 |
pattern = r"var %s={(.*?)};" % re.escape(var)
|
113 |
logger.debug("getting transform object")
|
114 |
regex = re.compile(pattern, flags=re.DOTALL)
|
115 |
+
transform_match = regex.search(js)
|
116 |
+
if not transform_match:
|
117 |
raise RegexMatchError(caller="get_transform_object", pattern=pattern)
|
118 |
|
119 |
+
return transform_match.group(1).replace("\n", " ").split(", ")
|
120 |
|
121 |
|
122 |
def get_transform_map(js: str, var: str) -> Dict:
|
|
|
245 |
logger.debug("parsing transform function")
|
246 |
pattern = r"\w+\.(\w+)\(\w,(\d+)\)"
|
247 |
regex = re.compile(pattern)
|
248 |
+
parse_match = regex.search(js_func)
|
249 |
+
if not parse_match:
|
250 |
raise RegexMatchError(caller="parse_function", pattern=pattern)
|
251 |
+
fn_name, fn_arg = parse_match.groups()
|
252 |
return fn_name, int(fn_arg)
|
253 |
|
254 |
|
|
|
269 |
transform_plan = get_transform_plan(js)
|
270 |
var, _ = transform_plan[0].split(".")
|
271 |
transform_map = get_transform_map(js, var)
|
272 |
+
signature = list(ciphered_signature)
|
273 |
|
274 |
for js_func in transform_plan:
|
275 |
name, argument = parse_function(js_func)
|
pytube/contrib/playlist.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
|
|
|
3 |
|
4 |
import json
|
5 |
import logging
|
@@ -15,9 +16,7 @@ logger = logging.getLogger(__name__)
|
|
15 |
|
16 |
|
17 |
class Playlist:
|
18 |
-
"""
|
19 |
-
playlist
|
20 |
-
"""
|
21 |
|
22 |
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
|
23 |
if proxies:
|
@@ -28,18 +27,16 @@ class Playlist:
|
|
28 |
except IndexError: # assume that url is just the id
|
29 |
self.playlist_id = url
|
30 |
|
31 |
-
self.playlist_url
|
32 |
-
"https://www.youtube.com/playlist?list=" + self.playlist_id
|
33 |
-
)
|
34 |
self.html = request.get(self.playlist_url)
|
35 |
|
36 |
# Needs testing with non-English
|
37 |
self.last_update: Optional[date] = None
|
38 |
-
|
39 |
r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
|
40 |
)
|
41 |
-
if
|
42 |
-
month, day, year =
|
43 |
self.last_update = datetime.strptime(
|
44 |
f"{month} {day:0>2} {year}", "%b %d %Y"
|
45 |
).date()
|
@@ -48,25 +45,26 @@ class Playlist:
|
|
48 |
|
49 |
@staticmethod
|
50 |
def _find_load_more_url(req: str) -> Optional[str]:
|
51 |
-
"""Given an html page or
|
52 |
-
and returns the "load more" url if found.
|
53 |
-
"""
|
54 |
match = re.search(
|
55 |
r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
|
56 |
req,
|
57 |
)
|
58 |
if match:
|
59 |
-
return "https://www.youtube.com
|
60 |
|
61 |
return None
|
62 |
|
63 |
@deprecated("This function will be removed in the future, please use .video_urls")
|
64 |
def parse_links(self) -> List[str]: # pragma: no cover
|
|
|
|
|
|
|
|
|
65 |
return self.video_urls
|
66 |
|
67 |
def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
|
68 |
-
"""Parse the video links from the page source,
|
69 |
-
returns the /watch?v= part from video link href
|
70 |
"""
|
71 |
req = self.html
|
72 |
videos_urls = self._extract_videos(req)
|
@@ -113,6 +111,7 @@ class Playlist:
|
|
113 |
|
114 |
def trimmed(self, video_id: str) -> Iterable[str]:
|
115 |
"""Retrieve a list of YouTube video URLs trimmed at the given video ID
|
|
|
116 |
i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
|
117 |
:type video_id: str
|
118 |
video ID to trim the returned list of playlist URLs at
|
@@ -121,16 +120,15 @@ class Playlist:
|
|
121 |
List of video URLs from the playlist trimmed at the given ID
|
122 |
"""
|
123 |
for page in self._paginate(until_watch_id=video_id):
|
124 |
-
for watch_path in page
|
125 |
-
yield self._video_url(watch_path)
|
126 |
|
127 |
@property # type: ignore
|
128 |
@cache
|
129 |
def video_urls(self) -> List[str]:
|
130 |
"""Complete links of all the videos in playlist
|
|
|
131 |
:rtype: List[str]
|
132 |
-
:returns:
|
133 |
-
List of video URLs
|
134 |
"""
|
135 |
return [
|
136 |
self._video_url(video) for page in list(self._paginate()) for video in page
|
@@ -138,29 +136,27 @@ class Playlist:
|
|
138 |
|
139 |
@property
|
140 |
def videos(self) -> Iterable[YouTube]:
|
141 |
-
"""
|
142 |
-
|
|
|
143 |
"""
|
144 |
-
for url in self.video_urls
|
145 |
-
yield YouTube(url)
|
146 |
|
147 |
@deprecated(
|
148 |
"This call is unnecessary, you can directly access .video_urls or .videos"
|
149 |
)
|
150 |
def populate_video_urls(self) -> List[str]:
|
151 |
"""Complete links of all the videos in playlist
|
|
|
152 |
:rtype: List[str]
|
153 |
-
:returns:
|
154 |
-
List of video URLs
|
155 |
"""
|
156 |
-
|
157 |
return self.video_urls
|
158 |
|
159 |
@deprecated("This function will be removed in the future.")
|
160 |
def _path_num_prefix_generator(self, reverse=False): # pragma: no cover
|
161 |
-
"""
|
162 |
-
|
163 |
-
in the playlist.
|
164 |
If the number of digits required to name a file,is less than is
|
165 |
required to name the last file,it prepends 0s.
|
166 |
So if you have a playlist of 100 videos it will number them like:
|
@@ -185,9 +181,7 @@ class Playlist:
|
|
185 |
reverse_numbering: bool = False,
|
186 |
resolution: str = "720p",
|
187 |
) -> None: # pragma: no cover
|
188 |
-
"""Download all the videos in the the playlist.
|
189 |
-
resolution is 720p (or highest available), later more option
|
190 |
-
should be added to download resolution of choice
|
191 |
|
192 |
:param download_path:
|
193 |
(optional) Output path for the playlist If one is not
|
@@ -206,7 +200,6 @@ class Playlist:
|
|
206 |
Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
|
207 |
:type resolution: str
|
208 |
"""
|
209 |
-
|
210 |
logger.debug("total videos found: %d", len(self.video_urls))
|
211 |
logger.debug("starting download")
|
212 |
|
@@ -231,22 +224,18 @@ class Playlist:
|
|
231 |
|
232 |
@cache
|
233 |
def title(self) -> Optional[str]:
|
234 |
-
"""
|
235 |
-
|
236 |
-
|
237 |
-
|
|
|
|
|
238 |
match = pattern.search(self.html)
|
239 |
|
240 |
if match is None:
|
241 |
return None
|
242 |
|
243 |
-
return (
|
244 |
-
match.group()
|
245 |
-
.replace(open_tag, "")
|
246 |
-
.replace(end_tag, "")
|
247 |
-
.replace("- YouTube", "")
|
248 |
-
.strip()
|
249 |
-
)
|
250 |
|
251 |
@staticmethod
|
252 |
def _video_url(watch_path: str):
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
+
"""Module to download a complete playlist from a youtube channel."""
|
4 |
|
5 |
import json
|
6 |
import logging
|
|
|
16 |
|
17 |
|
18 |
class Playlist:
|
19 |
+
"""Load a YouTube playlist with URL or ID"""
|
|
|
|
|
20 |
|
21 |
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
|
22 |
if proxies:
|
|
|
27 |
except IndexError: # assume that url is just the id
|
28 |
self.playlist_id = url
|
29 |
|
30 |
+
self.playlist_url = f"https://www.youtube.com/playlist?list={self.playlist_id}"
|
|
|
|
|
31 |
self.html = request.get(self.playlist_url)
|
32 |
|
33 |
# Needs testing with non-English
|
34 |
self.last_update: Optional[date] = None
|
35 |
+
date_match = re.search(
|
36 |
r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
|
37 |
)
|
38 |
+
if date_match:
|
39 |
+
month, day, year = date_match.groups()
|
40 |
self.last_update = datetime.strptime(
|
41 |
f"{month} {day:0>2} {year}", "%b %d %Y"
|
42 |
).date()
|
|
|
45 |
|
46 |
@staticmethod
|
47 |
def _find_load_more_url(req: str) -> Optional[str]:
|
48 |
+
"""Given an html page or fragment, returns the "load more" url if found."""
|
|
|
|
|
49 |
match = re.search(
|
50 |
r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
|
51 |
req,
|
52 |
)
|
53 |
if match:
|
54 |
+
return f"https://www.youtube.com{match.group(1)}"
|
55 |
|
56 |
return None
|
57 |
|
58 |
@deprecated("This function will be removed in the future, please use .video_urls")
|
59 |
def parse_links(self) -> List[str]: # pragma: no cover
|
60 |
+
""" Deprecated function for returning list of URLs
|
61 |
+
|
62 |
+
:return: List[str]
|
63 |
+
"""
|
64 |
return self.video_urls
|
65 |
|
66 |
def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
|
67 |
+
"""Parse the video links from the page source, yields the /watch?v= part from video link
|
|
|
68 |
"""
|
69 |
req = self.html
|
70 |
videos_urls = self._extract_videos(req)
|
|
|
111 |
|
112 |
def trimmed(self, video_id: str) -> Iterable[str]:
|
113 |
"""Retrieve a list of YouTube video URLs trimmed at the given video ID
|
114 |
+
|
115 |
i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
|
116 |
:type video_id: str
|
117 |
video ID to trim the returned list of playlist URLs at
|
|
|
120 |
List of video URLs from the playlist trimmed at the given ID
|
121 |
"""
|
122 |
for page in self._paginate(until_watch_id=video_id):
|
123 |
+
yield from (self._video_url(watch_path) for watch_path in page)
|
|
|
124 |
|
125 |
@property # type: ignore
|
126 |
@cache
|
127 |
def video_urls(self) -> List[str]:
|
128 |
"""Complete links of all the videos in playlist
|
129 |
+
|
130 |
:rtype: List[str]
|
131 |
+
:returns: List of video URLs
|
|
|
132 |
"""
|
133 |
return [
|
134 |
self._video_url(video) for page in list(self._paginate()) for video in page
|
|
|
136 |
|
137 |
@property
|
138 |
def videos(self) -> Iterable[YouTube]:
|
139 |
+
"""Yields YouTube objects of videos in this playlist
|
140 |
+
|
141 |
+
:Yields: YouTube
|
142 |
"""
|
143 |
+
yield from (YouTube(url) for url in self.video_urls)
|
|
|
144 |
|
145 |
@deprecated(
|
146 |
"This call is unnecessary, you can directly access .video_urls or .videos"
|
147 |
)
|
148 |
def populate_video_urls(self) -> List[str]:
|
149 |
"""Complete links of all the videos in playlist
|
150 |
+
|
151 |
:rtype: List[str]
|
152 |
+
:returns: List of video URLs
|
|
|
153 |
"""
|
|
|
154 |
return self.video_urls
|
155 |
|
156 |
@deprecated("This function will be removed in the future.")
|
157 |
def _path_num_prefix_generator(self, reverse=False): # pragma: no cover
|
158 |
+
"""Generate number prefixes for the items in the playlist.
|
159 |
+
|
|
|
160 |
If the number of digits required to name a file,is less than is
|
161 |
required to name the last file,it prepends 0s.
|
162 |
So if you have a playlist of 100 videos it will number them like:
|
|
|
181 |
reverse_numbering: bool = False,
|
182 |
resolution: str = "720p",
|
183 |
) -> None: # pragma: no cover
|
184 |
+
"""Download all the videos in the the playlist.
|
|
|
|
|
185 |
|
186 |
:param download_path:
|
187 |
(optional) Output path for the playlist If one is not
|
|
|
200 |
Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
|
201 |
:type resolution: str
|
202 |
"""
|
|
|
203 |
logger.debug("total videos found: %d", len(self.video_urls))
|
204 |
logger.debug("starting download")
|
205 |
|
|
|
224 |
|
225 |
@cache
|
226 |
def title(self) -> Optional[str]:
|
227 |
+
"""Extract playlist title
|
228 |
+
|
229 |
+
:return: playlist title (name)
|
230 |
+
:rtype: Optional[str]
|
231 |
+
"""
|
232 |
+
pattern = re.compile("<title>(.+?)</title>")
|
233 |
match = pattern.search(self.html)
|
234 |
|
235 |
if match is None:
|
236 |
return None
|
237 |
|
238 |
+
return match.group(1).replace("- YouTube", "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
@staticmethod
|
241 |
def _video_url(watch_path: str):
|
pytube/exceptions.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
2 |
"""Library specific exception definitions."""
|
3 |
from typing import Union, Pattern
|
4 |
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
"""Library specific exception definitions."""
|
4 |
from typing import Union, Pattern
|
5 |
|
pytube/helpers.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
2 |
"""Various helper functions implemented by pytube."""
|
3 |
import functools
|
4 |
import logging
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
"""Various helper functions implemented by pytube."""
|
4 |
import functools
|
5 |
import logging
|
pytube/monostate.py
CHANGED
@@ -1,9 +1,9 @@
|
|
|
|
|
|
1 |
import io
|
2 |
from typing import Any, Optional
|
3 |
from typing_extensions import Protocol
|
4 |
|
5 |
-
# from __future__ import annotations
|
6 |
-
|
7 |
|
8 |
class OnProgress(Protocol):
|
9 |
def __call__(
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
import io
|
4 |
from typing import Any, Optional
|
5 |
from typing_extensions import Protocol
|
6 |
|
|
|
|
|
7 |
|
8 |
class OnProgress(Protocol):
|
9 |
def __call__(
|
pytube/query.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
2 |
"""This module provides a query interface for media streams and captions."""
|
3 |
from typing import List, Optional
|
4 |
|
@@ -232,6 +233,7 @@ class StreamQuery:
|
|
232 |
|
233 |
def get_by_resolution(self, resolution: str) -> Optional[Stream]:
|
234 |
"""Get the corresponding :class:`Stream <Stream>` for a given resolution.
|
|
|
235 |
Stream must be a progressive mp4.
|
236 |
|
237 |
:param str resolution:
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
"""This module provides a query interface for media streams and captions."""
|
4 |
from typing import List, Optional
|
5 |
|
|
|
233 |
|
234 |
def get_by_resolution(self, resolution: str) -> Optional[Stream]:
|
235 |
"""Get the corresponding :class:`Stream <Stream>` for a given resolution.
|
236 |
+
|
237 |
Stream must be a progressive mp4.
|
238 |
|
239 |
:param str resolution:
|
pytube/request.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
2 |
"""Implements a simple wrapper around urlopen."""
|
3 |
from typing import Any, Iterable, Dict
|
4 |
from urllib.request import Request
|
@@ -25,10 +26,8 @@ def get(url) -> str:
|
|
25 |
|
26 |
def stream(url: str, chunk_size: int = 8192) -> Iterable[bytes]:
|
27 |
"""Read the response in chunks.
|
28 |
-
:param str url:
|
29 |
-
|
30 |
-
:param int chunk_size:
|
31 |
-
The size in bytes of each chunk. Defaults to 8*1024
|
32 |
:rtype: Iterable[bytes]
|
33 |
"""
|
34 |
response = _execute_request(url)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
"""Implements a simple wrapper around urlopen."""
|
4 |
from typing import Any, Iterable, Dict
|
5 |
from urllib.request import Request
|
|
|
26 |
|
27 |
def stream(url: str, chunk_size: int = 8192) -> Iterable[bytes]:
|
28 |
"""Read the response in chunks.
|
29 |
+
:param str url: The URL to perform the GET request for.
|
30 |
+
:param int chunk_size: The size in bytes of each chunk. Defaults to 8*1024
|
|
|
|
|
31 |
:rtype: Iterable[bytes]
|
32 |
"""
|
33 |
response = _execute_request(url)
|
pytube/streams.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
|
|
2 |
"""
|
3 |
This module contains a container for stream manifest data.
|
4 |
|
@@ -169,7 +170,6 @@ class Stream:
|
|
169 |
:returns:
|
170 |
An os file system compatible filename.
|
171 |
"""
|
172 |
-
|
173 |
filename = safe_filename(self.title)
|
174 |
return f"{filename}.{self.subtype}"
|
175 |
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
"""
|
4 |
This module contains a container for stream manifest data.
|
5 |
|
|
|
170 |
:returns:
|
171 |
An os file system compatible filename.
|
172 |
"""
|
|
|
173 |
filename = safe_filename(self.title)
|
174 |
return f"{filename}.{self.subtype}"
|
175 |
|
pytube/version.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
__version__ = "9.6.1"
|
2 |
|
3 |
if __name__ == "__main__":
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
__version__ = "9.6.1"
|
4 |
|
5 |
if __name__ == "__main__":
|
tests/test_streams.py
CHANGED
@@ -199,29 +199,25 @@ def test_on_complete_hook(cipher_signature, mocker):
|
|
199 |
|
200 |
def test_author(cipher_signature):
|
201 |
expected = "Test author"
|
202 |
-
cipher_signature.
|
203 |
-
"player_response": {"videoDetails": {"author": expected}}
|
204 |
-
}
|
205 |
assert cipher_signature.author == expected
|
206 |
|
207 |
expected = "unknown"
|
208 |
-
cipher_signature.
|
209 |
assert cipher_signature.author == expected
|
210 |
|
211 |
|
212 |
def test_thumbnail_when_in_details(cipher_signature):
|
213 |
expected = "some url"
|
214 |
-
cipher_signature.
|
215 |
-
"
|
216 |
-
"videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
|
217 |
-
}
|
218 |
}
|
219 |
assert cipher_signature.thumbnail_url == expected
|
220 |
|
221 |
|
222 |
def test_thumbnail_when_not_in_details(cipher_signature):
|
223 |
expected = "https://img.youtube.com/vi/9bZkp7q19f0/maxresdefault.jpg"
|
224 |
-
cipher_signature.
|
225 |
assert cipher_signature.thumbnail_url == expected
|
226 |
|
227 |
|
|
|
199 |
|
200 |
def test_author(cipher_signature):
|
201 |
expected = "Test author"
|
202 |
+
cipher_signature.player_response = {"videoDetails": {"author": expected}}
|
|
|
|
|
203 |
assert cipher_signature.author == expected
|
204 |
|
205 |
expected = "unknown"
|
206 |
+
cipher_signature.player_response = {}
|
207 |
assert cipher_signature.author == expected
|
208 |
|
209 |
|
210 |
def test_thumbnail_when_in_details(cipher_signature):
|
211 |
expected = "some url"
|
212 |
+
cipher_signature.player_response = {
|
213 |
+
"videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
|
|
|
|
|
214 |
}
|
215 |
assert cipher_signature.thumbnail_url == expected
|
216 |
|
217 |
|
218 |
def test_thumbnail_when_not_in_details(cipher_signature):
|
219 |
expected = "https://img.youtube.com/vi/9bZkp7q19f0/maxresdefault.jpg"
|
220 |
+
cipher_signature.player_response = {}
|
221 |
assert cipher_signature.thumbnail_url == expected
|
222 |
|
223 |
|