Taylor Fox Dahlin
committed on
Improve unavailable video handling (#835)
Browse files
* Added new exception for videos only available to members of a channel.
* Added functionality for extracting and parsing ytInitialData.
* Account for changes in YouTube Playability API causing change in data for private videos.
- pytube/__main__.py +30 -8
- pytube/exceptions.py +16 -0
- pytube/extract.py +40 -0
- tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz +0 -0
- tests/test_exceptions.py +4 -4
pytube/__main__.py
CHANGED
@@ -20,6 +20,7 @@ from pytube import extract
|
|
20 |
from pytube import request
|
21 |
from pytube import Stream
|
22 |
from pytube import StreamQuery
|
|
|
23 |
from pytube.exceptions import RecordingUnavailable
|
24 |
from pytube.exceptions import VideoUnavailable
|
25 |
from pytube.exceptions import VideoPrivate
|
@@ -101,6 +102,34 @@ class YouTube:
|
|
101 |
self.prefetch()
|
102 |
self.descramble()
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
def descramble(self) -> None:
|
105 |
"""Descramble the stream data and build Stream instances.
|
106 |
|
@@ -168,16 +197,9 @@ class YouTube:
|
|
168 |
:rtype: None
|
169 |
"""
|
170 |
self.watch_html = request.get(url=self.watch_url)
|
171 |
-
|
172 |
-
raise VideoUnavailable(video_id=self.video_id)
|
173 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
174 |
|
175 |
-
if extract.is_private(self.watch_html):
|
176 |
-
raise VideoPrivate(video_id=self.video_id)
|
177 |
-
|
178 |
-
if not extract.recording_available(self.watch_html):
|
179 |
-
raise RecordingUnavailable(video_id=self.video_id)
|
180 |
-
|
181 |
if self.age_restricted:
|
182 |
if not self.embed_html:
|
183 |
self.embed_html = request.get(url=self.embed_url)
|
|
|
20 |
from pytube import request
|
21 |
from pytube import Stream
|
22 |
from pytube import StreamQuery
|
23 |
+
from pytube.exceptions import MembersOnly
|
24 |
from pytube.exceptions import RecordingUnavailable
|
25 |
from pytube.exceptions import VideoUnavailable
|
26 |
from pytube.exceptions import VideoPrivate
|
|
|
102 |
self.prefetch()
|
103 |
self.descramble()
|
104 |
|
105 |
+
def check_availability(self):
    """Raise the matching exception when the video cannot be played.

    Reads the playability status and its explanation message(s) for the
    watch page through ``extract.playability_status`` and maps the known
    status/message combinations to exceptions. Returns without doing
    anything when no combination matches.

    :raises VideoUnavailable:
        If ``watch_html`` is ``None``, or the status is ``UNPLAYABLE``
        with a message that is not specifically recognised below.
    :raises MembersOnly:
        If the status is ``UNPLAYABLE`` with the members-only message.
    :raises RecordingUnavailable:
        If the status is ``UNPLAYABLE`` with the missing live stream
        recording message.
    :raises VideoPrivate:
        If the status is ``LOGIN_REQUIRED`` with the private video
        message.
    """
    if self.watch_html is None:
        raise VideoUnavailable(video_id=self.video_id)

    # Exact explanation strings that are compared against the messages
    # returned alongside the playability status.
    members_only_text = (
        'Join this channel to get access to members-only content '
        'like this video, and other exclusive perks.'
    )
    missing_recording_text = 'This live stream recording is not available.'
    private_text = (
        'This is a private video. '
        'Please sign in to verify that you may see it.'
    )

    status, messages = extract.playability_status(self.watch_html)
    for text in messages:
        if status == 'UNPLAYABLE':
            if text == members_only_text:
                raise MembersOnly(video_id=self.video_id)
            if text == missing_recording_text:
                raise RecordingUnavailable(video_id=self.video_id)
            # Any other explanation for an unplayable video falls back to
            # the generic exception.
            raise VideoUnavailable(video_id=self.video_id)
        # LOGIN_REQUIRED with any other message is deliberately ignored.
        if status == 'LOGIN_REQUIRED' and text == private_text:
            raise VideoPrivate(video_id=self.video_id)
|
132 |
+
|
133 |
def descramble(self) -> None:
|
134 |
"""Descramble the stream data and build Stream instances.
|
135 |
|
|
|
197 |
:rtype: None
|
198 |
"""
|
199 |
self.watch_html = request.get(url=self.watch_url)
|
200 |
+
self.check_availability()
|
|
|
201 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
|
202 |
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
if self.age_restricted:
|
204 |
if not self.embed_html:
|
205 |
self.embed_html = request.get(url=self.embed_url)
|
pytube/exceptions.py
CHANGED
@@ -80,5 +80,21 @@ class RecordingUnavailable(ExtractError):
|
|
80 |
self.video_id = video_id
|
81 |
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
class HTMLParseError(PytubeError):
|
84 |
"""HTML could not be parsed"""
|
|
|
80 |
self.video_id = video_id
|
81 |
|
82 |
|
83 |
+
class MembersOnly(PytubeError):
    """Raised for a video that is restricted to members of a channel.

    ref: https://support.google.com/youtube/answer/7544492?hl=en
    """

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        message = '%s is a members-only video' % video_id
        super().__init__(message)
        # Kept on the instance so handlers can tell which video failed.
        self.video_id = video_id
|
97 |
+
|
98 |
+
|
99 |
class HTMLParseError(PytubeError):
|
100 |
"""HTML could not be parsed"""
|
pytube/extract.py
CHANGED
@@ -100,6 +100,30 @@ def is_age_restricted(watch_html: str) -> bool:
|
|
100 |
return True
|
101 |
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
def video_id(url: str) -> str:
|
104 |
"""Extract the ``video_id`` from a YouTube url.
|
105 |
|
@@ -438,6 +462,22 @@ def initial_data(watch_html: str) -> str:
|
|
438 |
return {}
|
439 |
|
440 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
441 |
def metadata(initial_data) -> Optional[YouTubeMetadata]:
|
442 |
"""Get the informational metadata for the video.
|
443 |
|
|
|
100 |
return True
|
101 |
|
102 |
|
103 |
+
def playability_status(watch_html: str) -> tuple:
    """Return the playability status and status explanation(s) of a video.

    For example, a video may have a status of LOGIN_REQUIRED, and an
    explanation of "This is a private video. Please sign in to verify that
    you may see it."

    This explanation is what gets incorporated into the media player overlay.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: tuple
    :returns:
        A ``(status, messages)`` pair. ``messages`` is always a list: the
        single ``reason`` wrapped in a list when one is present, otherwise
        the ``messages`` entry of the playability status as-is. When the
        status is missing, or is present without ``reason`` or
        ``messages``, the pair is ``(None, [None])``.
    """
    player_response = json.loads(initial_player_response(watch_html))
    status_dict = player_response.get('playabilityStatus', {})
    if 'status' in status_dict:
        if 'reason' in status_dict:
            # Wrap the single reason so callers can iterate over the
            # explanations the same way in both cases.
            return status_dict['status'], [status_dict['reason']]
        if 'messages' in status_dict:
            return status_dict['status'], status_dict['messages']
    # No status, or a status with no explanation attached.
    return None, [None]
|
125 |
+
|
126 |
+
|
127 |
def video_id(url: str) -> str:
|
128 |
"""Extract the ``video_id`` from a YouTube url.
|
129 |
|
|
|
462 |
return {}
|
463 |
|
464 |
|
465 |
+
def initial_player_response(watch_html: str) -> str:
    """Extract the ytInitialPlayerResponse json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        The result of searching the page for the object assigned to
        ``window["ytInitialPlayerResponse"]``, or the string ``"{}"`` when
        the search raises ``RegexMatchError``.
    """
    pattern = r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*({[^\n]+});"
    try:
        player_response = regex_search(pattern, watch_html, 1)
    except RegexMatchError:
        # Fall back to an empty JSON object so callers can still parse it.
        return "{}"
    return player_response
|
479 |
+
|
480 |
+
|
481 |
def metadata(initial_data) -> Optional[YouTubeMetadata]:
|
482 |
"""Get the informational metadata for the video.
|
483 |
|
tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz
CHANGED
Binary files a/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz and b/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz differ
|
|
tests/test_exceptions.py
CHANGED
@@ -43,10 +43,10 @@ def test_recording_unavailable():
|
|
43 |
|
44 |
def test_private_error():
|
45 |
try:
|
46 |
-
raise VideoPrivate('
|
47 |
except VideoPrivate as e:
|
48 |
-
assert e.video_id == '
|
49 |
-
assert str(e) == '
|
50 |
|
51 |
|
52 |
def test_raises_video_private(private):
|
@@ -58,7 +58,7 @@ def test_raises_video_private(private):
|
|
58 |
]
|
59 |
mock_url_open.return_value = mock_url_open_object
|
60 |
with pytest.raises(VideoPrivate):
|
61 |
-
YouTube('https://youtube.com/watch?v=
|
62 |
|
63 |
|
64 |
def test_raises_recording_unavailable(missing_recording):
|
|
|
43 |
|
44 |
def test_private_error():
    """VideoPrivate keeps the video id and formats its message from it."""
    video_id = 'm8uHb5jIGN8'
    try:
        raise VideoPrivate(video_id)
    except VideoPrivate as err:
        assert err.video_id == video_id  # noqa: PT017
        assert str(err) == 'm8uHb5jIGN8 is a private video'
|
50 |
|
51 |
|
52 |
def test_raises_video_private(private):
|
|
|
58 |
]
|
59 |
mock_url_open.return_value = mock_url_open_object
|
60 |
with pytest.raises(VideoPrivate):
|
61 |
+
YouTube('https://youtube.com/watch?v=m8uHb5jIGN8')
|
62 |
|
63 |
|
64 |
def test_raises_recording_unavailable(missing_recording):
|