Taylor Fox Dahlin committed on
Commit
1e91539
·
unverified ·
1 Parent(s): 6e4ef92

Improve unavailable video handling (#835)

Browse files

* Added new exception for videos only available to members of a channel.
* Added functionality for extracting and parsing ytInitialData.
* Account for changes in YouTube Playability API causing change in data for private videos.

pytube/__main__.py CHANGED
@@ -20,6 +20,7 @@ from pytube import extract
20
  from pytube import request
21
  from pytube import Stream
22
  from pytube import StreamQuery
 
23
  from pytube.exceptions import RecordingUnavailable
24
  from pytube.exceptions import VideoUnavailable
25
  from pytube.exceptions import VideoPrivate
@@ -101,6 +102,34 @@ class YouTube:
101
  self.prefetch()
102
  self.descramble()
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def descramble(self) -> None:
105
  """Descramble the stream data and build Stream instances.
106
 
@@ -168,16 +197,9 @@ class YouTube:
168
  :rtype: None
169
  """
170
  self.watch_html = request.get(url=self.watch_url)
171
- if self.watch_html is None:
172
- raise VideoUnavailable(video_id=self.video_id)
173
  self.age_restricted = extract.is_age_restricted(self.watch_html)
174
 
175
- if extract.is_private(self.watch_html):
176
- raise VideoPrivate(video_id=self.video_id)
177
-
178
- if not extract.recording_available(self.watch_html):
179
- raise RecordingUnavailable(video_id=self.video_id)
180
-
181
  if self.age_restricted:
182
  if not self.embed_html:
183
  self.embed_html = request.get(url=self.embed_url)
 
20
  from pytube import request
21
  from pytube import Stream
22
  from pytube import StreamQuery
23
+ from pytube.exceptions import MembersOnly
24
  from pytube.exceptions import RecordingUnavailable
25
  from pytube.exceptions import VideoUnavailable
26
  from pytube.exceptions import VideoPrivate
 
102
  self.prefetch()
103
  self.descramble()
104
 
105
+ def check_availability(self):
106
+ """Check whether the video is available.
107
+ Raises different exceptions based on why the video is unavailable,
108
+ otherwise does nothing.
109
+
110
+ """
111
+ if self.watch_html is None:
112
+ raise VideoUnavailable(video_id=self.video_id)
113
+
114
+ status, messages = extract.playability_status(self.watch_html)
115
+ for reason in messages:
116
+ if status == 'UNPLAYABLE':
117
+ if reason == (
118
+ 'Join this channel to get access to members-only content '
119
+ 'like this video, and other exclusive perks.'
120
+ ):
121
+ raise MembersOnly(video_id=self.video_id)
122
+ elif reason == 'This live stream recording is not available.':
123
+ raise RecordingUnavailable(video_id=self.video_id)
124
+ else:
125
+ raise VideoUnavailable(video_id=self.video_id)
126
+ elif status == 'LOGIN_REQUIRED':
127
+ if reason == (
128
+ 'This is a private video. '
129
+ 'Please sign in to verify that you may see it.'
130
+ ):
131
+ raise VideoPrivate(video_id=self.video_id)
132
+
133
  def descramble(self) -> None:
134
  """Descramble the stream data and build Stream instances.
135
 
 
197
  :rtype: None
198
  """
199
  self.watch_html = request.get(url=self.watch_url)
200
+ self.check_availability()
 
201
  self.age_restricted = extract.is_age_restricted(self.watch_html)
202
 
 
 
 
 
 
 
203
  if self.age_restricted:
204
  if not self.embed_html:
205
  self.embed_html = request.get(url=self.embed_url)
pytube/exceptions.py CHANGED
@@ -80,5 +80,21 @@ class RecordingUnavailable(ExtractError):
80
  self.video_id = video_id
81
 
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  class HTMLParseError(PytubeError):
84
  """HTML could not be parsed"""
 
80
  self.video_id = video_id
81
 
82
 
83
+ class MembersOnly(PytubeError):
84
+ """Video is members-only.
85
+
86
+ YouTube has special videos that are only viewable to users who have
87
+ subscribed to a content creator.
88
+ ref: https://support.google.com/youtube/answer/7544492?hl=en
89
+ """
90
+ def __init__(self, video_id: str):
91
+ """
92
+ :param str video_id:
93
+ A YouTube video identifier.
94
+ """
95
+ super().__init__('%s is a members-only video' % video_id)
96
+ self.video_id = video_id
97
+
98
+
99
  class HTMLParseError(PytubeError):
100
  """HTML could not be parsed"""
pytube/extract.py CHANGED
@@ -100,6 +100,30 @@ def is_age_restricted(watch_html: str) -> bool:
100
  return True
101
 
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  def video_id(url: str) -> str:
104
  """Extract the ``video_id`` from a YouTube url.
105
 
@@ -438,6 +462,22 @@ def initial_data(watch_html: str) -> str:
438
  return {}
439
 
440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  def metadata(initial_data) -> Optional[YouTubeMetadata]:
442
  """Get the informational metadata for the video.
443
 
 
100
  return True
101
 
102
 
103
+ def playability_status(watch_html: str) -> (str, str):
104
+ """Return the playability status and status explanation of a video.
105
+
106
+ For example, a video may have a status of LOGIN_REQUIRED, and an explanation
107
+ of "This is a private video. Please sign in to verify that you may see it."
108
+
109
+ This explanation is what gets incorporated into the media player overlay.
110
+
111
+ :param str watch_html:
112
+ The html contents of the watch page.
113
+ :rtype: tuple
114
+ :returns:
115
+ Playability status and reason of the video.
116
+ """
117
+ player_response = json.loads(initial_player_response(watch_html))
118
+ status_dict = player_response.get('playabilityStatus', {})
119
+ if 'status' in status_dict:
120
+ if 'reason' in status_dict:
121
+ return status_dict['status'], [status_dict['reason']]
122
+ if 'messages' in status_dict:
123
+ return status_dict['status'], status_dict['messages']
124
+ return None, [None]
125
+
126
+
127
  def video_id(url: str) -> str:
128
  """Extract the ``video_id`` from a YouTube url.
129
 
 
462
  return {}
463
 
464
 
465
+ def initial_player_response(watch_html: str) -> str:
466
+ """Extract the ytInitialPlayerResponse json from the watch_html page.
467
+
468
+ This mostly contains metadata necessary for rendering the page on-load,
469
+ such as video information, copyright notices, etc.
470
+
471
+ @param watch_html: Html of the watch page
472
+ @return:
473
+ """
474
+ pattern = r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*({[^\n]+});"
475
+ try:
476
+ return regex_search(pattern, watch_html, 1)
477
+ except RegexMatchError:
478
+ return "{}"
479
+
480
+
481
  def metadata(initial_data) -> Optional[YouTubeMetadata]:
482
  """Get the informational metadata for the video.
483
 
tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz and b/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz differ
 
tests/test_exceptions.py CHANGED
@@ -43,10 +43,10 @@ def test_recording_unavailable():
43
 
44
  def test_private_error():
45
  try:
46
- raise VideoPrivate('mRe-514tGMg')
47
  except VideoPrivate as e:
48
- assert e.video_id == 'mRe-514tGMg' # noqa: PT017
49
- assert str(e) == 'mRe-514tGMg is a private video'
50
 
51
 
52
  def test_raises_video_private(private):
@@ -58,7 +58,7 @@ def test_raises_video_private(private):
58
  ]
59
  mock_url_open.return_value = mock_url_open_object
60
  with pytest.raises(VideoPrivate):
61
- YouTube('https://youtube.com/watch?v=mRe-514tGMg')
62
 
63
 
64
  def test_raises_recording_unavailable(missing_recording):
 
43
 
44
  def test_private_error():
45
  try:
46
+ raise VideoPrivate('m8uHb5jIGN8')
47
  except VideoPrivate as e:
48
+ assert e.video_id == 'm8uHb5jIGN8' # noqa: PT017
49
+ assert str(e) == 'm8uHb5jIGN8 is a private video'
50
 
51
 
52
  def test_raises_video_private(private):
 
58
  ]
59
  mock_url_open.return_value = mock_url_open_object
60
  with pytest.raises(VideoPrivate):
61
+ YouTube('https://youtube.com/watch?v=m8uHb5jIGN8')
62
 
63
 
64
  def test_raises_recording_unavailable(missing_recording):