hbmartin committed on
Commit
61e7e36
·
1 Parent(s): b2d5e61

experiment with wps

Browse files
Files changed (2) hide show
  1. .flake8 +4 -2
  2. pytube/contrib/playlist.py +29 -43
.flake8 CHANGED
@@ -1,3 +1,5 @@
1
  [flake8]
2
- ignore = E231,E203,W503
3
- max-line-length = 89
 
 
 
1
  [flake8]
2
+ ignore = E231,E203,W503,Q000,WPS111,WPS305,WPS348,WPS602,D400,DAR201,S101,DAR101,C812,D104,I001,WPS306,WPS214,D401,WPS229,WPS420
3
+ max-line-length = 89
4
+
5
+ [isort]
pytube/contrib/playlist.py CHANGED
@@ -1,5 +1,6 @@
1
  # -*- coding: utf-8 -*-
2
- """Module to download a complete playlist from a youtube channel"""
 
3
 
4
  import json
5
  import logging
@@ -15,9 +16,7 @@ logger = logging.getLogger(__name__)
15
 
16
 
17
  class Playlist:
18
- """Handles all the task of manipulating and downloading a whole YouTube
19
- playlist
20
- """
21
 
22
  def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
23
  if proxies:
@@ -28,18 +27,16 @@ class Playlist:
28
  except IndexError: # assume that url is just the id
29
  self.playlist_id = url
30
 
31
- self.playlist_url: str = (
32
- "https://www.youtube.com/playlist?list=" + self.playlist_id
33
- )
34
  self.html = request.get(self.playlist_url)
35
 
36
  # Needs testing with non-English
37
  self.last_update: Optional[date] = None
38
- results = re.search(
39
  r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
40
  )
41
- if results:
42
- month, day, year = results.groups()
43
  self.last_update = datetime.strptime(
44
  f"{month} {day:0>2} {year}", "%b %d %Y"
45
  ).date()
@@ -48,15 +45,14 @@ class Playlist:
48
 
49
  @staticmethod
50
  def _find_load_more_url(req: str) -> Optional[str]:
51
- """Given an html page or a fragment thereof, looks for
52
- and returns the "load more" url if found.
53
  """
54
  match = re.search(
55
  r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
56
  req,
57
  )
58
  if match:
59
- return "https://www.youtube.com" + match.group(1)
60
 
61
  return None
62
 
@@ -65,8 +61,7 @@ class Playlist:
65
  return self.video_urls
66
 
67
  def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
68
- """Parse the video links from the page source, extracts and
69
- returns the /watch?v= part from video link href
70
  """
71
  req = self.html
72
  videos_urls = self._extract_videos(req)
@@ -113,6 +108,7 @@ class Playlist:
113
 
114
  def trimmed(self, video_id: str) -> Iterable[str]:
115
  """Retrieve a list of YouTube video URLs trimmed at the given video ID
 
116
  i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
117
  :type video_id: str
118
  video ID to trim the returned list of playlist URLs at
@@ -121,16 +117,15 @@ class Playlist:
121
  List of video URLs from the playlist trimmed at the given ID
122
  """
123
  for page in self._paginate(until_watch_id=video_id):
124
- for watch_path in page:
125
- yield self._video_url(watch_path)
126
 
127
  @property # type: ignore
128
  @cache
129
  def video_urls(self) -> List[str]:
130
  """Complete links of all the videos in playlist
 
131
  :rtype: List[str]
132
- :returns:
133
- List of video URLs
134
  """
135
  return [
136
  self._video_url(video) for page in list(self._paginate()) for video in page
@@ -138,29 +133,27 @@ class Playlist:
138
 
139
  @property
140
  def videos(self) -> Iterable[YouTube]:
141
- """Iterable of YouTube objects representing videos in this playlist
 
142
  :rtype: Iterable[YouTube]
143
  """
144
- for url in self.video_urls:
145
- yield YouTube(url)
146
 
147
  @deprecated(
148
  "This call is unnecessary, you can directly access .video_urls or .videos"
149
  )
150
  def populate_video_urls(self) -> List[str]:
151
  """Complete links of all the videos in playlist
 
152
  :rtype: List[str]
153
- :returns:
154
- List of video URLs
155
  """
156
-
157
  return self.video_urls
158
 
159
  @deprecated("This function will be removed in the future.")
160
  def _path_num_prefix_generator(self, reverse=False): # pragma: no cover
161
- """
162
- This generator function generates number prefixes, for the items
163
- in the playlist.
164
  If the number of digits required to name a file,is less than is
165
  required to name the last file,it prepends 0s.
166
  So if you have a playlist of 100 videos it will number them like:
@@ -185,9 +178,7 @@ class Playlist:
185
  reverse_numbering: bool = False,
186
  resolution: str = "720p",
187
  ) -> None: # pragma: no cover
188
- """Download all the videos in the the playlist. Initially, download
189
- resolution is 720p (or highest available), later more option
190
- should be added to download resolution of choice
191
 
192
  :param download_path:
193
  (optional) Output path for the playlist If one is not
@@ -206,7 +197,6 @@ class Playlist:
206
  Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
207
  :type resolution: str
208
  """
209
-
210
  logger.debug("total videos found: %d", len(self.video_urls))
211
  logger.debug("starting download")
212
 
@@ -231,22 +221,18 @@ class Playlist:
231
 
232
  @cache
233
  def title(self) -> Optional[str]:
234
- """return playlist title (name)"""
235
- open_tag = "<title>"
236
- end_tag = "</title>"
237
- pattern = re.compile(open_tag + "(.+?)" + end_tag)
 
 
238
  match = pattern.search(self.html)
239
 
240
  if match is None:
241
  return None
242
 
243
- return (
244
- match.group()
245
- .replace(open_tag, "")
246
- .replace(end_tag, "")
247
- .replace("- YouTube", "")
248
- .strip()
249
- )
250
 
251
  @staticmethod
252
  def _video_url(watch_path: str):
 
1
  # -*- coding: utf-8 -*-
2
+
3
+ """Module to download a complete playlist from a youtube channel."""
4
 
5
  import json
6
  import logging
 
16
 
17
 
18
  class Playlist:
19
+ """Load a YouTube playlist with URL or ID"""
 
 
20
 
21
  def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
22
  if proxies:
 
27
  except IndexError: # assume that url is just the id
28
  self.playlist_id = url
29
 
30
+ self.playlist_url = f"https://www.youtube.com/playlist?list={self.playlist_id}"
 
 
31
  self.html = request.get(self.playlist_url)
32
 
33
  # Needs testing with non-English
34
  self.last_update: Optional[date] = None
35
+ date_match = re.search(
36
  r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
37
  )
38
+ if date_match:
39
+ month, day, year = date_match.groups()
40
  self.last_update = datetime.strptime(
41
  f"{month} {day:0>2} {year}", "%b %d %Y"
42
  ).date()
 
45
 
46
  @staticmethod
47
  def _find_load_more_url(req: str) -> Optional[str]:
48
+ """Given an html page or fragment, returns the "load more" url if found.
 
49
  """
50
  match = re.search(
51
  r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
52
  req,
53
  )
54
  if match:
55
+ return f"https://www.youtube.com{match.group(1)}"
56
 
57
  return None
58
 
 
61
  return self.video_urls
62
 
63
  def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
64
+ """Parse the video links from the page source, yields the /watch?v= part from video link
 
65
  """
66
  req = self.html
67
  videos_urls = self._extract_videos(req)
 
108
 
109
  def trimmed(self, video_id: str) -> Iterable[str]:
110
  """Retrieve a list of YouTube video URLs trimmed at the given video ID
111
+
112
  i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
113
  :type video_id: str
114
  video ID to trim the returned list of playlist URLs at
 
117
  List of video URLs from the playlist trimmed at the given ID
118
  """
119
  for page in self._paginate(until_watch_id=video_id):
120
+ yield from (self._video_url(watch_path) for watch_path in page)
 
121
 
122
  @property # type: ignore
123
  @cache
124
  def video_urls(self) -> List[str]:
125
  """Complete links of all the videos in playlist
126
+
127
  :rtype: List[str]
128
+ :returns: List of video URLs
 
129
  """
130
  return [
131
  self._video_url(video) for page in list(self._paginate()) for video in page
 
133
 
134
  @property
135
  def videos(self) -> Iterable[YouTube]:
136
+ """Yields YouTube objects of videos in this playlist
137
+
138
  :rtype: Iterable[YouTube]
139
  """
140
+ yield from (YouTube(url) for url in self.video_urls)
 
141
 
142
  @deprecated(
143
  "This call is unnecessary, you can directly access .video_urls or .videos"
144
  )
145
  def populate_video_urls(self) -> List[str]:
146
  """Complete links of all the videos in playlist
147
+
148
  :rtype: List[str]
149
+ :returns: List of video URLs
 
150
  """
 
151
  return self.video_urls
152
 
153
  @deprecated("This function will be removed in the future.")
154
  def _path_num_prefix_generator(self, reverse=False): # pragma: no cover
155
+ """Generate number prefixes for the items in the playlist.
156
+
 
157
  If the number of digits required to name a file,is less than is
158
  required to name the last file,it prepends 0s.
159
  So if you have a playlist of 100 videos it will number them like:
 
178
  reverse_numbering: bool = False,
179
  resolution: str = "720p",
180
  ) -> None: # pragma: no cover
181
+ """Download all the videos in the the playlist.
 
 
182
 
183
  :param download_path:
184
  (optional) Output path for the playlist If one is not
 
197
  Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
198
  :type resolution: str
199
  """
 
200
  logger.debug("total videos found: %d", len(self.video_urls))
201
  logger.debug("starting download")
202
 
 
221
 
222
  @cache
223
  def title(self) -> Optional[str]:
224
+ """Extract playlist title
225
+
226
+ :return: playlist title (name)
227
+ :rtype: Optional[str]
228
+ """
229
+ pattern = re.compile("<title>(.+?)</title>")
230
  match = pattern.search(self.html)
231
 
232
  if match is None:
233
  return None
234
 
235
+ return match.group(1).replace("- YouTube", "").strip()
 
 
 
 
 
 
236
 
237
  @staticmethod
238
  def _video_url(watch_path: str):