Python 3.x! Reorganized file structure, removed print statements.
Browse files- pytube/__init__.py +6 -342
- pytube/api.py +242 -0
- pytube/exceptions.py +12 -0
- pytube/models.py +59 -0
- pytube/utils.py +27 -0
- tests/__init__.py +0 -0
pytube/__init__.py
CHANGED
@@ -1,344 +1,8 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
2 |
|
3 |
-
from
|
4 |
-
from urllib import urlencode
|
5 |
-
from urllib2 import urlopen
|
6 |
-
from urlparse import urlparse, parse_qs
|
7 |
|
8 |
-
import re
|
9 |
-
|
10 |
-
YT_BASE_URL = 'http://www.youtube.com/get_video_info'
|
11 |
-
|
12 |
-
#YouTube quality and codecs id map.
|
13 |
-
#source: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
|
14 |
-
YT_ENCODING = {
|
15 |
-
#Flash Video
|
16 |
-
5: ["flv", "240p", "Sorenson H.263", "N/A", "0.25", "MP3", "64"],
|
17 |
-
6: ["flv", "270p", "Sorenson H.263", "N/A", "0.8", "MP3", "64"],
|
18 |
-
34: ["flv", "360p", "H.264", "Main", "0.5", "AAC", "128"],
|
19 |
-
35: ["flv", "480p", "H.264", "Main", "0.8-1", "AAC", "128"],
|
20 |
-
|
21 |
-
#3GP
|
22 |
-
36: ["3gp", "240p", "MPEG-4 Visual", "Simple", "0.17", "AAC", "38"],
|
23 |
-
13: ["3gp", "N/A", "MPEG-4 Visual", "N/A", "0.5", "AAC", "N/A"],
|
24 |
-
17: ["3gp", "144p", "MPEG-4 Visual", "Simple", "0.05", "AAC", "24"],
|
25 |
-
|
26 |
-
#MPEG-4
|
27 |
-
18: ["mp4", "360p", "H.264", "Baseline", "0.5", "AAC", "96"],
|
28 |
-
22: ["mp4", "720p", "H.264", "High", "2-2.9", "AAC", "192"],
|
29 |
-
37: ["mp4", "1080p", "H.264", "High", "3-4.3", "AAC", "192"],
|
30 |
-
38: ["mp4", "3072p", "H.264", "High", "3.5-5", "AAC", "192"],
|
31 |
-
82: ["mp4", "360p", "H.264", "3D", "0.5", "AAC", "96"],
|
32 |
-
83: ["mp4", "240p", "H.264", "3D", "0.5", "AAC", "96"],
|
33 |
-
84: ["mp4", "720p", "H.264", "3D", "2-2.9", "AAC", "152"],
|
34 |
-
85: ["mp4", "520p", "H.264", "3D", "2-2.9", "AAC", "152"],
|
35 |
-
|
36 |
-
#WebM
|
37 |
-
43: ["webm", "360p", "VP8", "N/A", "0.5", "Vorbis", "128"],
|
38 |
-
44: ["webm", "480p", "VP8", "N/A", "1", "Vorbis", "128"],
|
39 |
-
45: ["webm", "720p", "VP8", "N/A", "2", "Vorbis", "192"],
|
40 |
-
46: ["webm", "1080p", "VP8", "N/A", "N/A", "Vorbis", "192"],
|
41 |
-
100: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "128"],
|
42 |
-
101: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "192"],
|
43 |
-
102: ["webm", "720p", "VP8", "3D", "N/A", "Vorbis", "192"]
|
44 |
-
}
|
45 |
-
|
46 |
-
# The keys corresponding to the quality/codec map above.
|
47 |
-
YT_ENCODING_KEYS = (
|
48 |
-
'extension', 'resolution', 'video_codec', 'profile', 'video_bitrate',
|
49 |
-
'audio_codec', 'audio_bitrate'
|
50 |
-
)
|
51 |
-
|
52 |
-
|
53 |
-
class MultipleObjectsReturned(Exception):
|
54 |
-
"""
|
55 |
-
The query returned multiple objects when only one was expected.
|
56 |
-
"""
|
57 |
-
pass
|
58 |
-
|
59 |
-
|
60 |
-
class YouTubeError(Exception):
|
61 |
-
"""
|
62 |
-
The REST interface returned an error.
|
63 |
-
"""
|
64 |
-
pass
|
65 |
-
|
66 |
-
|
67 |
-
class Video(object):
|
68 |
-
"""
|
69 |
-
Class representation of a single instance of a YouTube video.
|
70 |
-
"""
|
71 |
-
def __init__(self, url, filename, **attributes):
|
72 |
-
"""
|
73 |
-
Define the variables required to declare a new video.
|
74 |
-
|
75 |
-
Keyword arguments:
|
76 |
-
extention -- The file extention the video should be saved as.
|
77 |
-
resolution -- The broadcasting standard of the video.
|
78 |
-
url -- The url of the video. (e.g.: youtube.com/watch?v=..)
|
79 |
-
filename -- The filename (minus the extention) to save the video.
|
80 |
-
"""
|
81 |
-
|
82 |
-
self.url = url
|
83 |
-
self.filename = filename
|
84 |
-
self.__dict__.update(**attributes)
|
85 |
-
|
86 |
-
def download(self, path=None):
|
87 |
-
"""
|
88 |
-
Downloads the file of the URL defined within the class
|
89 |
-
instance.
|
90 |
-
|
91 |
-
Keyword arguments:
|
92 |
-
path -- Destination directory
|
93 |
-
"""
|
94 |
-
|
95 |
-
path = (normpath(path) + '/' if path else '')
|
96 |
-
response = urlopen(self.url)
|
97 |
-
with open(path + self.filename, 'wb') as dst_file:
|
98 |
-
meta_data = dict(response.info().items())
|
99 |
-
file_size = int(meta_data.get("Content-Length") or
|
100 |
-
meta_data.get("content-length"))
|
101 |
-
print "Downloading: %s Bytes: %s" % (self.filename, file_size)
|
102 |
-
|
103 |
-
bytes_received = 0
|
104 |
-
chunk_size = 8192
|
105 |
-
while True:
|
106 |
-
buffer = response.read(chunk_size)
|
107 |
-
if not buffer:
|
108 |
-
break
|
109 |
-
|
110 |
-
bytes_received += len(buffer)
|
111 |
-
dst_file.write(buffer)
|
112 |
-
percent = bytes_received * 100. / file_size
|
113 |
-
status = r"%10d [%3.2f%%]" % (bytes_received, percent)
|
114 |
-
status = status + chr(8) * (len(status) + 1)
|
115 |
-
print status,
|
116 |
-
|
117 |
-
def __repr__(self):
|
118 |
-
"""A cleaner representation of the class instance."""
|
119 |
-
return "<Video: %s (.%s) - %s>" % (self.video_codec, self.extension,
|
120 |
-
self.resolution)
|
121 |
-
|
122 |
-
def __lt__(self, other):
|
123 |
-
if type(other) == Video:
|
124 |
-
v1 = "%s %s" % (self.extension, self.resolution)
|
125 |
-
v2 = "%s %s" % (other.extension, other.resolution)
|
126 |
-
return (v1 > v2) - (v1 < v2) < 0
|
127 |
-
|
128 |
-
|
129 |
-
class YouTube(object):
|
130 |
-
_filename = None
|
131 |
-
_fmt_values = []
|
132 |
-
_video_url = None
|
133 |
-
title = None
|
134 |
-
videos = []
|
135 |
-
# fmt was an undocumented URL parameter that allowed selecting
|
136 |
-
# YouTube quality mode without using player user interface.
|
137 |
-
|
138 |
-
@property
|
139 |
-
def url(self):
|
140 |
-
"""Exposes the video url."""
|
141 |
-
return self._video_url
|
142 |
-
|
143 |
-
@url.setter
|
144 |
-
def url(self, url):
|
145 |
-
""" Defines the URL of the YouTube video."""
|
146 |
-
self._video_url = url
|
147 |
-
#Reset the filename.
|
148 |
-
self._filename = None
|
149 |
-
#Get the video details.
|
150 |
-
self._get_video_info()
|
151 |
-
|
152 |
-
@property
|
153 |
-
def filename(self):
|
154 |
-
"""
|
155 |
-
Exposes the title of the video. If this is not set, one is
|
156 |
-
generated based on the name of the video.
|
157 |
-
"""
|
158 |
-
if not self._filename:
|
159 |
-
self._filename = safe_filename(self.title)
|
160 |
-
return self._filename
|
161 |
-
|
162 |
-
@filename.setter
|
163 |
-
def filename(self, filename):
|
164 |
-
""" Defines the filename."""
|
165 |
-
self._filename = filename
|
166 |
-
|
167 |
-
@property
|
168 |
-
def video_id(self):
|
169 |
-
"""Gets the video ID extracted from the URL."""
|
170 |
-
parts = urlparse(self._video_url)
|
171 |
-
qs = getattr(parts, 'query', None)
|
172 |
-
if qs:
|
173 |
-
video_id = parse_qs(qs).get('v', None)
|
174 |
-
if video_id:
|
175 |
-
return video_id.pop()
|
176 |
-
|
177 |
-
def get(self, extension=None, res=None):
|
178 |
-
"""
|
179 |
-
Return a single video given an extention and resolution.
|
180 |
-
|
181 |
-
Keyword arguments:
|
182 |
-
extention -- The desired file extention (e.g.: mp4).
|
183 |
-
res -- The desired broadcasting standard of the video (e.g.: 1080p).
|
184 |
-
"""
|
185 |
-
result = []
|
186 |
-
for v in self.videos:
|
187 |
-
if extension and v.extension != extension:
|
188 |
-
continue
|
189 |
-
elif res and v.resolution != res:
|
190 |
-
continue
|
191 |
-
else:
|
192 |
-
result.append(v)
|
193 |
-
if not len(result):
|
194 |
-
return
|
195 |
-
elif len(result) is 1:
|
196 |
-
return result[0]
|
197 |
-
else:
|
198 |
-
d = len(result)
|
199 |
-
raise MultipleObjectsReturned("get() returned more than one "
|
200 |
-
"object -- it returned %d!" % d)
|
201 |
-
|
202 |
-
def filter(self, extension=None, res=None):
|
203 |
-
"""
|
204 |
-
Return a filtered list of videos given an extention and
|
205 |
-
resolution criteria.
|
206 |
-
|
207 |
-
Keyword arguments:
|
208 |
-
extention -- The desired file extention (e.g.: mp4).
|
209 |
-
res -- The desired broadcasting standard of the video (e.g.: 1080p).
|
210 |
-
"""
|
211 |
-
results = []
|
212 |
-
for v in self.videos:
|
213 |
-
if extension and v.extension != extension:
|
214 |
-
continue
|
215 |
-
elif res and v.resolution != res:
|
216 |
-
continue
|
217 |
-
else:
|
218 |
-
results.append(v)
|
219 |
-
return results
|
220 |
-
|
221 |
-
def _fetch(self, path, data):
|
222 |
-
"""
|
223 |
-
Given a path, traverse the response for the desired data. (A
|
224 |
-
modified ver. of my dictionary traverse method:
|
225 |
-
https://gist.github.com/2009119)
|
226 |
-
|
227 |
-
Keyword arguments:
|
228 |
-
path -- A tuple representing a path to a node within a tree.
|
229 |
-
data -- The data containing the tree.
|
230 |
-
"""
|
231 |
-
elem = path[0]
|
232 |
-
#Get first element in tuple, and check if it contains a list.
|
233 |
-
if type(data) is list:
|
234 |
-
# Pop it, and let's continue..
|
235 |
-
return self._fetch(path, data.pop())
|
236 |
-
#Parse the url encoded data
|
237 |
-
data = parse_qs(data)
|
238 |
-
#Get the element in our path
|
239 |
-
data = data.get(elem, None)
|
240 |
-
#Offset the tuple by 1.
|
241 |
-
path = path[1::1]
|
242 |
-
#Check if the path has reached the end OR the element return
|
243 |
-
#nothing.
|
244 |
-
if len(path) is 0 or data is None:
|
245 |
-
if type(data) is list and len(data) is 1:
|
246 |
-
data = data.pop()
|
247 |
-
return data
|
248 |
-
else:
|
249 |
-
# Nope, let's keep diggin'
|
250 |
-
return self._fetch(path, data)
|
251 |
-
|
252 |
-
def _get_video_info(self):
|
253 |
-
"""
|
254 |
-
This is responsable for executing the request, extracting the
|
255 |
-
necessary details, and populating the different video
|
256 |
-
resolutions and formats into a list.
|
257 |
-
"""
|
258 |
-
querystring = urlencode({'asv': 3, 'el': 'detailpage', 'hl': 'en_US',
|
259 |
-
'video_id': self.video_id})
|
260 |
-
|
261 |
-
self.title = None
|
262 |
-
self.videos = []
|
263 |
-
|
264 |
-
response = urlopen(YT_BASE_URL + '?' + querystring)
|
265 |
-
|
266 |
-
if response:
|
267 |
-
content = response.read().decode()
|
268 |
-
data = parse_qs(content)
|
269 |
-
if 'errorcode' in data:
|
270 |
-
error = data.get('reason', 'An unknown error has occurred')
|
271 |
-
if isinstance(error, list):
|
272 |
-
error = error.pop()
|
273 |
-
raise YouTubeError(error)
|
274 |
-
|
275 |
-
#Use my cool traversing method to extract the specific
|
276 |
-
#attribute from the response body.
|
277 |
-
path = ('url_encoded_fmt_stream_map', 'url')
|
278 |
-
video_urls = self._fetch(path, content)
|
279 |
-
#Get the video signatures, YouTube require them as an url component
|
280 |
-
path = ('url_encoded_fmt_stream_map', 'sig')
|
281 |
-
video_signatures = self._fetch(path, content)
|
282 |
-
self.title = self._fetch(('title',), content)
|
283 |
-
|
284 |
-
for idx in range(len(video_urls)):
|
285 |
-
url = video_urls[idx]
|
286 |
-
signature = video_signatures[idx]
|
287 |
-
try:
|
288 |
-
fmt, data = self._extract_fmt(url)
|
289 |
-
filename = "%s.%s" % (self.filename, data['extension'])
|
290 |
-
except (TypeError, KeyError):
|
291 |
-
pass
|
292 |
-
else:
|
293 |
-
#Add video signature to url
|
294 |
-
url = "%s&signature=%s" % (url, signature)
|
295 |
-
v = Video(url, filename, **data)
|
296 |
-
self.videos.append(v)
|
297 |
-
self._fmt_values.append(fmt)
|
298 |
-
self.videos.sort()
|
299 |
-
|
300 |
-
def _extract_fmt(self, text):
|
301 |
-
"""
|
302 |
-
YouTube does not pass you a completely valid URLencoded form,
|
303 |
-
I suspect this is suppose to act as a deterrent.. Nothing some
|
304 |
-
regulular expressions couldn't handle.
|
305 |
-
|
306 |
-
Keyword arguments:
|
307 |
-
text -- The malformed data contained within each url node.
|
308 |
-
"""
|
309 |
-
itag = re.findall('itag=(\d+)', text)
|
310 |
-
if itag and len(itag) is 1:
|
311 |
-
itag = int(itag[0])
|
312 |
-
attr = YT_ENCODING.get(itag, None)
|
313 |
-
if not attr:
|
314 |
-
return itag, None
|
315 |
-
data = {}
|
316 |
-
map(lambda k, v: data.update({k: v}), YT_ENCODING_KEYS, attr)
|
317 |
-
return itag, data
|
318 |
-
|
319 |
-
|
320 |
-
def safe_filename(text, max_length=200):
|
321 |
-
"""
|
322 |
-
Sanitizes filenames for many operating systems.
|
323 |
-
|
324 |
-
Keyword arguments:
|
325 |
-
text -- The unsanitized pending filename.
|
326 |
-
"""
|
327 |
-
#Quickly truncates long filenames.
|
328 |
-
truncate = lambda text: text[:max_length].rsplit(' ', 0)[0]
|
329 |
-
|
330 |
-
#Tidy up ugly formatted filenames.
|
331 |
-
text = text.replace('_', ' ')
|
332 |
-
text = text.replace(':', ' -')
|
333 |
-
|
334 |
-
#NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F)
|
335 |
-
ntfs = [chr(i) for i in range(0, 31)]
|
336 |
-
|
337 |
-
#Removing these SHOULD make most filename safe for a wide range
|
338 |
-
#of operating systems.
|
339 |
-
paranoid = ['\"', '\#', '\$', '\%', '\'', '\*', '\,', '\.', '\/', '\:',
|
340 |
-
'\;', '\<', '\>', '\?', '\\', '\^', '\|', '\~', '\\\\']
|
341 |
-
|
342 |
-
blacklist = re.compile('|'.join(ntfs + paranoid), re.UNICODE)
|
343 |
-
filename = blacklist.sub('', text)
|
344 |
-
return truncate(filename)
|
|
|
1 |
+
__title__ = 'pytube'
|
2 |
+
__version__ = '1.0.0'
|
3 |
+
__author__ = 'Nick Ficano'
|
4 |
+
__license__ = 'MIT License'
|
5 |
+
__copyright__ = 'Copyright 2013 Nick Ficano'
|
6 |
|
7 |
+
from .api import YouTube
|
|
|
|
|
|
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytube/api.py
ADDED
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import unicode_literals
|
2 |
+
|
3 |
+
from .exceptions import MultipleObjectsReturned, YouTubeError
|
4 |
+
from .models import Video
|
5 |
+
from .utils import safe_filename
|
6 |
+
from urllib import urlencode
|
7 |
+
from urllib2 import urlopen
|
8 |
+
from urlparse import urlparse, parse_qs
|
9 |
+
|
10 |
+
import re
|
11 |
+
|
12 |
+
# Endpoint that is queried for the details of a single video.
YT_BASE_URL = 'http://www.youtube.com/get_video_info'

# Lookup table from a YouTube format id to its quality/codec attributes.
# Every row is ordered like YT_ENCODING_KEYS (defined right below).
# Source: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
YT_ENCODING = {
    # --- Flash Video ---
    5: ["flv", "240p", "Sorenson H.263", "N/A", "0.25", "MP3", "64"],
    6: ["flv", "270p", "Sorenson H.263", "N/A", "0.8", "MP3", "64"],
    34: ["flv", "360p", "H.264", "Main", "0.5", "AAC", "128"],
    35: ["flv", "480p", "H.264", "Main", "0.8-1", "AAC", "128"],

    # --- 3GP ---
    36: ["3gp", "240p", "MPEG-4 Visual", "Simple", "0.17", "AAC", "38"],
    13: ["3gp", "N/A", "MPEG-4 Visual", "N/A", "0.5", "AAC", "N/A"],
    17: ["3gp", "144p", "MPEG-4 Visual", "Simple", "0.05", "AAC", "24"],

    # --- MPEG-4 ---
    18: ["mp4", "360p", "H.264", "Baseline", "0.5", "AAC", "96"],
    22: ["mp4", "720p", "H.264", "High", "2-2.9", "AAC", "192"],
    37: ["mp4", "1080p", "H.264", "High", "3-4.3", "AAC", "192"],
    38: ["mp4", "3072p", "H.264", "High", "3.5-5", "AAC", "192"],
    82: ["mp4", "360p", "H.264", "3D", "0.5", "AAC", "96"],
    83: ["mp4", "240p", "H.264", "3D", "0.5", "AAC", "96"],
    84: ["mp4", "720p", "H.264", "3D", "2-2.9", "AAC", "152"],
    85: ["mp4", "520p", "H.264", "3D", "2-2.9", "AAC", "152"],

    # --- WebM ---
    43: ["webm", "360p", "VP8", "N/A", "0.5", "Vorbis", "128"],
    44: ["webm", "480p", "VP8", "N/A", "1", "Vorbis", "128"],
    45: ["webm", "720p", "VP8", "N/A", "2", "Vorbis", "192"],
    46: ["webm", "1080p", "VP8", "N/A", "N/A", "Vorbis", "192"],
    100: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "128"],
    101: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "192"],
    102: ["webm", "720p", "VP8", "3D", "N/A", "Vorbis", "192"]
}

# Names of the columns of each YT_ENCODING row, in row order.
YT_ENCODING_KEYS = (
    'extension',
    'resolution',
    'video_codec',
    'profile',
    'video_bitrate',
    'audio_codec',
    'audio_bitrate'
)
|
53 |
+
|
54 |
+
class YouTube(object):
    """
    Looks up a YouTube video and collects its downloadable formats.

    Assigning ``url`` performs the lookup; the resulting Video objects
    are stored (sorted) in ``videos`` and the video title in ``title``.
    """
    # Class-level defaults, kept so the attributes still exist when
    # __init__ is bypassed. __init__ and _get_video_info always rebind
    # the two lists on the instance, so these shared objects are never
    # mutated.
    _filename = None
    _fmt_values = []
    _video_url = None
    title = None
    videos = []
    # fmt was an undocumented URL parameter that allowed selecting
    # YouTube quality mode without using player user interface.

    def __init__(self):
        """Give every instance its own (initially empty) result lists."""
        self._fmt_values = []
        self.videos = []

    @property
    def url(self):
        """Exposes the video url."""
        return self._video_url

    @url.setter
    def url(self, url):
        """
        Defines the URL of the YouTube video.

        Setting it resets the cached filename and immediately requests
        the video details.
        """
        self._video_url = url
        #Reset the filename.
        self._filename = None
        #Get the video details.
        self._get_video_info()

    @property
    def filename(self):
        """
        Exposes the filename of the video. If this is not set, one is
        generated (and cached) from the title of the video.
        """
        if not self._filename:
            self._filename = safe_filename(self.title)
        return self._filename

    @filename.setter
    def filename(self, filename):
        """Defines the filename."""
        self._filename = filename

    @property
    def video_id(self):
        """
        Gets the video ID extracted from the ``v`` query parameter of
        the URL, or None when the URL carries no such parameter.
        """
        parts = urlparse(self._video_url)
        qs = getattr(parts, 'query', None)
        if qs:
            video_id = parse_qs(qs).get('v', None)
            if video_id:
                #When ``v`` is repeated the last occurrence wins.
                return video_id[-1]
        return None

    def get(self, extension=None, res=None):
        """
        Return a single video given an extension and resolution.

        Keyword arguments:
        extension -- The desired file extension (e.g.: mp4).
        res -- The desired broadcasting standard of the video (e.g.: 1080p).

        Returns None when nothing matches and raises
        MultipleObjectsReturned when more than one video matches.
        """
        result = self.filter(extension, res)
        if not result:
            return None
        if len(result) == 1:
            return result[0]
        raise MultipleObjectsReturned("get() returned more than one "
                                      "object -- it returned %d!"
                                      % len(result))

    def filter(self, extension=None, res=None):
        """
        Return a filtered list of videos given an extension and
        resolution criteria.

        Keyword arguments:
        extension -- The desired file extension (e.g.: mp4).
        res -- The desired broadcasting standard of the video (e.g.: 1080p).

        A criterion that is left empty matches every video.
        """
        return [v for v in self.videos
                if (not extension or v.extension == extension)
                and (not res or v.resolution == res)]

    def _fetch(self, path, data):
        """
        Given a path, traverse the response for the desired data. (A
        modified ver. of my dictionary traverse method:
        https://gist.github.com/2009119)

        Keyword arguments:
        path -- A tuple representing a path to a node within a tree.
        data -- The data containing the tree.

        Returns None when a path element is missing, the bare value when
        the final node holds exactly one value, otherwise the list of
        values.
        """
        elem = path[0]
        #A list of candidates: continue with the last one (without
        #mutating the list that was handed in).
        if isinstance(data, list):
            return self._fetch(path, data[-1])
        #Parse the url encoded data and get the element in our path.
        data = parse_qs(data).get(elem, None)
        #Offset the tuple by 1.
        path = path[1:]
        #Check if the path has reached the end OR the element returned
        #nothing.
        if not path or data is None:
            if isinstance(data, list) and len(data) == 1:
                data = data[0]
            return data
        # Nope, let's keep diggin'
        return self._fetch(path, data)

    @staticmethod
    def _as_list(value):
        """Normalise a _fetch result (None, single value or list) to a list."""
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    def _get_video_info(self):
        """
        This is responsible for executing the request, extracting the
        necessary details, and populating the different video
        resolutions and formats into a list.

        Raises YouTubeError when the response carries an ``errorcode``.
        """
        querystring = urlencode({'asv': 3, 'el': 'detailpage', 'hl': 'en_US',
                                 'video_id': self.video_id})

        self.title = None
        self.videos = []
        #Rebind instead of appending so results of an earlier lookup (or
        #of another instance) never leak into this one.
        self._fmt_values = []

        response = urlopen(YT_BASE_URL + '?' + querystring)
        if not response:
            return
        try:
            #Decode explicitly rather than relying on the default codec.
            content = response.read().decode('utf-8')
        finally:
            response.close()

        data = parse_qs(content)
        if 'errorcode' in data:
            error = data.get('reason', 'An unknown error has occurred')
            if isinstance(error, list):
                error = error.pop()
            raise YouTubeError(error)

        #Use my cool traversing method to extract the specific
        #attribute from the response body.
        path = ('url_encoded_fmt_stream_map', 'url')
        video_urls = self._as_list(self._fetch(path, content))
        #Get the video signatures, YouTube require them as an url component
        path = ('url_encoded_fmt_stream_map', 'sig')
        video_signatures = self._as_list(self._fetch(path, content))
        self.title = self._fetch(('title',), content)

        #Urls and signatures are paired by position; a url without a
        #matching signature is skipped.
        for url, signature in zip(video_urls, video_signatures):
            fmt, data = self._extract_fmt(url)
            if not data:
                #Missing, ambiguous or unknown format id: skip the stream.
                continue
            filename = "%s.%s" % (self.filename, data['extension'])
            #Add video signature to url
            url = "%s&signature=%s" % (url, signature)
            self.videos.append(Video(url, filename, **data))
            self._fmt_values.append(fmt)
        self.videos.sort()

    def _extract_fmt(self, text):
        """
        YouTube does not pass you a completely valid URLencoded form,
        I suspect this is supposed to act as a deterrent.. Nothing some
        regular expressions couldn't handle.

        Keyword arguments:
        text -- The malformed data contained within each url node.

        Returns a (format id, attributes) pair. The attributes are a dict
        keyed by YT_ENCODING_KEYS, or None when the format id is not in
        YT_ENCODING. Both items are None when ``text`` does not contain
        exactly one ``itag``.
        """
        itag = re.findall(r'itag=(\d+)', text)
        if len(itag) != 1:
            return None, None
        itag = int(itag[0])
        attr = YT_ENCODING.get(itag, None)
        if not attr:
            return itag, None
        #dict(zip(...)) is evaluated eagerly; a bare map() used for its
        #side effects never runs on Python 3.
        return itag, dict(zip(YT_ENCODING_KEYS, attr))
|
pytube/exceptions.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class MultipleObjectsReturned(Exception):
    """A query that expected exactly one object matched several."""
|
6 |
+
|
7 |
+
|
8 |
+
class YouTubeError(Exception):
    """An error was reported by the REST interface."""
|
pytube/models.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from os.path import normpath
|
2 |
+
from urllib2 import urlopen
|
3 |
+
|
4 |
+
class Video(object):
    """
    Class representation of a single instance of a YouTube video.
    """
    def __init__(self, url, filename, **attributes):
        """
        Define the variables required to declare a new video.

        Keyword arguments:
        url -- The url of the video. (e.g.: youtube.com/watch?v=..)
        filename -- The filename to save the video as; download() uses
                    it unchanged.
        attributes -- Any further attributes (e.g.: extension,
                      resolution, video_codec); each one is stored on
                      the instance under its own name.
        """

        self.url = url
        self.filename = filename
        self.__dict__.update(**attributes)

    def download(self, path=None, chunk_size=8*1024):
        """
        Downloads the file of the URL defined within the class
        instance.

        Keyword arguments:
        path -- Destination directory
        chunk_size -- Number of bytes read from the response and written
                      to the file at a time (default: 8 * 1024 bytes).

        While running, the most recent chunk is kept in ``_buffer`` and
        the running total in ``_bytes_received``.
        """

        path = (normpath(path) + '/' if path else '')
        response = urlopen(self.url)
        try:
            with open(path + self.filename, 'wb') as dst_file:
                self._bytes_received = 0
                #Start from an empty chunk; the Python 2 ``buffer``
                #builtin does not exist on Python 3.
                self._buffer = b''
                while True:
                    self._buffer = response.read(chunk_size)
                    if not self._buffer:
                        break

                    self._bytes_received += len(self._buffer)
                    dst_file.write(self._buffer)
        finally:
            #Release the connection even when reading or writing fails.
            response.close()

    def __repr__(self):
        """A cleaner representation of the class instance."""
        return "<Video: %s (.%s) - %s>" % (self.video_codec, self.extension,
                                           self.resolution)

    def __lt__(self, other):
        """
        Order videos by the text "<extension> <resolution>".

        The comparison is a plain string comparison. Operands that are
        not Video instances yield NotImplemented so Python can try the
        reflected operation or raise TypeError.
        """
        if not isinstance(other, Video):
            return NotImplemented
        v1 = "%s %s" % (self.extension, self.resolution)
        v2 = "%s %s" % (other.extension, other.resolution)
        return v1 < v2
|
pytube/utils.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def safe_filename(text, max_length=200):
    """
    Sanitizes filenames for many operating systems.

    Keyword arguments:
    text -- The unsanitized pending filename.
    max_length -- The maximum number of characters that are kept
                  (default: 200).

    Underscores become spaces and ':' becomes ' -' before the forbidden
    characters are removed; the result is then cut to max_length.
    """
    #Tidy up ugly formatted filenames.
    text = text.replace('_', ' ')
    text = text.replace(':', ' -')

    #NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F);
    #range() excludes its upper bound, hence 32.
    ntfs = ''.join(chr(i) for i in range(0, 32))

    #Removing these SHOULD make most filename safe for a wide range
    #of operating systems.
    paranoid = '"#$%\'*,./:;<>?\\^|~'

    #One escaped character class instead of a '|'-joined alternation:
    #joining raw entries let a lone backslash swallow the following
    #separator, so '^' was only removed when it directly followed '|'.
    blacklist = re.compile('[' + re.escape(ntfs + paranoid) + ']',
                           re.UNICODE)
    filename = blacklist.sub('', text)
    #Quickly truncates long filenames.
    return filename[:max_length]
|
tests/__init__.py
ADDED
File without changes
|