nficano committed on
Commit
f984924
·
1 Parent(s): aaf6096

Python 3.x! Reorganized file structure, removed print statements.

Browse files
pytube/__init__.py CHANGED
@@ -1,344 +1,8 @@
1
- from __future__ import unicode_literals
 
 
 
 
2
 
3
- from os.path import normpath
4
- from urllib import urlencode
5
- from urllib2 import urlopen
6
- from urlparse import urlparse, parse_qs
7
 
8
- import re
9
-
10
- YT_BASE_URL = 'http://www.youtube.com/get_video_info'
11
-
12
- #YouTube quality and codecs id map.
13
- #source: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
14
- YT_ENCODING = {
15
- #Flash Video
16
- 5: ["flv", "240p", "Sorenson H.263", "N/A", "0.25", "MP3", "64"],
17
- 6: ["flv", "270p", "Sorenson H.263", "N/A", "0.8", "MP3", "64"],
18
- 34: ["flv", "360p", "H.264", "Main", "0.5", "AAC", "128"],
19
- 35: ["flv", "480p", "H.264", "Main", "0.8-1", "AAC", "128"],
20
-
21
- #3GP
22
- 36: ["3gp", "240p", "MPEG-4 Visual", "Simple", "0.17", "AAC", "38"],
23
- 13: ["3gp", "N/A", "MPEG-4 Visual", "N/A", "0.5", "AAC", "N/A"],
24
- 17: ["3gp", "144p", "MPEG-4 Visual", "Simple", "0.05", "AAC", "24"],
25
-
26
- #MPEG-4
27
- 18: ["mp4", "360p", "H.264", "Baseline", "0.5", "AAC", "96"],
28
- 22: ["mp4", "720p", "H.264", "High", "2-2.9", "AAC", "192"],
29
- 37: ["mp4", "1080p", "H.264", "High", "3-4.3", "AAC", "192"],
30
- 38: ["mp4", "3072p", "H.264", "High", "3.5-5", "AAC", "192"],
31
- 82: ["mp4", "360p", "H.264", "3D", "0.5", "AAC", "96"],
32
- 83: ["mp4", "240p", "H.264", "3D", "0.5", "AAC", "96"],
33
- 84: ["mp4", "720p", "H.264", "3D", "2-2.9", "AAC", "152"],
34
- 85: ["mp4", "520p", "H.264", "3D", "2-2.9", "AAC", "152"],
35
-
36
- #WebM
37
- 43: ["webm", "360p", "VP8", "N/A", "0.5", "Vorbis", "128"],
38
- 44: ["webm", "480p", "VP8", "N/A", "1", "Vorbis", "128"],
39
- 45: ["webm", "720p", "VP8", "N/A", "2", "Vorbis", "192"],
40
- 46: ["webm", "1080p", "VP8", "N/A", "N/A", "Vorbis", "192"],
41
- 100: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "128"],
42
- 101: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "192"],
43
- 102: ["webm", "720p", "VP8", "3D", "N/A", "Vorbis", "192"]
44
- }
45
-
46
- # The keys corresponding to the quality/codec map above.
47
- YT_ENCODING_KEYS = (
48
- 'extension', 'resolution', 'video_codec', 'profile', 'video_bitrate',
49
- 'audio_codec', 'audio_bitrate'
50
- )
51
-
52
-
53
- class MultipleObjectsReturned(Exception):
54
- """
55
- The query returned multiple objects when only one was expected.
56
- """
57
- pass
58
-
59
-
60
- class YouTubeError(Exception):
61
- """
62
- The REST interface returned an error.
63
- """
64
- pass
65
-
66
-
67
- class Video(object):
68
- """
69
- Class representation of a single instance of a YouTube video.
70
- """
71
- def __init__(self, url, filename, **attributes):
72
- """
73
- Define the variables required to declare a new video.
74
-
75
- Keyword arguments:
76
- extention -- The file extention the video should be saved as.
77
- resolution -- The broadcasting standard of the video.
78
- url -- The url of the video. (e.g.: youtube.com/watch?v=..)
79
- filename -- The filename (minus the extention) to save the video.
80
- """
81
-
82
- self.url = url
83
- self.filename = filename
84
- self.__dict__.update(**attributes)
85
-
86
- def download(self, path=None):
87
- """
88
- Downloads the file of the URL defined within the class
89
- instance.
90
-
91
- Keyword arguments:
92
- path -- Destination directory
93
- """
94
-
95
- path = (normpath(path) + '/' if path else '')
96
- response = urlopen(self.url)
97
- with open(path + self.filename, 'wb') as dst_file:
98
- meta_data = dict(response.info().items())
99
- file_size = int(meta_data.get("Content-Length") or
100
- meta_data.get("content-length"))
101
- print "Downloading: %s Bytes: %s" % (self.filename, file_size)
102
-
103
- bytes_received = 0
104
- chunk_size = 8192
105
- while True:
106
- buffer = response.read(chunk_size)
107
- if not buffer:
108
- break
109
-
110
- bytes_received += len(buffer)
111
- dst_file.write(buffer)
112
- percent = bytes_received * 100. / file_size
113
- status = r"%10d [%3.2f%%]" % (bytes_received, percent)
114
- status = status + chr(8) * (len(status) + 1)
115
- print status,
116
-
117
- def __repr__(self):
118
- """A cleaner representation of the class instance."""
119
- return "<Video: %s (.%s) - %s>" % (self.video_codec, self.extension,
120
- self.resolution)
121
-
122
- def __lt__(self, other):
123
- if type(other) == Video:
124
- v1 = "%s %s" % (self.extension, self.resolution)
125
- v2 = "%s %s" % (other.extension, other.resolution)
126
- return (v1 > v2) - (v1 < v2) < 0
127
-
128
-
129
- class YouTube(object):
130
- _filename = None
131
- _fmt_values = []
132
- _video_url = None
133
- title = None
134
- videos = []
135
- # fmt was an undocumented URL parameter that allowed selecting
136
- # YouTube quality mode without using player user interface.
137
-
138
- @property
139
- def url(self):
140
- """Exposes the video url."""
141
- return self._video_url
142
-
143
- @url.setter
144
- def url(self, url):
145
- """ Defines the URL of the YouTube video."""
146
- self._video_url = url
147
- #Reset the filename.
148
- self._filename = None
149
- #Get the video details.
150
- self._get_video_info()
151
-
152
- @property
153
- def filename(self):
154
- """
155
- Exposes the title of the video. If this is not set, one is
156
- generated based on the name of the video.
157
- """
158
- if not self._filename:
159
- self._filename = safe_filename(self.title)
160
- return self._filename
161
-
162
- @filename.setter
163
- def filename(self, filename):
164
- """ Defines the filename."""
165
- self._filename = filename
166
-
167
- @property
168
- def video_id(self):
169
- """Gets the video ID extracted from the URL."""
170
- parts = urlparse(self._video_url)
171
- qs = getattr(parts, 'query', None)
172
- if qs:
173
- video_id = parse_qs(qs).get('v', None)
174
- if video_id:
175
- return video_id.pop()
176
-
177
- def get(self, extension=None, res=None):
178
- """
179
- Return a single video given an extention and resolution.
180
-
181
- Keyword arguments:
182
- extention -- The desired file extention (e.g.: mp4).
183
- res -- The desired broadcasting standard of the video (e.g.: 1080p).
184
- """
185
- result = []
186
- for v in self.videos:
187
- if extension and v.extension != extension:
188
- continue
189
- elif res and v.resolution != res:
190
- continue
191
- else:
192
- result.append(v)
193
- if not len(result):
194
- return
195
- elif len(result) is 1:
196
- return result[0]
197
- else:
198
- d = len(result)
199
- raise MultipleObjectsReturned("get() returned more than one "
200
- "object -- it returned %d!" % d)
201
-
202
- def filter(self, extension=None, res=None):
203
- """
204
- Return a filtered list of videos given an extention and
205
- resolution criteria.
206
-
207
- Keyword arguments:
208
- extention -- The desired file extention (e.g.: mp4).
209
- res -- The desired broadcasting standard of the video (e.g.: 1080p).
210
- """
211
- results = []
212
- for v in self.videos:
213
- if extension and v.extension != extension:
214
- continue
215
- elif res and v.resolution != res:
216
- continue
217
- else:
218
- results.append(v)
219
- return results
220
-
221
- def _fetch(self, path, data):
222
- """
223
- Given a path, traverse the response for the desired data. (A
224
- modified ver. of my dictionary traverse method:
225
- https://gist.github.com/2009119)
226
-
227
- Keyword arguments:
228
- path -- A tuple representing a path to a node within a tree.
229
- data -- The data containing the tree.
230
- """
231
- elem = path[0]
232
- #Get first element in tuple, and check if it contains a list.
233
- if type(data) is list:
234
- # Pop it, and let's continue..
235
- return self._fetch(path, data.pop())
236
- #Parse the url encoded data
237
- data = parse_qs(data)
238
- #Get the element in our path
239
- data = data.get(elem, None)
240
- #Offset the tuple by 1.
241
- path = path[1::1]
242
- #Check if the path has reached the end OR the element return
243
- #nothing.
244
- if len(path) is 0 or data is None:
245
- if type(data) is list and len(data) is 1:
246
- data = data.pop()
247
- return data
248
- else:
249
- # Nope, let's keep diggin'
250
- return self._fetch(path, data)
251
-
252
- def _get_video_info(self):
253
- """
254
- This is responsable for executing the request, extracting the
255
- necessary details, and populating the different video
256
- resolutions and formats into a list.
257
- """
258
- querystring = urlencode({'asv': 3, 'el': 'detailpage', 'hl': 'en_US',
259
- 'video_id': self.video_id})
260
-
261
- self.title = None
262
- self.videos = []
263
-
264
- response = urlopen(YT_BASE_URL + '?' + querystring)
265
-
266
- if response:
267
- content = response.read().decode()
268
- data = parse_qs(content)
269
- if 'errorcode' in data:
270
- error = data.get('reason', 'An unknown error has occurred')
271
- if isinstance(error, list):
272
- error = error.pop()
273
- raise YouTubeError(error)
274
-
275
- #Use my cool traversing method to extract the specific
276
- #attribute from the response body.
277
- path = ('url_encoded_fmt_stream_map', 'url')
278
- video_urls = self._fetch(path, content)
279
- #Get the video signatures, YouTube require them as an url component
280
- path = ('url_encoded_fmt_stream_map', 'sig')
281
- video_signatures = self._fetch(path, content)
282
- self.title = self._fetch(('title',), content)
283
-
284
- for idx in range(len(video_urls)):
285
- url = video_urls[idx]
286
- signature = video_signatures[idx]
287
- try:
288
- fmt, data = self._extract_fmt(url)
289
- filename = "%s.%s" % (self.filename, data['extension'])
290
- except (TypeError, KeyError):
291
- pass
292
- else:
293
- #Add video signature to url
294
- url = "%s&signature=%s" % (url, signature)
295
- v = Video(url, filename, **data)
296
- self.videos.append(v)
297
- self._fmt_values.append(fmt)
298
- self.videos.sort()
299
-
300
- def _extract_fmt(self, text):
301
- """
302
- YouTube does not pass you a completely valid URLencoded form,
303
- I suspect this is suppose to act as a deterrent.. Nothing some
304
- regulular expressions couldn't handle.
305
-
306
- Keyword arguments:
307
- text -- The malformed data contained within each url node.
308
- """
309
- itag = re.findall('itag=(\d+)', text)
310
- if itag and len(itag) is 1:
311
- itag = int(itag[0])
312
- attr = YT_ENCODING.get(itag, None)
313
- if not attr:
314
- return itag, None
315
- data = {}
316
- map(lambda k, v: data.update({k: v}), YT_ENCODING_KEYS, attr)
317
- return itag, data
318
-
319
-
320
- def safe_filename(text, max_length=200):
321
- """
322
- Sanitizes filenames for many operating systems.
323
-
324
- Keyword arguments:
325
- text -- The unsanitized pending filename.
326
- """
327
- #Quickly truncates long filenames.
328
- truncate = lambda text: text[:max_length].rsplit(' ', 0)[0]
329
-
330
- #Tidy up ugly formatted filenames.
331
- text = text.replace('_', ' ')
332
- text = text.replace(':', ' -')
333
-
334
- #NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F)
335
- ntfs = [chr(i) for i in range(0, 31)]
336
-
337
- #Removing these SHOULD make most filename safe for a wide range
338
- #of operating systems.
339
- paranoid = ['\"', '\#', '\$', '\%', '\'', '\*', '\,', '\.', '\/', '\:',
340
- '\;', '\<', '\>', '\?', '\\', '\^', '\|', '\~', '\\\\']
341
-
342
- blacklist = re.compile('|'.join(ntfs + paranoid), re.UNICODE)
343
- filename = blacklist.sub('', text)
344
- return truncate(filename)
 
1
+ __title__ = 'pytube'
2
+ __version__ = '1.0.0'
3
+ __author__ = 'Nick Ficano'
4
+ __license__ = 'MIT License'
5
+ __copyright__ = 'Copyright 2013 Nick Ficano'
6
 
7
+ from .api import YouTube
 
 
 
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytube/api.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import unicode_literals
2
+
3
+ from .exceptions import MultipleObjectsReturned, YouTubeError
4
+ from .models import Video
5
+ from .utils import safe_filename
6
+ from urllib import urlencode
7
+ from urllib2 import urlopen
8
+ from urlparse import urlparse, parse_qs
9
+
10
+ import re
11
+
12
+ YT_BASE_URL = 'http://www.youtube.com/get_video_info'
13
+
14
+ #YouTube quality and codecs id map.
15
+ #source: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
16
+ YT_ENCODING = {
17
+ #Flash Video
18
+ 5: ["flv", "240p", "Sorenson H.263", "N/A", "0.25", "MP3", "64"],
19
+ 6: ["flv", "270p", "Sorenson H.263", "N/A", "0.8", "MP3", "64"],
20
+ 34: ["flv", "360p", "H.264", "Main", "0.5", "AAC", "128"],
21
+ 35: ["flv", "480p", "H.264", "Main", "0.8-1", "AAC", "128"],
22
+
23
+ #3GP
24
+ 36: ["3gp", "240p", "MPEG-4 Visual", "Simple", "0.17", "AAC", "38"],
25
+ 13: ["3gp", "N/A", "MPEG-4 Visual", "N/A", "0.5", "AAC", "N/A"],
26
+ 17: ["3gp", "144p", "MPEG-4 Visual", "Simple", "0.05", "AAC", "24"],
27
+
28
+ #MPEG-4
29
+ 18: ["mp4", "360p", "H.264", "Baseline", "0.5", "AAC", "96"],
30
+ 22: ["mp4", "720p", "H.264", "High", "2-2.9", "AAC", "192"],
31
+ 37: ["mp4", "1080p", "H.264", "High", "3-4.3", "AAC", "192"],
32
+ 38: ["mp4", "3072p", "H.264", "High", "3.5-5", "AAC", "192"],
33
+ 82: ["mp4", "360p", "H.264", "3D", "0.5", "AAC", "96"],
34
+ 83: ["mp4", "240p", "H.264", "3D", "0.5", "AAC", "96"],
35
+ 84: ["mp4", "720p", "H.264", "3D", "2-2.9", "AAC", "152"],
36
+ 85: ["mp4", "520p", "H.264", "3D", "2-2.9", "AAC", "152"],
37
+
38
+ #WebM
39
+ 43: ["webm", "360p", "VP8", "N/A", "0.5", "Vorbis", "128"],
40
+ 44: ["webm", "480p", "VP8", "N/A", "1", "Vorbis", "128"],
41
+ 45: ["webm", "720p", "VP8", "N/A", "2", "Vorbis", "192"],
42
+ 46: ["webm", "1080p", "VP8", "N/A", "N/A", "Vorbis", "192"],
43
+ 100: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "128"],
44
+ 101: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "192"],
45
+ 102: ["webm", "720p", "VP8", "3D", "N/A", "Vorbis", "192"]
46
+ }
47
+
48
+ # The keys corresponding to the quality/codec map above.
49
+ YT_ENCODING_KEYS = (
50
+ 'extension', 'resolution', 'video_codec', 'profile', 'video_bitrate',
51
+ 'audio_codec', 'audio_bitrate'
52
+ )
53
+
54
class YouTube(object):
    """
    Looks up a YouTube video through the ``get_video_info`` endpoint and
    exposes its streams as ``Video`` objects.

    Assigning ``url`` triggers the lookup; afterwards ``title`` holds the
    video title and ``videos`` holds one ``Video`` per recognised format.
    """
    #Class-level defaults. The two lists are kept only as fallbacks for
    #code that reads them without going through __init__; they are never
    #mutated here -- every instance gets its own lists.
    _filename = None
    _fmt_values = []
    _video_url = None
    title = None
    videos = []
    # fmt was an undocumented URL parameter that allowed selecting
    # YouTube quality mode without using player user interface.

    def __init__(self):
        """Give each instance its own (unshared) result lists."""
        self._fmt_values = []
        self.videos = []

    @property
    def url(self):
        """Exposes the video url."""
        return self._video_url

    @url.setter
    def url(self, url):
        """Defines the URL of the YouTube video and fetches its details."""
        self._video_url = url
        #Reset the filename so it is regenerated from the new title.
        self._filename = None
        #Get the video details.
        self._get_video_info()

    @property
    def filename(self):
        """
        Exposes the filename. If none was set explicitly, one is
        generated (and cached) from the title of the video.
        """
        if not self._filename:
            self._filename = safe_filename(self.title)
        return self._filename

    @filename.setter
    def filename(self, filename):
        """Defines the filename."""
        self._filename = filename

    @property
    def video_id(self):
        """
        Gets the video ID extracted from the ``v`` query parameter of the
        URL, or None when the URL has no such parameter.
        """
        parts = urlparse(self._video_url)
        qs = getattr(parts, 'query', None)
        if qs:
            video_id = parse_qs(qs).get('v', None)
            if video_id:
                #parse_qs yields a list per key; the last value wins.
                return video_id[-1]
        return None

    def get(self, extension=None, res=None):
        """
        Return a single video given an extension and resolution.

        Keyword arguments:
        extension -- The desired file extension (e.g.: mp4).
        res -- The desired broadcasting standard of the video (e.g.: 1080p).

        Returns None when nothing matches; raises MultipleObjectsReturned
        when more than one video matches.
        """
        result = self.filter(extension, res)
        if not result:
            return None
        if len(result) == 1:
            return result[0]
        raise MultipleObjectsReturned("get() returned more than one "
                                      "object -- it returned %d!"
                                      % len(result))

    def filter(self, extension=None, res=None):
        """
        Return a filtered list of videos given an extension and
        resolution criteria. A criterion left empty matches everything.

        Keyword arguments:
        extension -- The desired file extension (e.g.: mp4).
        res -- The desired broadcasting standard of the video (e.g.: 1080p).
        """
        return [v for v in self.videos
                if (not extension or v.extension == extension)
                and (not res or v.resolution == res)]

    def _fetch(self, path, data):
        """
        Given a path, traverse the url-encoded response for the desired
        data. Every level is itself a url-encoded query string.

        Keyword arguments:
        path -- A tuple representing a path to a node within a tree.
        data -- The data containing the tree (a string, or a list whose
                last item is used).

        Returns None when a key is missing, the bare value when the final
        node holds exactly one value, otherwise the list of values.
        """
        elem = path[0]
        if isinstance(data, list):
            #Descend into the last item and retry with the same path.
            return self._fetch(path, data[-1])
        #Parse the url encoded data and pick the element in our path.
        data = parse_qs(data).get(elem, None)
        #Offset the tuple by 1.
        path = path[1:]
        #Stop when the path is exhausted OR the element was not found.
        if not path or data is None:
            if isinstance(data, list) and len(data) == 1:
                data = data[0]
            return data
        #Nope, let's keep diggin'
        return self._fetch(path, data)

    @staticmethod
    def _as_list(value):
        """Normalise a _fetch() result: None -> [], scalar -> [scalar]."""
        if value is None:
            return []
        if isinstance(value, list):
            return value
        return [value]

    def _get_video_info(self):
        """
        This is responsible for executing the request, extracting the
        necessary details, and populating the different video
        resolutions and formats into a list.

        Raises YouTubeError when the response carries an ``errorcode``.
        """
        querystring = urlencode({'asv': 3, 'el': 'detailpage', 'hl': 'en_US',
                                 'video_id': self.video_id})

        #Start from a clean slate; rebinding (rather than appending)
        #keeps results from a previous url out of this lookup.
        self.title = None
        self.videos = []
        self._fmt_values = []

        response = urlopen(YT_BASE_URL + '?' + querystring)
        if not response:
            return
        try:
            content = response.read().decode()
        finally:
            #Always release the connection, even if reading fails.
            response.close()

        data = parse_qs(content)
        if 'errorcode' in data:
            error = data.get('reason', 'An unknown error has occurred')
            if isinstance(error, list):
                error = error.pop()
            raise YouTubeError(error)

        #Extract the stream urls and their signatures (YouTube requires
        #the signature as an url component) from the response body.
        video_urls = self._as_list(
            self._fetch(('url_encoded_fmt_stream_map', 'url'), content))
        video_signatures = self._as_list(
            self._fetch(('url_encoded_fmt_stream_map', 'sig'), content))
        self.title = self._fetch(('title',), content)

        #zip() pairs each url with its signature; urls without a matching
        #signature are skipped.
        for url, signature in zip(video_urls, video_signatures):
            fmt, data = self._extract_fmt(url)
            if data is None:
                #Unknown or missing itag: nothing we can describe.
                continue
            filename = "%s.%s" % (self.filename, data['extension'])
            #Add video signature to url
            url = "%s&signature=%s" % (url, signature)
            self.videos.append(Video(url, filename, **data))
            self._fmt_values.append(fmt)
        self.videos.sort()

    def _extract_fmt(self, text):
        """
        YouTube does not pass you a completely valid URLencoded form,
        so the itag is pulled out of the url with a regular expression.

        Keyword arguments:
        text -- The malformed data contained within each url node.

        Returns a tuple (itag, data): data maps YT_ENCODING_KEYS to the
        values for that itag, or is None when the itag is unknown. When
        the text does not contain exactly one itag, (None, None) is
        returned.
        """
        itag = re.findall(r'itag=(\d+)', text)
        if len(itag) != 1:
            return None, None
        itag = int(itag[0])
        attr = YT_ENCODING.get(itag, None)
        if not attr:
            return itag, None
        #dict(zip()) is eager; map() with a side-effecting lambda is lazy
        #on Python 3 and would leave the dict empty.
        return itag, dict(zip(YT_ENCODING_KEYS, attr))
pytube/exceptions.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class MultipleObjectsReturned(Exception):
    """Signals that a query matched several objects where one was expected."""
6
+
7
+
8
class YouTubeError(Exception):
    """Signals that the REST interface answered with an error."""
pytube/models.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from os.path import normpath
2
+ from urllib2 import urlopen
3
+
4
class Video(object):
    """
    Class representation of a single instance of a YouTube video.

    Instances are ordered by the string "<extension> <resolution>", which
    is what ``list.sort()`` relies on.
    """
    def __init__(self, url, filename, **attributes):
        """
        Define the variables required to declare a new video.

        Keyword arguments:
        url -- The url the video is downloaded from.
        filename -- The filename the video is saved as.
        attributes -- Any further attributes (e.g. extension, resolution,
                      video_codec); each one is set on the instance.
        """
        self.url = url
        self.filename = filename
        self.__dict__.update(**attributes)

    def download(self, path=None, chunk_size=8*1024):
        """
        Downloads the file of the URL defined within the class
        instance.

        Keyword arguments:
        path -- Destination directory (default: current directory).
        chunk_size -- Number of bytes read from the response and written
                      to the file at a time (default: 8192 bytes).
        """
        path = (normpath(path) + '/' if path else '')
        response = urlopen(self.url)
        try:
            with open(path + self.filename, 'wb') as dst_file:
                #Progress is exposed through these two attributes.
                self._bytes_received = 0
                self._buffer = None
                while True:
                    self._buffer = response.read(chunk_size)
                    if not self._buffer:
                        break

                    self._bytes_received += len(self._buffer)
                    dst_file.write(self._buffer)
        finally:
            #Release the connection even when writing fails.
            response.close()

    def __repr__(self):
        """A cleaner representation of the class instance."""
        return "<Video: %s (.%s) - %s>" % (self.video_codec, self.extension,
                                           self.resolution)

    def __lt__(self, other):
        """Order videos by extension, then resolution (string compare)."""
        if not isinstance(other, Video):
            #Let Python try the reflected operation / raise TypeError.
            return NotImplemented
        v1 = "%s %s" % (self.extension, self.resolution)
        v2 = "%s %s" % (other.extension, other.resolution)
        return v1 < v2
pytube/utils.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def safe_filename(text, max_length=200):
    """
    Sanitizes filenames for many operating systems.

    Underscores become spaces and colons become " -"; control characters
    and a set of punctuation characters are then removed, and the result
    is cut to at most max_length characters.

    Keyword arguments:
    text -- The unsanitized pending filename.
    max_length -- Maximum length of the returned filename (default: 200).
    """
    #Tidy up ugly formatted filenames.
    text = text.replace('_', ' ')
    text = text.replace(':', ' -')

    #NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F),
    #so the range must include 31 itself.
    ntfs = [chr(i) for i in range(0, 32)]

    #Removing these SHOULD make most filename safe for a wide range
    #of operating systems.
    paranoid = ['"', '#', '$', '%', "'", '*', ',', '.', '/', ':',
                ';', '<', '>', '?', '\\', '^', '|', '~']

    #A character class built with re.escape() avoids the pitfall of
    #joining alternatives with '|', where a lone backslash entry escapes
    #the separator that follows it.
    blacklist = re.compile('[' + re.escape(''.join(ntfs + paranoid)) + ']',
                           re.UNICODE)
    filename = blacklist.sub('', text)
    #Truncate long filenames.
    return filename[:max_length]
tests/__init__.py ADDED
File without changes