Ferdowsi
/

pytube

Model card Files Files and versions Community

nficano commited on Jan 9, 2013

Commit

f984924

1 Parent(s): aaf6096

Python 3.x! Reorganized file structure, removed print statements.

Browse files

Files changed (6) hide show

pytube/__init__.py +6 -342
pytube/api.py +242 -0
pytube/exceptions.py +12 -0
pytube/models.py +59 -0
pytube/utils.py +27 -0
tests/__init__.py +0 -0

pytube/__init__.py CHANGED Viewed

@@ -1,344 +1,8 @@
-from __future__ import unicode_literals
-from os.path import normpath
-from urllib import urlencode
-from urllib2 import urlopen
-from urlparse import urlparse, parse_qs
-import re
-YT_BASE_URL = 'http://www.youtube.com/get_video_info'
-#YouTube quality and codecs id map.
-#source: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
-YT_ENCODING = {
-    #Flash Video
-    5: ["flv", "240p", "Sorenson H.263", "N/A", "0.25", "MP3", "64"],
-    6: ["flv", "270p", "Sorenson H.263", "N/A", "0.8", "MP3", "64"],
-    34: ["flv", "360p", "H.264", "Main", "0.5", "AAC", "128"],
-    35: ["flv", "480p", "H.264", "Main", "0.8-1", "AAC", "128"],
-    #3GP
-    36: ["3gp", "240p", "MPEG-4 Visual", "Simple", "0.17", "AAC", "38"],
-    13: ["3gp", "N/A", "MPEG-4 Visual", "N/A", "0.5", "AAC", "N/A"],
-    17: ["3gp", "144p", "MPEG-4 Visual", "Simple", "0.05", "AAC", "24"],
-    #MPEG-4
-    18: ["mp4", "360p", "H.264", "Baseline", "0.5", "AAC", "96"],
-    22: ["mp4", "720p", "H.264", "High", "2-2.9", "AAC", "192"],
-    37: ["mp4", "1080p", "H.264", "High", "3-4.3", "AAC", "192"],
-    38: ["mp4", "3072p", "H.264", "High", "3.5-5", "AAC", "192"],
-    82: ["mp4", "360p", "H.264", "3D", "0.5", "AAC", "96"],
-    83: ["mp4", "240p", "H.264", "3D", "0.5", "AAC", "96"],
-    84: ["mp4", "720p", "H.264", "3D", "2-2.9", "AAC", "152"],
-    85: ["mp4", "520p", "H.264", "3D", "2-2.9", "AAC", "152"],
-    #WebM
-    43: ["webm", "360p", "VP8", "N/A", "0.5", "Vorbis", "128"],
-    44: ["webm", "480p", "VP8", "N/A", "1", "Vorbis", "128"],
-    45: ["webm", "720p", "VP8", "N/A", "2", "Vorbis", "192"],
-    46: ["webm", "1080p", "VP8", "N/A", "N/A", "Vorbis", "192"],
-    100: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "128"],
-    101: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "192"],
-    102: ["webm", "720p", "VP8", "3D", "N/A", "Vorbis", "192"]
-}
-# The keys corresponding to the quality/codec map above.
-YT_ENCODING_KEYS = (
-    'extension', 'resolution', 'video_codec', 'profile', 'video_bitrate',
-    'audio_codec', 'audio_bitrate'
-)
-class MultipleObjectsReturned(Exception):
-    """
-    The query returned multiple objects when only one was expected.
-    """
-    pass
-class YouTubeError(Exception):
-    """
-    The REST interface returned an error.
-    """
-    pass
-class Video(object):
-    """
-    Class representation of a single instance of a YouTube video.
-    """
-    def __init__(self, url, filename, **attributes):
-        """
-        Define the variables required to declare a new video.
-        Keyword arguments:
-        extention -- The file extention the video should be saved as.
-        resolution -- The broadcasting standard of the video.
-        url -- The url of the video. (e.g.: youtube.com/watch?v=..)
-        filename -- The filename (minus the extention) to save the video.
-        """
-        self.url = url
-        self.filename = filename
-        self.__dict__.update(**attributes)
-    def download(self, path=None):
-        """
-        Downloads the file of the URL defined within the class
-        instance.
-        Keyword arguments:
-        path -- Destination directory
-        """
-        path = (normpath(path) + '/' if path else '')
-        response = urlopen(self.url)
-        with open(path + self.filename, 'wb') as dst_file:
-            meta_data = dict(response.info().items())
-            file_size = int(meta_data.get("Content-Length") or
-                            meta_data.get("content-length"))
-            print "Downloading: %s Bytes: %s" % (self.filename, file_size)
-            bytes_received = 0
-            chunk_size = 8192
-            while True:
-                buffer = response.read(chunk_size)
-                if not buffer:
-                    break
-                bytes_received += len(buffer)
-                dst_file.write(buffer)
-                percent = bytes_received * 100. / file_size
-                status = r"%10d  [%3.2f%%]" % (bytes_received, percent)
-                status = status + chr(8) * (len(status) + 1)
-                print status,
-    def __repr__(self):
-        """A cleaner representation of the class instance."""
-        return "<Video: %s (.%s) - %s>" % (self.video_codec, self.extension,
-                                           self.resolution)
-    def __lt__(self, other):
-        if type(other) == Video:
-            v1 = "%s %s" % (self.extension, self.resolution)
-            v2 = "%s %s" % (other.extension, other.resolution)
-            return (v1 > v2) - (v1 < v2) < 0
-class YouTube(object):
-    _filename = None
-    _fmt_values = []
-    _video_url = None
-    title = None
-    videos = []
-    # fmt was an undocumented URL parameter that allowed selecting
-    # YouTube quality mode without using player user interface.
-    @property
-    def url(self):
-        """Exposes the video url."""
-        return self._video_url
-    @url.setter
-    def url(self, url):
-        """ Defines the URL of the YouTube video."""
-        self._video_url = url
-        #Reset the filename.
-        self._filename = None
-        #Get the video details.
-        self._get_video_info()
-    @property
-    def filename(self):
-        """
-        Exposes the title of the video. If this is not set, one is
-        generated based on the name of the video.
-        """
-        if not self._filename:
-            self._filename = safe_filename(self.title)
-        return self._filename
-    @filename.setter
-    def filename(self, filename):
-        """ Defines the filename."""
-        self._filename = filename
-    @property
-    def video_id(self):
-        """Gets the video ID extracted from the URL."""
-        parts = urlparse(self._video_url)
-        qs = getattr(parts, 'query', None)
-        if qs:
-            video_id = parse_qs(qs).get('v', None)
-            if video_id:
-                return video_id.pop()
-    def get(self, extension=None, res=None):
-        """
-        Return a single video given an extention and resolution.
-        Keyword arguments:
-        extention -- The desired file extention (e.g.: mp4).
-        res -- The desired broadcasting standard of the video (e.g.: 1080p).
-        """
-        result = []
-        for v in self.videos:
-            if extension and v.extension != extension:
-                continue
-            elif res and v.resolution != res:
-                continue
-            else:
-                result.append(v)
-        if not len(result):
-            return
-        elif len(result) is 1:
-            return result[0]
-        else:
-            d = len(result)
-            raise MultipleObjectsReturned("get() returned more than one "
-                                          "object -- it returned %d!" % d)
-    def filter(self, extension=None, res=None):
-        """
-        Return a filtered list of videos given an extention and
-        resolution criteria.
-        Keyword arguments:
-        extention -- The desired file extention (e.g.: mp4).
-        res -- The desired broadcasting standard of the video (e.g.: 1080p).
-        """
-        results = []
-        for v in self.videos:
-            if extension and v.extension != extension:
-                continue
-            elif res and v.resolution != res:
-                continue
-            else:
-                results.append(v)
-        return results
-    def _fetch(self, path, data):
-        """
-        Given a path, traverse the response for the desired data. (A
-        modified ver. of my dictionary traverse method:
-        https://gist.github.com/2009119)
-        Keyword arguments:
-        path -- A tuple representing a path to a node within a tree.
-        data -- The data containing the tree.
-        """
-        elem = path[0]
-        #Get first element in tuple, and check if it contains a list.
-        if type(data) is list:
-            # Pop it, and let's continue..
-            return self._fetch(path, data.pop())
-        #Parse the url encoded data
-        data = parse_qs(data)
-        #Get the element in our path
-        data = data.get(elem, None)
-        #Offset the tuple by 1.
-        path = path[1::1]
-        #Check if the path has reached the end OR the element return
-        #nothing.
-        if len(path) is 0 or data is None:
-            if type(data) is list and len(data) is 1:
-                data = data.pop()
-            return data
-        else:
-            # Nope, let's keep diggin'
-            return self._fetch(path, data)
-    def _get_video_info(self):
-        """
-        This is responsable for executing the request, extracting the
-        necessary details, and populating the different video
-        resolutions and formats into a list.
-        """
-        querystring = urlencode({'asv': 3, 'el': 'detailpage', 'hl': 'en_US',
-                                 'video_id': self.video_id})
-        self.title = None
-        self.videos = []
-        response = urlopen(YT_BASE_URL + '?' + querystring)
-        if response:
-            content = response.read().decode()
-            data = parse_qs(content)
-            if 'errorcode' in data:
-                error = data.get('reason', 'An unknown error has occurred')
-                if isinstance(error, list):
-                    error = error.pop()
-                raise YouTubeError(error)
-            #Use my cool traversing method to extract the specific
-            #attribute from the response body.
-            path = ('url_encoded_fmt_stream_map', 'url')
-            video_urls = self._fetch(path, content)
-            #Get the video signatures, YouTube require them as an url component
-            path = ('url_encoded_fmt_stream_map', 'sig')
-            video_signatures = self._fetch(path, content)
-            self.title = self._fetch(('title',), content)
-            for idx in range(len(video_urls)):
-                url = video_urls[idx]
-                signature = video_signatures[idx]
-                try:
-                    fmt, data = self._extract_fmt(url)
-                    filename = "%s.%s" % (self.filename, data['extension'])
-                except (TypeError, KeyError):
-                    pass
-                else:
-                    #Add video signature to url
-                    url = "%s&signature=%s" % (url, signature)
-                    v = Video(url, filename, **data)
-                    self.videos.append(v)
-                    self._fmt_values.append(fmt)
-            self.videos.sort()
-    def _extract_fmt(self, text):
-        """
-        YouTube does not pass you a completely valid URLencoded form,
-        I suspect this is suppose to act as a deterrent.. Nothing some
-        regulular expressions couldn't handle.
-        Keyword arguments:
-        text -- The malformed data contained within each url node.
-        """
-        itag = re.findall('itag=(\d+)', text)
-        if itag and len(itag) is 1:
-            itag = int(itag[0])
-            attr = YT_ENCODING.get(itag, None)
-            if not attr:
-                return itag, None
-            data = {}
-            map(lambda k, v: data.update({k: v}), YT_ENCODING_KEYS, attr)
-            return itag, data
-def safe_filename(text, max_length=200):
-    """
-    Sanitizes filenames for many operating systems.
-    Keyword arguments:
-    text -- The unsanitized pending filename.
-    """
-    #Quickly truncates long filenames.
-    truncate = lambda text: text[:max_length].rsplit(' ', 0)[0]
-    #Tidy up ugly formatted filenames.
-    text = text.replace('_', ' ')
-    text = text.replace(':', ' -')
-    #NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F)
-    ntfs = [chr(i) for i in range(0, 31)]
-    #Removing these SHOULD make most filename safe for a wide range
-    #of operating systems.
-    paranoid = ['\"', '\#', '\$', '\%', '\'', '\*', '\,', '\.', '\/', '\:',
-                '\;', '\<', '\>', '\?', '\\', '\^', '\|', '\~', '\\\\']
-    blacklist = re.compile('|'.join(ntfs + paranoid), re.UNICODE)
-    filename = blacklist.sub('', text)
-    return truncate(filename)

+__title__ = 'pytube'
+__version__ = '1.0.0'
+__author__ = 'Nick Ficano'
+__license__ = 'MIT License'
+__copyright__ = 'Copyright 2013 Nick Ficano'
+from .api import YouTube

pytube/api.py ADDED Viewed

	@@ -0,0 +1,242 @@

+from __future__ import unicode_literals
+from .exceptions import MultipleObjectsReturned, YouTubeError
+from .models import Video
+from .utils import safe_filename
+from urllib import urlencode
+from urllib2 import urlopen
+from urlparse import urlparse, parse_qs
+import re
+YT_BASE_URL = 'http://www.youtube.com/get_video_info'
+#YouTube quality and codecs id map.
+#source: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
+YT_ENCODING = {
+    #Flash Video
+    5: ["flv", "240p", "Sorenson H.263", "N/A", "0.25", "MP3", "64"],
+    6: ["flv", "270p", "Sorenson H.263", "N/A", "0.8", "MP3", "64"],
+    34: ["flv", "360p", "H.264", "Main", "0.5", "AAC", "128"],
+    35: ["flv", "480p", "H.264", "Main", "0.8-1", "AAC", "128"],
+    #3GP
+    36: ["3gp", "240p", "MPEG-4 Visual", "Simple", "0.17", "AAC", "38"],
+    13: ["3gp", "N/A", "MPEG-4 Visual", "N/A", "0.5", "AAC", "N/A"],
+    17: ["3gp", "144p", "MPEG-4 Visual", "Simple", "0.05", "AAC", "24"],
+    #MPEG-4
+    18: ["mp4", "360p", "H.264", "Baseline", "0.5", "AAC", "96"],
+    22: ["mp4", "720p", "H.264", "High", "2-2.9", "AAC", "192"],
+    37: ["mp4", "1080p", "H.264", "High", "3-4.3", "AAC", "192"],
+    38: ["mp4", "3072p", "H.264", "High", "3.5-5", "AAC", "192"],
+    82: ["mp4", "360p", "H.264", "3D", "0.5", "AAC", "96"],
+    83: ["mp4", "240p", "H.264", "3D", "0.5", "AAC", "96"],
+    84: ["mp4", "720p", "H.264", "3D", "2-2.9", "AAC", "152"],
+    85: ["mp4", "520p", "H.264", "3D", "2-2.9", "AAC", "152"],
+    #WebM
+    43: ["webm", "360p", "VP8", "N/A", "0.5", "Vorbis", "128"],
+    44: ["webm", "480p", "VP8", "N/A", "1", "Vorbis", "128"],
+    45: ["webm", "720p", "VP8", "N/A", "2", "Vorbis", "192"],
+    46: ["webm", "1080p", "VP8", "N/A", "N/A", "Vorbis", "192"],
+    100: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "128"],
+    101: ["webm", "360p", "VP8", "3D", "N/A", "Vorbis", "192"],
+    102: ["webm", "720p", "VP8", "3D", "N/A", "Vorbis", "192"]
+}
+# The keys corresponding to the quality/codec map above.
+YT_ENCODING_KEYS = (
+    'extension', 'resolution', 'video_codec', 'profile', 'video_bitrate',
+    'audio_codec', 'audio_bitrate'
+)
+class YouTube(object):
+    _filename = None
+    _fmt_values = []
+    _video_url = None
+    title = None
+    videos = []
+    # fmt was an undocumented URL parameter that allowed selecting
+    # YouTube quality mode without using player user interface.
+    @property
+    def url(self):
+        """Exposes the video url."""
+        return self._video_url
+    @url.setter
+    def url(self, url):
+        """ Defines the URL of the YouTube video."""
+        self._video_url = url
+        #Reset the filename.
+        self._filename = None
+        #Get the video details.
+        self._get_video_info()
+    @property
+    def filename(self):
+        """
+        Exposes the title of the video. If this is not set, one is
+        generated based on the name of the video.
+        """
+        if not self._filename:
+            self._filename = safe_filename(self.title)
+        return self._filename
+    @filename.setter
+    def filename(self, filename):
+        """ Defines the filename."""
+        self._filename = filename
+    @property
+    def video_id(self):
+        """Gets the video ID extracted from the URL."""
+        parts = urlparse(self._video_url)
+        qs = getattr(parts, 'query', None)
+        if qs:
+            video_id = parse_qs(qs).get('v', None)
+            if video_id:
+                return video_id.pop()
+    def get(self, extension=None, res=None):
+        """
+        Return a single video given an extention and resolution.
+        Keyword arguments:
+        extention -- The desired file extention (e.g.: mp4).
+        res -- The desired broadcasting standard of the video (e.g.: 1080p).
+        """
+        result = []
+        for v in self.videos:
+            if extension and v.extension != extension:
+                continue
+            elif res and v.resolution != res:
+                continue
+            else:
+                result.append(v)
+        if not len(result):
+            return
+        elif len(result) is 1:
+            return result[0]
+        else:
+            d = len(result)
+            raise MultipleObjectsReturned("get() returned more than one "
+                                          "object -- it returned %d!" % d)
+    def filter(self, extension=None, res=None):
+        """
+        Return a filtered list of videos given an extention and
+        resolution criteria.
+        Keyword arguments:
+        extention -- The desired file extention (e.g.: mp4).
+        res -- The desired broadcasting standard of the video (e.g.: 1080p).
+        """
+        results = []
+        for v in self.videos:
+            if extension and v.extension != extension:
+                continue
+            elif res and v.resolution != res:
+                continue
+            else:
+                results.append(v)
+        return results
+    def _fetch(self, path, data):
+        """
+        Given a path, traverse the response for the desired data. (A
+        modified ver. of my dictionary traverse method:
+        https://gist.github.com/2009119)
+        Keyword arguments:
+        path -- A tuple representing a path to a node within a tree.
+        data -- The data containing the tree.
+        """
+        elem = path[0]
+        #Get first element in tuple, and check if it contains a list.
+        if type(data) is list:
+            # Pop it, and let's continue..
+            return self._fetch(path, data.pop())
+        #Parse the url encoded data
+        data = parse_qs(data)
+        #Get the element in our path
+        data = data.get(elem, None)
+        #Offset the tuple by 1.
+        path = path[1::1]
+        #Check if the path has reached the end OR the element return
+        #nothing.
+        if len(path) is 0 or data is None:
+            if type(data) is list and len(data) is 1:
+                data = data.pop()
+            return data
+        else:
+            # Nope, let's keep diggin'
+            return self._fetch(path, data)
+    def _get_video_info(self):
+        """
+        This is responsable for executing the request, extracting the
+        necessary details, and populating the different video
+        resolutions and formats into a list.
+        """
+        querystring = urlencode({'asv': 3, 'el': 'detailpage', 'hl': 'en_US',
+                                 'video_id': self.video_id})
+        self.title = None
+        self.videos = []
+        response = urlopen(YT_BASE_URL + '?' + querystring)
+        if response:
+            content = response.read().decode()
+            data = parse_qs(content)
+            if 'errorcode' in data:
+                error = data.get('reason', 'An unknown error has occurred')
+                if isinstance(error, list):
+                    error = error.pop()
+                raise YouTubeError(error)
+            #Use my cool traversing method to extract the specific
+            #attribute from the response body.
+            path = ('url_encoded_fmt_stream_map', 'url')
+            video_urls = self._fetch(path, content)
+            #Get the video signatures, YouTube require them as an url component
+            path = ('url_encoded_fmt_stream_map', 'sig')
+            video_signatures = self._fetch(path, content)
+            self.title = self._fetch(('title',), content)
+            for idx in range(len(video_urls)):
+                url = video_urls[idx]
+                signature = video_signatures[idx]
+                try:
+                    fmt, data = self._extract_fmt(url)
+                    filename = "%s.%s" % (self.filename, data['extension'])
+                except (TypeError, KeyError):
+                    pass
+                else:
+                    #Add video signature to url
+                    url = "%s&signature=%s" % (url, signature)
+                    v = Video(url, filename, **data)
+                    self.videos.append(v)
+                    self._fmt_values.append(fmt)
+            self.videos.sort()
+    def _extract_fmt(self, text):
+        """
+        YouTube does not pass you a completely valid URLencoded form,
+        I suspect this is suppose to act as a deterrent.. Nothing some
+        regulular expressions couldn't handle.
+        Keyword arguments:
+        text -- The malformed data contained within each url node.
+        """
+        itag = re.findall('itag=(\d+)', text)
+        if itag and len(itag) is 1:
+            itag = int(itag[0])
+            attr = YT_ENCODING.get(itag, None)
+            if not attr:
+                return itag, None
+            data = {}
+            map(lambda k, v: data.update({k: v}), YT_ENCODING_KEYS, attr)
+            return itag, data

pytube/exceptions.py ADDED Viewed

	@@ -0,0 +1,12 @@

+class MultipleObjectsReturned(Exception):
+    """
+    The query returned multiple objects when only one was expected.
+    """
+    pass
+class YouTubeError(Exception):
+    """
+    The REST interface returned an error.
+    """
+    pass

pytube/models.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from os.path import normpath
+from urllib2 import urlopen
+class Video(object):
+    """
+    Class representation of a single instance of a YouTube video.
+    """
+    def __init__(self, url, filename, **attributes):
+        """
+        Define the variables required to declare a new video.
+        Keyword arguments:
+        extention -- The file extention the video should be saved as.
+        resolution -- The broadcasting standard of the video.
+        url -- The url of the video. (e.g.: youtube.com/watch?v=..)
+        filename -- The filename (minus the extention) to save the video.
+        """
+        self.url = url
+        self.filename = filename
+        self.__dict__.update(**attributes)
+    def download(self, path=None, chunk_size=8*1024):
+        """
+        Downloads the file of the URL defined within the class
+        instance.
+        Keyword arguments:
+        path -- Destination directory
+        chunk_size -- File size (in bytes) to write to buffer at a time
+        (default: 8 bytes).
+        """
+        path = (normpath(path) + '/' if path else '')
+        response = urlopen(self.url)
+        with open(path + self.filename, 'wb') as dst_file:
+            meta_data = dict(response.info().items())
+            file_size = int(meta_data.get("Content-Length") or
+                            meta_data.get("content-length"))
+            self._bytes_received = 0
+            self._buffer = buffer
+            while True:
+                self._buffer = response.read(chunk_size)
+                if not self._buffer:
+                    break
+                self._bytes_received += len(self._buffer)
+                dst_file.write(self._buffer)
+    def __repr__(self):
+        """A cleaner representation of the class instance."""
+        return "<Video: %s (.%s) - %s>" % (self.video_codec, self.extension,
+                                           self.resolution)
+    def __lt__(self, other):
+        if type(other) == Video:
+            v1 = "%s %s" % (self.extension, self.resolution)
+            v2 = "%s %s" % (other.extension, other.resolution)
+            return (v1 > v2) - (v1 < v2) < 0

pytube/utils.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import re
+def safe_filename(text, max_length=200):
+    """
+    Sanitizes filenames for many operating systems.
+    Keyword arguments:
+    text -- The unsanitized pending filename.
+    """
+    #Quickly truncates long filenames.
+    truncate = lambda text: text[:max_length].rsplit(' ', 0)[0]
+    #Tidy up ugly formatted filenames.
+    text = text.replace('_', ' ')
+    text = text.replace(':', ' -')
+    #NTFS forbids filenames containing characters in range 0-31 (0x00-0x1F)
+    ntfs = [chr(i) for i in range(0, 31)]
+    #Removing these SHOULD make most filename safe for a wide range
+    #of operating systems.
+    paranoid = ['\"', '\#', '\$', '\%', '\'', '\*', '\,', '\.', '\/', '\:',
+                '\;', '\<', '\>', '\?', '\\', '\^', '\|', '\~', '\\\\']
+    blacklist = re.compile('|'.join(ntfs + paranoid), re.UNICODE)
+    filename = blacklist.sub('', text)
+    return truncate(filename)

tests/__init__.py ADDED Viewed

File without changes