@@ -1,69 +1,68 @@
1 |
2 |
<div align="center">
3 |
4 |
<img src="" width="350" height="328" alt="pytube logo" />
5 |
6 |
<p align="center">
7 |
<img src="
8 |
<a href="https://
9 |
<a href="
10 |
<a href=
11 |
12 |
<a href="https://
13 |
14 |
15 |
16 |
17 |
*pytube* is a very serious, lightweight, dependency-free Python library (and command-line utility) for downloading YouTube Videos.
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
### Behold, a perfect balance of simplicity versus flexibility:
29 |
30 |
31 |
>>> YouTube
32 |
>>> yt = YouTube('')
33 |
>>> yt.streams
34 |
... .filter(progressive=True, file_extension='mp4')
35 |
... .order_by('resolution')
36 |
... .desc()
37 |
... .first()
38 |
... .download()
39 |
40 |
41 |
## Features
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
Download using pip via pypi.
55 |
56 |
57 |
$ pip install pytube
58 |
59 |
60 |
## Getting started
61 |
62 |
Let's begin with showing how easy it is to download a video with pytube:
63 |
64 |
65 |
>>> from pytube import YouTube
66 |
>>> YouTube('').streams.
67 |
68 |
This example will download the highest quality progressive download stream available.
69 |
@@ -71,7 +70,7 @@ Next, let's explore how we would view what video streams are available:
71 |
72 |
73 |
>>> yt = YouTube('')
74 |
>>> yt.streams
75 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
76 |
<Stream: itag="43" mime_type="video/webm" res="360p" fps="30fps" vcodec="vp8.0" acodec="vorbis">,
77 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">,
@@ -95,6 +94,9 @@ Next, let's explore how we would view what video streams are available:
95 |
<Stream: itag="250" mime_type="audio/webm" abr="70kbps" acodec="opus">,
96 |
<Stream: itag="251" mime_type="audio/webm" abr="160kbps" acodec="opus">]
97 |
98 |
You may notice that some streams listed have both a video codec and audio codec, while others have just video or just audio, this is a result of YouTube supporting a streaming technique called Dynamic Adaptive Streaming over HTTP (DASH).
99 |
100 |
In the context of pytube, the implications are for the highest quality streams; you now need to download both the audio and video tracks and then post-process them with software like FFmpeg to merge them.
@@ -104,7 +106,7 @@ The legacy streams that contain the audio and video in a single file (referred t
104 |
To only view these progressive download streams:
105 |
106 |
107 |
>>> yt.streams.filter(progressive=True)
108 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
109 |
<Stream: itag="43" mime_type="video/webm" res="360p" fps="30fps" vcodec="vp8.0" acodec="vorbis">,
110 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">,
@@ -115,7 +117,7 @@ To only view these progressive download streams:
115 |
Conversely, if you only want to see the DASH streams (also referred to as "adaptive") you can do:
116 |
117 |
118 |
>>> yt.streams.filter(adaptive=True)
119 |
[<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" vcodec="avc1.640028">,
120 |
<Stream: itag="248" mime_type="video/webm" res="1080p" fps="30fps" vcodec="vp9">,
121 |
<Stream: itag="136" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.4d401f">,
@@ -135,24 +137,26 @@ Conversely, if you only want to see the DASH streams (also referred to as "adapt
135 |
<Stream: itag="251" mime_type="audio/webm" abr="160kbps" acodec="opus">]
136 |
137 |
138 |
You can also download a complete Youtube playlist:
139 |
140 |
141 |
>>> from pytube import Playlist
142 |
143 |
144 |
145 |
>>> pl.download_all('/path/to/directory/')
146 |
147 |
This will download the highest progressive stream available (generally 720p) from the given playlist.
148 |
149 |
150 |
Pytube allows you to filter on every property available (see the documentation for the complete list), let's take a look at some of the most useful ones.
151 |
152 |
To list the audio only streams:
153 |
154 |
155 |
>>> yt.streams.filter(only_audio=True)
156 |
[<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" acodec="mp4a.40.2">,
157 |
<Stream: itag="171" mime_type="audio/webm" abr="128kbps" acodec="vorbis">,
158 |
<Stream: itag="249" mime_type="audio/webm" abr="50kbps" acodec="opus">,
@@ -163,7 +167,7 @@ To list the audio only streams:
163 |
To list only ``mp4`` streams:
164 |
165 |
166 |
>>> yt.streams.filter(subtype='mp4')
167 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
168 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">,
169 |
<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" vcodec="avc1.640028">,
@@ -178,9 +182,9 @@ To list only ``mp4`` streams:
178 |
Multiple filters can also be specified:
179 |
180 |
181 |
>>> yt.streams.filter(subtype='mp4', progressive=True)
182 |
>>> # this can also be expressed as:
183 |
>>> yt.streams.filter(subtype='mp4').filter(progressive=True)
184 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
185 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">]
186 |
@@ -194,14 +198,16 @@ You also have an interface to select streams by their itag, without needing to f
194 |
If you need to optimize for a specific feature, such as the "highest resolution" or "lowest average bitrate":
195 |
196 |
197 |
>>> yt.streams.filter(progressive=True).order_by('resolution').desc()
198 |
199 |
200 |
201 |
If your application requires post-processing logic, pytube allows you to specify an "on download complete" callback function:
202 |
203 |
204 |
>>> def convert_to_aac(stream,
205 |
return # do work
206 |
207 |
>>> yt.register_on_complete_callback(convert_to_aac)
@@ -210,7 +216,7 @@ If your application requires post-processing logic, pytube allows you to specify
210 |
Similarly, if your application requires on-download progress logic, pytube exposes a callback for this as well:
211 |
212 |
213 |
>>> def show_progress_bar(stream, chunk
214 |
return # do work
215 |
216 |
>>> yt.register_on_progress_callback(show_progress_bar)
@@ -218,17 +224,86 @@ Similarly, if your application requires on-download progress logic, pytube expos
218 |
219 |
## Command-line interface
220 |
221 |
222 |
223 |
Let's start with downloading:
224 |
225 |
226 |
227 |
228 |
To view available streams:
229 |
230 |
231 |
232 |
233 |
234 |
1 |
2 |
<div align="center">
3 |
<p align="center">
4 |
<a href=""><img src="" alt="pypi"></a>
5 |
<a href=""><img src="" /></a>
6 |
<a href=""><img src="" /></a>
7 |
<a href=''><img src='' alt='Documentation Status' /></a>
8 |
<a href=""><img src="" /></a>
9 |
<a href=""><img src="" alt="CodeFactor" /></a>
10 |
<a href=""><img src="" /></a>
11 |
12 |
13 |
14 |
# pytube3
15 |
16 |
## Table of Contents
17 |
* [Installation](#installation)
18 |
* [Quick start](#quick-start)
19 |
* [Features](#features)
20 |
* [Usage](#usage)
21 |
* [Command-line interface](#command-line-interface)
22 |
* [Development](#development)
23 |
* [GUIs and other libraries](#guis-and-other-libraries)
24 |
25 |
## Installation
26 |
27 |
Download using pip via pypi.
28 |
29 |
30 |
$ pip install pytube3 --upgrade
31 |
32 |
(Mac/homebrew users may need to use ``pip3``)
33 |
34 |
35 |
## Quick start
36 |
37 |
>>> from pytube import YouTube
38 |
>>> YouTube('').streams.get_highest_resolution().download()
39 |
40 |
>>> yt = YouTube('')
41 |
>>> yt.streams
42 |
... .filter(progressive=True, file_extension='mp4')
43 |
... .order_by('resolution')[-1]
44 |
... .download()
45 |
46 |
A GUI frontend for pytube3 is available at [YouTubeDownload](
47 |
48 |
## Features
49 |
* Support for Both Progressive & DASH Streams
50 |
* Support for downloading complete playlists
51 |
* Easily Register ``on_download_progress`` & ``on_download_complete`` callbacks
52 |
* Command-line Interface Included
53 |
* Caption Track Support
54 |
* Outputs Caption Tracks to .srt format (SubRip Subtitle)
55 |
* Ability to Capture Thumbnail URL.
56 |
* Extensively Documented Source Code
57 |
* No Third-Party Dependencies
58 |
59 |
## Usage
60 |
61 |
Let's begin with showing how easy it is to download a video with pytube:
62 |
63 |
64 |
>>> from pytube import YouTube
65 |
>>> YouTube('').streams[0].download()
66 |
67 |
This example will download the highest quality progressive download stream available.
68 |
70 |
71 |
72 |
>>> yt = YouTube('')
73 |
>>> print(yt.streams)
74 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
75 |
<Stream: itag="43" mime_type="video/webm" res="360p" fps="30fps" vcodec="vp8.0" acodec="vorbis">,
76 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">,
94 |
<Stream: itag="250" mime_type="audio/webm" abr="70kbps" acodec="opus">,
95 |
<Stream: itag="251" mime_type="audio/webm" abr="160kbps" acodec="opus">]
96 |
97 |
98 |
### Selecting an itag
99 |
100 |
You may notice that some streams listed have both a video codec and an audio codec, while others have just video or just audio; this is a result of YouTube supporting a streaming technique called Dynamic Adaptive Streaming over HTTP (DASH).
101 |
102 |
In the context of pytube, the implications are for the highest quality streams; you now need to download both the audio and video tracks and then post-process them with software like FFmpeg to merge them.
106 |
To only view these progressive download streams:
107 |
108 |
109 |
>>> yt.streams.filter(progressive=True)
110 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
111 |
<Stream: itag="43" mime_type="video/webm" res="360p" fps="30fps" vcodec="vp8.0" acodec="vorbis">,
112 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">,
117 |
Conversely, if you only want to see the DASH streams (also referred to as "adaptive") you can do:
118 |
119 |
120 |
>>> yt.streams.filter(adaptive=True)
121 |
[<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" vcodec="avc1.640028">,
122 |
<Stream: itag="248" mime_type="video/webm" res="1080p" fps="30fps" vcodec="vp9">,
123 |
<Stream: itag="136" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.4d401f">,
137 |
<Stream: itag="251" mime_type="audio/webm" abr="160kbps" acodec="opus">]
138 |
139 |
140 |
### Playlists
141 |
142 |
You can also download a complete YouTube playlist:
143 |
144 |
145 |
>>> from pytube import Playlist
146 |
>>> playlist = Playlist("")
147 |
>>> for video in playlist:
148 |
>>> video.streams.get_highest_resolution().download()
149 |
150 |
This will download the highest progressive stream available (generally 720p) from the given playlist.
151 |
152 |
### Filtering
153 |
154 |
Pytube allows you to filter on every property available (see the documentation for the complete list). Let's take a look at some of the most useful ones.
155 |
156 |
To list the audio only streams:
157 |
158 |
159 |
>>> yt.streams.filter(only_audio=True)
160 |
[<Stream: itag="140" mime_type="audio/mp4" abr="128kbps" acodec="mp4a.40.2">,
161 |
<Stream: itag="171" mime_type="audio/webm" abr="128kbps" acodec="vorbis">,
162 |
<Stream: itag="249" mime_type="audio/webm" abr="50kbps" acodec="opus">,
167 |
To list only ``mp4`` streams:
168 |
169 |
170 |
>>> yt.streams.filter(subtype='mp4')
171 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
172 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">,
173 |
<Stream: itag="137" mime_type="video/mp4" res="1080p" fps="30fps" vcodec="avc1.640028">,
182 |
Multiple filters can also be specified:
183 |
184 |
185 |
>>> yt.streams.filter(subtype='mp4', progressive=True)
186 |
>>> # this can also be expressed as:
187 |
>>> yt.streams.filter(subtype='mp4').filter(progressive=True)
188 |
[<Stream: itag="22" mime_type="video/mp4" res="720p" fps="30fps" vcodec="avc1.64001F" acodec="mp4a.40.2">,
189 |
<Stream: itag="18" mime_type="video/mp4" res="360p" fps="30fps" vcodec="avc1.42001E" acodec="mp4a.40.2">]
190 |
198 |
If you need to optimize for a specific feature, such as the "highest resolution" or "lowest average bitrate":
199 |
200 |
201 |
>>> yt.streams.filter(progressive=True).order_by('resolution').desc()
202 |
203 |
Note: Using ``order_by`` on a given attribute will filter out all streams missing that attribute.
204 |
205 |
### Callbacks
206 |
207 |
If your application requires post-processing logic, pytube allows you to specify an "on download complete" callback function:
208 |
209 |
210 |
>>> def convert_to_aac(stream: Stream, file_path: str):
211 |
return # do work
212 |
213 |
>>> yt.register_on_complete_callback(convert_to_aac)
216 |
Similarly, if your application requires on-download progress logic, pytube exposes a callback for this as well:
217 |
218 |
219 |
>>> def show_progress_bar(stream: Stream, chunk: bytes, bytes_remaining: int):
220 |
return # do work
221 |
222 |
>>> yt.register_on_progress_callback(show_progress_bar)
224 |
225 |
## Command-line interface
226 |
227 |
pytube3 ships with a simple command-line interface for downloading videos, playlists, and captions.
228 |
229 |
Let's start with downloading:
230 |
231 |
232 |
$ pytube3 --itag=18
233 |
234 |
To view available streams:
235 |
236 |
237 |
$ pytube3 --list
238 |
239 |
240 |
The complete set of flags is:
241 |
242 |
243 |
usage: pytube3 [-h] [--version] [--itag ITAG] [-r RESOLUTION] [-l] [-v]
244 |
[--build-playback-report] [-c [CAPTION_CODE]] [-t TARGET]
245 |
[-a [AUDIO]] [-f [FFMPEG]]
246 |
247 |
248 |
Command line application to download youtube videos.
249 |
250 |
positional arguments:
251 |
url The YouTube /watch or /playlist url
252 |
253 |
optional arguments:
254 |
-h, --help show this help message and exit
255 |
--version show program's version number and exit
256 |
--itag ITAG The itag for the desired stream
257 |
258 |
The resolution for the desired stream
259 |
-l, --list The list option causes pytube cli to return a list of
260 |
streams available to download
261 |
-v, --verbose Verbosity level, use up to 4 to increase logging -vvvv
262 |
263 |
Save the html and js to disk
264 |
-c [CAPTION_CODE], --caption-code [CAPTION_CODE]
265 |
Download srt captions for given language code. Prints
266 |
available language codes if no argument given
267 |
-t TARGET, --target TARGET
268 |
The output directory for the downloaded stream.
269 |
Default is current working directory
270 |
-a [AUDIO], --audio [AUDIO]
271 |
Download the audio for a given URL at the highest
272 |
bitrate available. Defaults to mp4 format if none is
273 |
274 |
-f [FFMPEG], --ffmpeg [FFMPEG]
275 |
Downloads the audio and video stream for resolution
276 |
provided. If no resolution is provided, downloads the
277 |
best resolution. Runs the command line program ffmpeg to
278 |
combine the audio and video
279 |
280 |
281 |
282 |
## Development
283 |
284 |
<a href="" target="_blank"><img alt="DeepSource" title="DeepSource" src=""></a>
285 |
<a href=""><img src=""/></a>
286 |
<a href=""><img src="" /></a>
287 |
288 |
Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change.
289 |
290 |
To run code checking before a PR use ``make test``
291 |
292 |
#### Virtual environment
293 |
294 |
Virtual environment is setup with [pipenv]( and can be automatically activated with [direnv](
295 |
296 |
#### Code Formatting
297 |
298 |
This project is linted with [pyflakes](, formatted with [black](, and typed with [mypy](
299 |
300 |
301 |
#### Code of Conduct
302 |
303 |
Treat other people with helpfulness, gratitude, and consideration! See the [Python Community Code of Conduct](
304 |
305 |
## GUIs and other libraries
306 |
* [YouTubeDownload]( - Featured GUI frontend for pytube3
307 |
* [Pytube-GUI]( - Simple GUI frontend for pytube3
308 |
* [StackOverflow questions](
309 |
* [PySlackers]( - Python Slack group
@@ -65,12 +65,6 @@ Mixins
65 |
.. automodule:: pytube.mixins
66 |
67 |
68 |
69 |
70 |
71 |
.. automodule:: pytube.compat
72 |
73 |
74 |
75 |
76 |
65 |
.. automodule:: pytube.mixins
66 |
67 |
68 |
69 |
70 |
@@ -34,9 +34,9 @@ source_suffix = '.rst'
34 |
master_doc = 'index'
35 |
36 |
# General information about the project.
37 |
project = '
38 |
copyright = '2019, Nick Ficano'
39 |
author = 'Nick Ficano'
40 |
41 |
# The version info for the project you're documenting, acts as replacement for
42 |
# |version| and |release|, also used in various other places throughout the
@@ -108,7 +108,7 @@ html_sidebars = {
108 |
# -- Options for HTMLHelp output ------------------------------------------
109 |
110 |
# Output file base name for HTML help builder.
111 |
htmlhelp_basename = '
112 |
113 |
114 |
# -- Options for LaTeX output ---------------------------------------------
@@ -120,7 +120,7 @@ latex_elements = {}
120 |
# author, documentclass [howto, manual, or own class]).
121 |
latex_documents = [
122 |
123 |
master_doc, '
124 |
'Nick Ficano', 'manual',
125 |
126 |
@@ -132,7 +132,7 @@ latex_documents = [
132 |
# (source start file, name, description, authors, manual section).
133 |
man_pages = [
134 |
135 |
master_doc, '
136 |
[author], 1,
137 |
138 |
@@ -145,8 +145,8 @@ man_pages = [
145 |
# dir menu entry, description, category)
146 |
texinfo_documents = [
147 |
148 |
master_doc, '
149 |
author, '
150 |
151 |
152 |
34 |
master_doc = 'index'
35 |
36 |
# General information about the project.
37 |
project = 'pytube3'
38 |
copyright = '2019, Nick Ficano'
39 |
author = 'Nick Ficano, Harold Martin'
40 |
41 |
# The version info for the project you're documenting, acts as replacement for
42 |
# |version| and |release|, also used in various other places throughout the
108 |
# -- Options for HTMLHelp output ------------------------------------------
109 |
110 |
# Output file base name for HTML help builder.
111 |
htmlhelp_basename = 'pytube3doc'
112 |
113 |
114 |
# -- Options for LaTeX output ---------------------------------------------
120 |
# author, documentclass [howto, manual, or own class]).
121 |
latex_documents = [
122 |
123 |
master_doc, 'pytube3.tex', 'pytube3 Documentation',
124 |
'Nick Ficano', 'manual',
125 |
126 |
132 |
# (source start file, name, description, authors, manual section).
133 |
man_pages = [
134 |
135 |
master_doc, 'pytube3', 'pytube3 Documentation',
136 |
[author], 1,
137 |
138 |
145 |
# dir menu entry, description, category)
146 |
texinfo_documents = [
147 |
148 |
master_doc, 'pytube3', 'pytube3 Documentation',
149 |
author, 'pytube3', 'One line description of project.',
150 |
151 |
152 |
@@ -1,31 +1,24 @@
1 |
2 |
sphinx-quickstart on Mon Oct 9 02:11:41 2017.
3 |
You can adapt this file completely to your liking, but it should at least
4 |
contain the root `toctree` directive.
5 |
6 |
7 |
8 |
Release v\ |version|. (:ref:`Installation <install>`)
9 |
10 |
.. image::
11 |
:alt: Pypi
12 |
13 |
14 |
.. image::
15 |
:alt: Build status
16 |
17 |
18 |
.. image::
19 |
20 |
:alt: Documentation Status
21 |
22 |
.. image::
23 |
:alt: Coverage
24 |
25 |
26 |
.. image::
27 |
:alt: Python Versions
28 |
29 |
30 |
**pytube** is a lightweight, Pythonic, dependency-free, library (and command-line utility) for downloading YouTube Videos.
31 |
@@ -33,6 +26,7 @@ Release v\ |version|. (:ref:`Installation <install>`)
33 |
34 |
**Behold, a perfect balance of simplicity versus flexibility**::
35 |
36 |
>>> YouTube('').streams.first().download()
37 |
>>> yt = YouTube('')
38 |
>>> yt.streams
1 |
.. pytube3 documentation master file, created by sphinx-quickstart on Mon Oct 9 02:11:41 2017.
2 |
3 |
4 |
5 |
Release v\ |version|. (:ref:`Installation <install>`)
6 |
7 |
.. image::
8 |
:alt: Pypi
9 |
10 |
11 |
.. image::
12 |
:alt: Build status
13 |
14 |
15 |
.. image::
16 |
:alt: Coverage
17 |
18 |
19 |
.. image::
20 |
:alt: Python Versions
21 |
22 |
23 |
**pytube** is a lightweight, Pythonic, dependency-free library (and command-line utility) for downloading YouTube videos.
24 |
26 |
27 |
**Behold, a perfect balance of simplicity versus flexibility**::
28 |
29 |
>>> from pytube import YouTube
30 |
>>> YouTube('').streams.first().download()
31 |
>>> yt = YouTube('')
32 |
>>> yt.streams
@@ -0,0 +1 @@
1 |
@@ -7,20 +7,20 @@ This part of the documentation covers the installation of pytube.
7 |
8 |
To install pytube, run the following command in your terminal::
9 |
10 |
$ pip install
11 |
12 |
Get the Source Code
13 |
14 |
15 |
pytube is actively developed on GitHub, where the source is `available <
16 |
17 |
You can either clone the public repository::
18 |
19 |
$ git clone git://
20 |
21 |
Or, download the `tarball <
22 |
23 |
$ curl -OL
24 |
# optionally, zipball is also available (for Windows users).
25 |
26 |
Once you have a copy of the source, you can embed it in your Python package, or install it into your site-packages by running::
7 |
8 |
To install pytube, run the following command in your terminal::
9 |
10 |
$ pip install pytube3
11 |
12 |
Get the Source Code
13 |
14 |
15 |
pytube is actively developed on GitHub, where the source is `available <>`_.
16 |
17 |
You can either clone the public repository::
18 |
19 |
$ git clone git://
20 |
21 |
Or, download the `tarball <>`_::
22 |
23 |
$ curl -OL
24 |
# optionally, zipball is also available (for Windows users).
25 |
26 |
Once you have a copy of the source, you can embed it in your Python package, or install it into your site-packages by running::
@@ -1,22 +1,18 @@
1 |
# -*- coding: utf-8 -*-
2 |
# flake8: noqa
3 |
# noreorder
4 |
5 |
Pytube: a very serious Python library for downloading YouTube Videos.
6 |
7 |
__title__ =
8 |
9 |
10 |
11 |
__copyright__ = 'Copyright 2019 Nick Ficano'
12 |
13 |
from pytube.
14 |
from pytube.query import CaptionQuery
15 |
from pytube.query import StreamQuery
16 |
from pytube.streams import Stream
17 |
from pytube.captions import Caption
18 |
from pytube.
19 |
from pytube.__main__ import YouTube
20 |
21 |
logger = create_logger()
22 |
-'%s v%s', __title__, __version__)
1 |
# -*- coding: utf-8 -*-
2 |
# flake8: noqa: F401
3 |
# noreorder
4 |
5 |
Pytube: a very serious Python library for downloading YouTube Videos.
6 |
7 |
__title__ = "pytube3"
8 |
__author__ = "Nick Ficano, Harold Martin"
9 |
__license__ = "MIT License"
10 |
__copyright__ = "Copyright 2019 Nick Ficano"
11 |
12 |
from pytube.version import __version__
13 |
from pytube.streams import Stream
14 |
from pytube.captions import Caption
15 |
from pytube.query import CaptionQuery
16 |
from pytube.query import StreamQuery
17 |
from pytube.__main__ import YouTube
18 |
from pytube.contrib.playlist import Playlist
@@ -7,38 +7,43 @@ exclusively on the developer interface. Pytube offloads the heavy lifting to
7 |
smaller peripheral modules and functions.
8 |
9 |
10 |
from __future__ import absolute_import
11 |
12 |
import json
13 |
import logging
14 |
15 |
from pytube import Caption
16 |
from pytube import CaptionQuery
17 |
from pytube import extract
18 |
from pytube import mixins
19 |
from pytube import request
20 |
from pytube import Stream
21 |
from pytube import StreamQuery
22 |
from pytube.
23 |
from pytube.
24 |
from pytube.exceptions import VideoUnavailable
25 |
from pytube.
26 |
27 |
logger = logging.getLogger(__name__)
28 |
29 |
30 |
class YouTube
31 |
"""Core developer interface for pytube."""
32 |
33 |
def __init__(
34 |
35 |
36 |
37 |
"""Construct a :class:`YouTube <YouTube>`.
38 |
39 |
:param str url:
40 |
A valid YouTube watch URL.
41 |
:param bool
42 |
Defers executing any network requests.
43 |
:param func on_progress_callback:
44 |
(Optional) User defined callback function for stream download
@@ -48,55 +53,45 @@ class YouTube(object):
48 |
complete events.
49 |
50 |
51 |
self.js = None
52 |
self.js_url = None # the url to the js, parsed from watch html
53 |
54 |
# note: vid_info may eventually be removed. It sounds like it once had
55 |
# additional formats, but that doesn't appear to still be the case.
56 |
57 |
58 |
self.vid_info_url = None
59 |
60 |
self.watch_html = None
61 |
self.embed_html = None
62 |
self.player_config_args =
63 |
# streams
64 |
self.age_restricted = None
65 |
66 |
self.fmt_streams = []
67 |
self.caption_tracks = []
68 |
69 |
# video_id part of /watch?v=<video_id>
70 |
self.video_id = extract.video_id(url)
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
# user defined callback functions.
80 |
'on_progress': on_progress_callback,
81 |
'on_complete': on_complete_callback,
82 |
83 |
84 |
if proxies:
85 |
86 |
87 |
if not defer_prefetch_init:
88 |
89 |
90 |
def prefetch_init(self):
91 |
"""Download data, descramble it, and build Stream instances.
92 |
93 |
:rtype: None
94 |
95 |
96 |
97 |
98 |
99 |
def init(self):
100 |
"""Descramble the stream data and build Stream instances.
101 |
102 |
The initialization process takes advantage of Python's
@@ -107,60 +102,55 @@ class YouTube(object):
107 |
:rtype: None
108 |
109 |
110 |
111 |
112 |
self.vid_info =
113 |
if self.age_restricted:
114 |
self.player_config_args = self.vid_info
115 |
116 |
117 |
118 |
119 |
120 |
# Fix for KeyError: 'title' issue #434
121 |
122 |
i_start = (
123 |
124 |
125 |
.index('<title>') + len('<title>')
126 |
127 |
i_end = self.watch_html.lower().index('</title>')
128 |
title = self.watch_html[i_start:i_end].strip()
129 |
index = title.lower().rfind(
130 |
title = title[:index] if index > 0 else title
131 |
132 |
133 |
self.vid_descr = extract.get_vid_descr(self.watch_html)
134 |
135 |
stream_maps = [
136 |
137 |
138 |
139 |
# unscramble the progressive and adaptive stream manifests.
140 |
for fmt in stream_maps:
141 |
if not self.age_restricted and fmt in self.vid_info:
142 |
143 |
144 |
145 |
146 |
147 |
148 |
self.js_url = extract.js_url(
149 |
self.embed_html, self.age_restricted,
150 |
151 |
self.js = request.get(self.js_url)
152 |
153 |
154 |
# build instances of :class:`Stream <Stream>`
155 |
156 |
157 |
# load the player_response object (contains subtitle information)
158 |
159 |
160 |
161 |
-'init finished successfully')
162 |
163 |
def prefetch(self):
164 |
"""Eagerly download all necessary data.
165 |
166 |
Eagerly executes all necessary network requests so all other
@@ -168,26 +158,32 @@ class YouTube(object):
168 |
which blocks for long periods of time.
169 |
170 |
:rtype: None
171 |
172 |
173 |
self.watch_html = request.get(url=self.watch_url)
174 |
175 |
raise VideoUnavailable(
176 |
self.embed_html = request.get(url=self.embed_url)
177 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
if not self.age_restricted:
187 |
self.js_url = extract.js_url(self.watch_html
188 |
self.js = request.get(self.js_url)
189 |
190 |
def initialize_stream_objects(self, fmt):
191 |
"""Convert manifest data to instances of :class:`Stream <Stream>`.
192 |
193 |
Take the unscrambled stream data and uses it to initialize
@@ -210,127 +206,131 @@ class YouTube(object):
210 |
211 |
212 |
213 |
214 |
215 |
216 |
Take the unscrambled player response data, and use it to initialize
217 |
instances of :class:`Caption <Caption>`.
218 |
219 |
:rtype: None
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
.get('player_response', {})
228 |
.get('captions', {})
229 |
.get('playerCaptionsTracklistRenderer', {})
230 |
.get('captionTracks', [])
231 |
232 |
233 |
234 |
235 |
236 |
def captions(self):
237 |
"""Interface to query caption tracks.
238 |
239 |
:rtype: :class:`CaptionQuery <CaptionQuery>`.
240 |
241 |
return CaptionQuery(
242 |
243 |
244 |
def streams(self):
245 |
"""Interface to query both adaptive (DASH) and progressive streams.
246 |
247 |
:rtype: :class:`StreamQuery <StreamQuery>`.
248 |
249 |
return StreamQuery(
250 |
251 |
252 |
def thumbnail_url(self):
253 |
"""Get the thumbnail url image.
254 |
255 |
:rtype: str
256 |
257 |
258 |
259 |
260 |
261 |
def title(self):
262 |
"""Get the video title.
263 |
264 |
:rtype: str
265 |
266 |
267 |
return self.player_config_args
268 |
269 |
270 |
def description(self):
271 |
"""Get the video description.
272 |
273 |
:rtype: str
274 |
275 |
276 |
return self.
277 |
278 |
279 |
def rating(self):
280 |
"""Get the video average rating.
281 |
282 |
283 |
284 |
285 |
return (
286 |
287 |
.get('player_response', {})
288 |
.get('videoDetails', {})
289 |
290 |
291 |
292 |
293 |
def length(self):
294 |
"""Get the video length in seconds.
295 |
296 |
:rtype: str
297 |
298 |
299 |
300 |
301 |
302 |
def views(self):
303 |
"""Get the number of the times the video has been viewed.
304 |
305 |
:rtype: str
306 |
307 |
308 |
return (
309 |
310 |
311 |
312 |
313 |
314 |
315 |
def register_on_progress_callback(self, func):
316 |
"""Register a download progress callback function post initialization.
317 |
318 |
:param callable func:
319 |
A callback function that takes ``stream``, ``chunk``,
320 |
321 |
322 |
:rtype: None
323 |
324 |
325 |
326 |
327 |
def register_on_complete_callback(self, func):
328 |
"""Register a download complete callback function post initialization.
329 |
330 |
:param callable func:
331 |
A callback function that takes ``stream`` and ``
332 |
333 |
:rtype: None
334 |
335 |
336 |
7 |
smaller peripheral modules and functions.
8 |
9 |
10 |
11 |
import json
12 |
import logging
13 |
from typing import Optional, Dict, List
14 |
from urllib.parse import parse_qsl
15 |
from html import unescape
16 |
17 |
from pytube import Caption
18 |
from pytube import CaptionQuery
19 |
from pytube import extract
20 |
from pytube import request
21 |
from pytube import Stream
22 |
from pytube import StreamQuery
23 |
from pytube.extract import apply_descrambler, apply_signature, get_ytplayer_config
24 |
from pytube.helpers import install_proxy
25 |
from pytube.exceptions import VideoUnavailable
26 |
from pytube.monostate import OnProgress, OnComplete, Monostate
27 |
28 |
logger = logging.getLogger(__name__)
29 |
30 |
31 |
class YouTube:
32 |
"""Core developer interface for pytube."""
33 |
34 |
def __init__(
35 |
36 |
url: str,
37 |
defer_prefetch_init: bool = False,
38 |
on_progress_callback: Optional[OnProgress] = None,
39 |
on_complete_callback: Optional[OnComplete] = None,
40 |
proxies: Dict[str, str] = None,
41 |
42 |
"""Construct a :class:`YouTube <YouTube>`.
43 |
44 |
:param str url:
45 |
A valid YouTube watch URL.
46 |
:param bool defer_prefetch_init:
47 |
Defers executing any network requests.
48 |
:param func on_progress_callback:
49 |
(Optional) User defined callback function for stream download
53 |
complete events.
54 |
55 |
56 |
self.js: Optional[str] = None # js fetched by js_url
57 |
self.js_url: Optional[str] = None # the url to the js, parsed from watch html
58 |
59 |
# note: vid_info may eventually be removed. It sounds like it once had
60 |
# additional formats, but that doesn't appear to still be the case.
61 |
62 |
# the url to vid info, parsed from watch html
63 |
self.vid_info_url: Optional[str] = None
64 |
self.vid_info_raw: Optional[str] = None # content fetched by vid_info_url
65 |
self.vid_info: Optional[Dict] = None # parsed content of vid_info_raw
66 |
67 |
self.watch_html: Optional[str] = None # the html of /watch?v=<video_id>
68 |
self.embed_html: Optional[str] = None
69 |
self.player_config_args: Dict = {} # inline js in the html containing
70 |
self.player_response: Dict = {}
71 |
# streams
72 |
self.age_restricted: Optional[bool] = None
73 |
74 |
self.fmt_streams: List[Stream] = []
75 |
76 |
# video_id part of /watch?v=<video_id>
77 |
self.video_id = extract.video_id(url)
78 |
79 |
self.watch_url = f"{self.video_id}"
80 |
self.embed_url = f"{self.video_id}"
81 |
82 |
# Shared between all instances of `Stream` (Borg pattern).
83 |
self.stream_monostate = Monostate(
84 |
on_progress=on_progress_callback, on_complete=on_complete_callback
85 |
86 |
87 |
if proxies:
88 |
89 |
90 |
if not defer_prefetch_init:
91 |
92 |
93 |
94 |
def descramble(self) -> None:
95 |
"""Descramble the stream data and build Stream instances.
96 |
97 |
The initialization process takes advantage of Python's
102 |
:rtype: None
103 |
104 |
105 |
+"init started")
106 |
107 |
self.vid_info = dict(parse_qsl(self.vid_info_raw))
108 |
if self.age_restricted:
109 |
self.player_config_args = self.vid_info
110 |
111 |
assert self.watch_html is not None
112 |
self.player_config_args = get_ytplayer_config(self.watch_html)["args"]
113 |
114 |
# Fix for KeyError: 'title' issue #434
115 |
if "title" not in self.player_config_args: # type: ignore
116 |
i_start = self.watch_html.lower().index("<title>") + len("<title>")
117 |
i_end = self.watch_html.lower().index("</title>")
118 |
title = self.watch_html[i_start:i_end].strip()
119 |
index = title.lower().rfind(" - youtube")
120 |
title = title[:index] if index > 0 else title
121 |
self.player_config_args["title"] = unescape(title)
122 |
123 |
124 |
stream_maps = ["url_encoded_fmt_stream_map"]
125 |
if "adaptive_fmts" in self.player_config_args:
126 |
127 |
128 |
# unscramble the progressive and adaptive stream manifests.
129 |
for fmt in stream_maps:
130 |
if not self.age_restricted and fmt in self.vid_info:
131 |
apply_descrambler(self.vid_info, fmt)
132 |
apply_descrambler(self.player_config_args, fmt)
133 |
134 |
if not self.js:
135 |
if not self.embed_html:
136 |
self.embed_html = request.get(url=self.embed_url)
137 |
self.js_url = extract.js_url(self.embed_html)
138 |
self.js = request.get(self.js_url)
139 |
140 |
apply_signature(self.player_config_args, fmt, self.js)
141 |
142 |
# build instances of :class:`Stream <Stream>`
143 |
144 |
145 |
# load the player_response object (contains subtitle information)
146 |
self.player_response = json.loads(self.player_config_args["player_response"])
147 |
del self.player_config_args["player_response"]
148 |
self.stream_monostate.title = self.title
149 |
self.stream_monostate.duration = self.length
150 |
151 |
+"init finished successfully")
152 |
153 |
def prefetch(self) -> None:
154 |
"""Eagerly download all necessary data.
155 |
156 |
Eagerly executes all necessary network requests so all other
158 |
which blocks for long periods of time.
159 |
160 |
:rtype: None
161 |
162 |
self.watch_html = request.get(url=self.watch_url)
163 |
if self.watch_html is None:
164 |
raise VideoUnavailable(video_id=self.video_id)
165 |
self.age_restricted = extract.is_age_restricted(self.watch_html)
166 |
167 |
if not self.age_restricted and "This video is private" in self.watch_html:
168 |
raise VideoUnavailable(video_id=self.video_id)
169 |
170 |
if self.age_restricted:
171 |
if not self.embed_html:
172 |
self.embed_html = request.get(url=self.embed_url)
173 |
self.vid_info_url = extract.video_info_url_age_restricted(
174 |
self.video_id, self.watch_url
175 |
176 |
177 |
self.vid_info_url = extract.video_info_url(
178 |
video_id=self.video_id, watch_url=self.watch_url
179 |
180 |
181 |
self.vid_info_raw = request.get(self.vid_info_url)
182 |
if not self.age_restricted:
183 |
self.js_url = extract.js_url(self.watch_html)
184 |
self.js = request.get(self.js_url)
185 |
186 |
def initialize_stream_objects(self, fmt: str) -> None:
187 |
"""Convert manifest data to instances of :class:`Stream <Stream>`.
188 |
189 |
Take the unscrambled stream data and uses it to initialize
206 |
207 |
208 |
209 |
210 |
def caption_tracks(self) -> List[Caption]:
211 |
"""Get a list of :class:`Caption <Caption>`.
212 |
213 |
:rtype: List[Caption]
214 |
215 |
raw_tracks = (
216 |
self.player_response.get("captions", {})
217 |
.get("playerCaptionsTracklistRenderer", {})
218 |
.get("captionTracks", [])
219 |
220 |
return [Caption(track) for track in raw_tracks]
221 |
222 |
223 |
def captions(self) -> CaptionQuery:
224 |
"""Interface to query caption tracks.
225 |
226 |
:rtype: :class:`CaptionQuery <CaptionQuery>`.
227 |
228 |
return CaptionQuery(self.caption_tracks)
229 |
230 |
231 |
def streams(self) -> StreamQuery:
232 |
"""Interface to query both adaptive (DASH) and progressive streams.
233 |
234 |
:rtype: :class:`StreamQuery <StreamQuery>`.
235 |
236 |
return StreamQuery(self.fmt_streams)
237 |
238 |
239 |
def thumbnail_url(self) -> str:
240 |
"""Get the thumbnail url image.
241 |
242 |
:rtype: str
243 |
244 |
245 |
thumbnail_details = (
246 |
self.player_response.get("videoDetails", {})
247 |
.get("thumbnail", {})
248 |
249 |
250 |
if thumbnail_details:
251 |
thumbnail_details = thumbnail_details[-1] # last item has max size
252 |
return thumbnail_details["url"]
253 |
254 |
return f"{self.video_id}/maxresdefault.jpg"
255 |
256 |
257 |
def title(self) -> str:
258 |
"""Get the video title.
259 |
260 |
:rtype: str
261 |
262 |
263 |
return self.player_config_args.get("title") or (
264 |
self.player_response.get("videoDetails", {}).get("title")
265 |
266 |
267 |
268 |
def description(self) -> str:
269 |
"""Get the video description.
270 |
271 |
:rtype: str
272 |
273 |
274 |
return self.player_response.get("videoDetails", {}).get(
275 |
276 |
) or extract._get_vid_descr(self.watch_html)
277 |
278 |
279 |
def rating(self) -> float:
280 |
"""Get the video average rating.
281 |
282 |
:rtype: float
283 |
284 |
285 |
return self.player_response.get("videoDetails", {}).get("averageRating")
286 |
287 |
288 |
def length(self) -> int:
289 |
"""Get the video length in seconds.
290 |
291 |
:rtype: int
292 |
293 |
294 |
return int(
295 |
296 |
or (self.player_response.get("videoDetails", {}).get("lengthSeconds"))
297 |
298 |
299 |
300 |
def views(self) -> int:
301 |
"""Get the number of the times the video has been viewed.
302 |
303 |
:rtype: int
304 |
305 |
306 |
return int(self.player_response.get("videoDetails", {}).get("viewCount"))
307 |
308 |
309 |
def author(self) -> str:
310 |
"""Get the video author.
311 |
:rtype: str
312 |
313 |
return self.player_response.get("videoDetails", {}).get("author", "unknown")
314 |
315 |
def register_on_progress_callback(self, func: OnProgress):
316 |
"""Register a download progress callback function post initialization.
317 |
318 |
:param callable func:
319 |
A callback function that takes ``stream``, ``chunk``,
320 |
and ``bytes_remaining`` as parameters.
321 |
322 |
:rtype: None
323 |
324 |
325 |
self.stream_monostate.on_progress = func
326 |
327 |
def register_on_complete_callback(self, func: OnComplete):
328 |
"""Register a download complete callback function post initialization.
329 |
330 |
:param callable func:
331 |
A callback function that takes ``stream`` and ``file_path``.
332 |
333 |
:rtype: None
334 |
335 |
336 |
self.stream_monostate.on_complete = func
@@ -1,32 +1,33 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""This module contains a container for caption tracks."""
3 |
import math
4 |
import time
5 |
import xml.etree.ElementTree as ElementTree
6 |
7 |
from pytube import request
8 |
9 |
10 |
11 |
class Caption:
12 |
"""Container for caption tracks."""
13 |
14 |
def __init__(self, caption_track):
15 |
"""Construct a :class:`Caption <Caption>`.
16 |
17 |
:param dict caption_track:
18 |
Caption track data extracted from ``watch_html``.
19 |
20 |
self.url = caption_track.get(
21 |
- = caption_track[
22 |
self.code = caption_track[
23 |
24 |
25 |
def xml_captions(self):
26 |
"""Download the xml caption tracks."""
27 |
return request.get(self.url)
28 |
29 |
def generate_srt_captions(self):
30 |
"""Generate "SubRip Subtitle" captions.
31 |
32 |
Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
@@ -34,22 +35,22 @@ class Caption:
34 |
35 |
return self.xml_caption_to_srt(self.xml_captions)
36 |
37 |
38 |
"""Convert decimal durations into proper srt format.
39 |
40 |
:rtype: str
41 |
42 |
SubRip Subtitle (str) formatted time duration.
43 |
44 |
45 |
46 |
47 |
48 |
time_fmt = time.strftime(
49 |
ms =
50 |
return time_fmt + ms
51 |
52 |
def xml_caption_to_srt(self, xml_captions):
53 |
"""Convert xml caption tracks to "SubRip Subtitle (srt)".
54 |
55 |
:param str xml_captions:
@@ -57,28 +58,79 @@ class Caption:
57 |
58 |
segments = []
59 |
root = ElementTree.fromstring(xml_captions)
60 |
for i, child in enumerate(root
61 |
text = child.text or
62 |
caption = unescape(
63 |
64 |
65 |
.replace(' ', ' '),
66 |
67 |
duration = float(child.attrib['dur'])
68 |
start = float(child.attrib['start'])
69 |
end = start + duration
70 |
sequence_number = i + 1 # convert from 0-indexed to 1.
71 |
line = (
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
def __repr__(self):
83 |
"""Printable object representation."""
84 |
return'<Caption lang="{}" code="{s.code}">'.format(s=self)
1 |
# -*- coding: utf-8 -*-
2 |
import math
3 |
import os
4 |
import time
5 |
import xml.etree.ElementTree as ElementTree
6 |
from typing import Dict, Optional
7 |
from pytube import request
8 |
from html import unescape
9 |
from pytube.helpers import safe_filename, target_directory
10 |
11 |
12 |
class Caption:
13 |
"""Container for caption tracks."""
14 |
15 |
def __init__(self, caption_track: Dict):
16 |
"""Construct a :class:`Caption <Caption>`.
17 |
18 |
:param dict caption_track:
19 |
Caption track data extracted from ``watch_html``.
20 |
21 |
self.url = caption_track.get("baseUrl")
22 |
+ = caption_track["name"]["simpleText"]
23 |
self.code = caption_track["languageCode"]
24 |
25 |
26 |
def xml_captions(self) -> str:
27 |
"""Download the xml caption tracks."""
28 |
return request.get(self.url)
29 |
30 |
def generate_srt_captions(self) -> str:
31 |
"""Generate "SubRip Subtitle" captions.
32 |
33 |
Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
35 |
36 |
return self.xml_caption_to_srt(self.xml_captions)
37 |
38 |
39 |
def float_to_srt_time_format(d: float) -> str:
40 |
"""Convert decimal durations into proper srt format.
41 |
42 |
:rtype: str
43 |
44 |
SubRip Subtitle (str) formatted time duration.
45 |
46 |
float_to_srt_time_format(3.89) -> '00:00:03,890'
47 |
48 |
fraction, whole = math.modf(d)
49 |
time_fmt = time.strftime("%H:%M:%S,", time.gmtime(whole))
50 |
ms = f"{fraction:.3f}".replace("0.", "")
51 |
return time_fmt + ms
52 |
53 |
def xml_caption_to_srt(self, xml_captions: str) -> str:
54 |
"""Convert xml caption tracks to "SubRip Subtitle (srt)".
55 |
56 |
:param str xml_captions:
58 |
59 |
segments = []
60 |
root = ElementTree.fromstring(xml_captions)
61 |
for i, child in enumerate(list(root)):
62 |
text = child.text or ""
63 |
caption = unescape(text.replace("\n", " ").replace(" ", " "),)
64 |
duration = float(child.attrib["dur"])
65 |
start = float(child.attrib["start"])
66 |
end = start + duration
67 |
sequence_number = i + 1 # convert from 0-indexed to 1.
68 |
line = "{seq}\n{start} --> {end}\n{text}\n".format(
69 |
70 |
71 |
72 |
73 |
74 |
75 |
return "\n".join(segments).strip()
76 |
77 |
def download(
78 |
79 |
title: str,
80 |
srt: bool = True,
81 |
output_path: Optional[str] = None,
82 |
filename_prefix: Optional[str] = None,
83 |
) -> str:
84 |
"""Write the media stream to disk.
85 |
86 |
:param title:
87 |
Output filename (stem only) for writing media file.
88 |
If one is not specified, the default filename is used.
89 |
:type title: str
90 |
:param srt:
91 |
Set to True to download srt, False to download xml. Defaults to True.
92 |
:type srt: bool
93 |
:param output_path:
94 |
(optional) Output path for writing media file. If one is not
95 |
specified, defaults to the current working directory.
96 |
:type output_path: str or None
97 |
:param filename_prefix:
98 |
(optional) A string that will be prepended to the filename.
99 |
For example a number in a playlist or the name of a series.
100 |
If one is not specified, nothing will be prepended
101 |
This is separate from filename so you can use the default
102 |
filename but still add a prefix.
103 |
:type filename_prefix: str or None
104 |
105 |
:rtype: str
106 |
107 |
if title.endswith(".srt") or title.endswith(".xml"):
108 |
filename = ".".join(title.split(".")[:-1])
109 |
110 |
filename = title
111 |
112 |
if filename_prefix:
113 |
filename = f"{safe_filename(filename_prefix)}{filename}"
114 |
115 |
filename = safe_filename(filename)
116 |
117 |
filename += f" ({self.code})"
118 |
119 |
if srt:
120 |
filename += ".srt"
121 |
122 |
filename += ".xml"
123 |
124 |
file_path = os.path.join(target_directory(output_path), filename)
125 |
126 |
with open(file_path, "w", encoding="utf-8") as file_handle:
127 |
if srt:
128 |
129 |
130 |
131 |
132 |
return file_path
133 |
134 |
def __repr__(self):
135 |
"""Printable object representation."""
136 |
return '<Caption lang="{}" code="{s.code}">'.format(s=self)
@@ -1,6 +1,7 @@
1 |
# -*- coding: utf-8 -*-
2 |
3 |
This module
4 |
5 |
YouTube's strategy to restrict downloading videos is to send a ciphered version
6 |
of the signature to the client, along with the decryption algorithm obfuscated
@@ -13,48 +14,116 @@ functions" (2) maps them to Python equivalents and (3) taking the ciphered
13 |
signature and decoding it.
14 |
15 |
16 |
from __future__ import absolute_import
17 |
18 |
import logging
19 |
import pprint
20 |
import re
21 |
from itertools import chain
22 |
23 |
from pytube.exceptions import RegexMatchError
24 |
from pytube.helpers import regex_search
25 |
26 |
27 |
logger = logging.getLogger(__name__)
28 |
29 |
30 |
31 |
"""Extract the name of the function responsible for computing the signature.
32 |
33 |
:param str js:
34 |
The contents of the base.js asset file.
35 |
36 |
37 |
# c&&d.set("signature", EE(c));
38 |
39 |
40 |
41 |
42 |
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
return regex_search(pattern, js, group=1)
55 |
56 |
57 |
def get_transform_plan(js):
58 |
"""Extract the "transform plan".
59 |
60 |
The "transform plan" is the functions that the ciphered signature is
@@ -65,7 +134,6 @@ def get_transform_plan(js):
65 |
66 |
67 |
68 |
>>> get_transform_plan(js)
69 |
70 |
71 |
@@ -76,12 +144,12 @@ def get_transform_plan(js):
76 |
77 |
78 |
name = re.escape(get_initial_function_name(js))
79 |
pattern = r
80 |
81 |
return regex_search(pattern, js, group=1).split(
82 |
83 |
84 |
def get_transform_object(js, var):
85 |
"""Extract the "transform object".
86 |
87 |
The "transform object" contains the function definitions referenced in the
@@ -103,16 +171,17 @@ def get_transform_object(js, var):
103 |
'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}']
104 |
105 |
106 |
pattern = r
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
"""Build a transform function lookup.
117 |
118 |
Build a lookup table of obfuscated JavaScript function names to the
@@ -129,13 +198,13 @@ def get_transform_map(js, var):
129 |
mapper = {}
130 |
for obj in transform_object:
131 |
# AJ:function(a){a.reverse()} => AJ, function(a){a.reverse()}
132 |
name, function = obj.split(
133 |
fn = map_functions(function)
134 |
mapper[name] = fn
135 |
return mapper
136 |
137 |
138 |
def reverse(arr,
139 |
"""Reverse elements in a list.
140 |
141 |
This function is equivalent to:
@@ -155,7 +224,7 @@ def reverse(arr, b):
155 |
return arr[::-1]
156 |
157 |
158 |
def splice(arr, b):
159 |
"""Add/remove items to/from a list.
160 |
161 |
This function is equivalent to:
@@ -169,10 +238,10 @@ def splice(arr, b):
169 |
>>> splice([1, 2, 3, 4], 2)
170 |
[1, 2]
171 |
172 |
return arr[:b] + arr[b * 2:]
173 |
174 |
175 |
def swap(arr, b):
176 |
"""Swap positions at b modulus the list length.
177 |
178 |
This function is equivalent to:
@@ -187,10 +256,10 @@ def swap(arr, b):
187 |
[3, 2, 1, 4]
188 |
189 |
r = b % len(arr)
190 |
return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1:]))
191 |
192 |
193 |
def map_functions(js_func):
194 |
"""For a given JavaScript transform function, return the Python equivalent.
195 |
196 |
:param str js_func:
@@ -199,80 +268,19 @@ def map_functions(js_func):
199 |
200 |
mapper = (
201 |
# function(a){a.reverse()}
202 |
203 |
# function(a,b){a.splice(0,b)}
204 |
205 |
# function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
206 |
207 |
# function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
208 |
209 |
210 |
211 |
212 |
213 |
214 |
for pattern, fn in mapper:
215 |
if, js_func):
216 |
return fn
217 |
raise RegexMatchError(
218 |
'could not find python equivalent function for: ',
219 |
220 |
221 |
222 |
223 |
def parse_function(js_func):
224 |
"""Parse the Javascript transform function.
225 |
226 |
Break a JavaScript transform function down into a two element ``tuple``
227 |
containing the function name and some integer-based argument.
228 |
229 |
:param str js_func:
230 |
The JavaScript version of the transform function.
231 |
:rtype: tuple
232 |
233 |
two element tuple containing the function name and an argument.
234 |
235 |
236 |
237 |
>>> parse_function('DE.AJ(a,15)')
238 |
('AJ', 15)
239 |
240 |
241 |
logger.debug('parsing transform function')
242 |
return regex_search(r'\w+\.(\w+)\(\w,(\d+)\)', js_func, groups=True)
243 |
244 |
245 |
def get_signature(js, ciphered_signature):
246 |
"""Decipher the signature.
247 |
248 |
Taking the ciphered signature, applies the transform functions.
249 |
250 |
:param str js:
251 |
The contents of the base.js asset file.
252 |
:param str ciphered_signature:
253 |
The ciphered signature sent in the ``player_config``.
254 |
:rtype: str
255 |
256 |
Decrypted signature required to download the media content.
257 |
258 |
259 |
tplan = get_transform_plan(js)
260 |
# DE.AJ(a,15) => DE, AJ(a,15)
261 |
var, _ = tplan[0].split('.')
262 |
tmap = get_transform_map(js, var)
263 |
signature = [s for s in ciphered_signature]
264 |
265 |
for js_func in tplan:
266 |
name, argument = parse_function(js_func)
267 |
signature = tmap[name](signature, int(argument))
268 |
269 |
'applied transform function\n%s', pprint.pformat(
270 |
271 |
'output': ''.join(signature),
272 |
'js_function': name,
273 |
'argument': int(argument),
274 |
'function': tmap[name],
275 |
}, indent=2,
276 |
277 |
278 |
return ''.join(signature)
1 |
# -*- coding: utf-8 -*-
2 |
3 |
4 |
This module contains all logic necessary to decipher the signature.
5 |
6 |
YouTube's strategy to restrict downloading videos is to send a ciphered version
7 |
of the signature to the client, along with the decryption algorithm obfuscated
14 |
signature and decoding it.
15 |
16 |
17 |
import logging
18 |
import re
19 |
from itertools import chain
20 |
from typing import List, Tuple, Dict, Callable, Any, Optional
21 |
22 |
from pytube.exceptions import RegexMatchError
23 |
from pytube.helpers import regex_search, cache
24 |
25 |
logger = logging.getLogger(__name__)
26 |
27 |
28 |
class Cipher:
29 |
def __init__(self, js: str):
30 |
self.transform_plan: List[str] = get_transform_plan(js)
31 |
var, _ = self.transform_plan[0].split(".")
32 |
self.transform_map = get_transform_map(js, var)
33 |
self.js_func_regex = re.compile(r"\w+\.(\w+)\(\w,(\d+)\)")
34 |
35 |
def get_signature(self, ciphered_signature: str) -> str:
36 |
"""Decipher the signature.
37 |
38 |
Taking the ciphered signature, applies the transform functions.
39 |
40 |
:param str ciphered_signature:
41 |
The ciphered signature sent in the ``player_config``.
42 |
:rtype: str
43 |
44 |
Decrypted signature required to download the media content.
45 |
46 |
signature = list(ciphered_signature)
47 |
48 |
for js_func in self.transform_plan:
49 |
name, argument = self.parse_function(js_func) # type: ignore
50 |
signature = self.transform_map[name](signature, argument)
51 |
52 |
"applied transform function\n"
53 |
"output: %s\n"
54 |
"js_function: %s\n"
55 |
"argument: %d\n"
56 |
"function: %s",
57 |
58 |
59 |
60 |
61 |
62 |
63 |
return "".join(signature)
64 |
65 |
66 |
def parse_function(self, js_func: str) -> Tuple[str, int]:
67 |
"""Parse the Javascript transform function.
68 |
69 |
Break a JavaScript transform function down into a two element ``tuple``
70 |
containing the function name and some integer-based argument.
71 |
72 |
:param str js_func:
73 |
The JavaScript version of the transform function.
74 |
:rtype: tuple
75 |
76 |
two element tuple containing the function name and an argument.
77 |
78 |
79 |
80 |
81 |
('AJ', 15)
82 |
83 |
84 |
logger.debug("parsing transform function")
85 |
parse_match =
86 |
if not parse_match:
87 |
raise RegexMatchError(caller="parse_function", pattern="js_func_regex")
88 |
fn_name, fn_arg = parse_match.groups()
89 |
return fn_name, int(fn_arg)
90 |
91 |
92 |
def get_initial_function_name(js: str) -> str:
93 |
"""Extract the name of the function responsible for computing the signature.
94 |
:param str js:
95 |
The contents of the base.js asset file.
96 |
:rtype: str
97 |
98 |
Function name from regex match
99 |
100 |
101 |
function_patterns = [
102 |
r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
103 |
r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
104 |
r'\b(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
105 |
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501
106 |
107 |
108 |
r"yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
109 |
r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
110 |
r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
111 |
r"\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
112 |
r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
113 |
r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(", # noqa: E501
114 |
115 |
logger.debug("finding initial function name")
116 |
for pattern in function_patterns:
117 |
regex = re.compile(pattern)
118 |
function_match =
119 |
if function_match:
120 |
logger.debug("finished regex search, matched: %s", pattern)
121 |
122 |
123 |
raise RegexMatchError(caller="get_initial_function_name", pattern="multiple")
124 |
125 |
126 |
def get_transform_plan(js: str) -> List[str]:
127 |
"""Extract the "transform plan".
128 |
129 |
The "transform plan" is the functions that the ciphered signature is
134 |
135 |
136 |
137 |
138 |
139 |
144 |
145 |
146 |
name = re.escape(get_initial_function_name(js))
147 |
pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name
148 |
logger.debug("getting transform plan")
149 |
return regex_search(pattern, js, group=1).split(";")
150 |
151 |
152 |
def get_transform_object(js: str, var: str) -> List[str]:
153 |
"""Extract the "transform object".
154 |
155 |
The "transform object" contains the function definitions referenced in the
171 |
'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}']
172 |
173 |
174 |
pattern = r"var %s={(.*?)};" % re.escape(var)
175 |
logger.debug("getting transform object")
176 |
regex = re.compile(pattern, flags=re.DOTALL)
177 |
transform_match =
178 |
if not transform_match:
179 |
raise RegexMatchError(caller="get_transform_object", pattern=pattern)
180 |
181 |
return"\n", " ").split(", ")
182 |
183 |
184 |
def get_transform_map(js: str, var: str) -> Dict:
185 |
"""Build a transform function lookup.
186 |
187 |
Build a lookup table of obfuscated JavaScript function names to the
198 |
mapper = {}
199 |
for obj in transform_object:
200 |
# AJ:function(a){a.reverse()} => AJ, function(a){a.reverse()}
201 |
name, function = obj.split(":", 1)
202 |
fn = map_functions(function)
203 |
mapper[name] = fn
204 |
return mapper
205 |
206 |
207 |
def reverse(arr: List, _: Optional[Any]):
208 |
"""Reverse elements in a list.
209 |
210 |
This function is equivalent to:
224 |
return arr[::-1]
225 |
226 |
227 |
def splice(arr: List, b: int):
228 |
"""Add/remove items to/from a list.
229 |
230 |
This function is equivalent to:
238 |
>>> splice([1, 2, 3, 4], 2)
239 |
[1, 2]
240 |
241 |
return arr[:b] + arr[b * 2 :]
242 |
243 |
244 |
def swap(arr: List, b: int):
245 |
"""Swap positions at b modulus the list length.
246 |
247 |
This function is equivalent to:
256 |
[3, 2, 1, 4]
257 |
258 |
r = b % len(arr)
259 |
return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
260 |
261 |
262 |
def map_functions(js_func: str) -> Callable:
263 |
"""For a given JavaScript transform function, return the Python equivalent.
264 |
265 |
:param str js_func:
268 |
269 |
mapper = (
270 |
# function(a){a.reverse()}
271 |
(r"{\w\.reverse\(\)}", reverse),
272 |
# function(a,b){a.splice(0,b)}
273 |
(r"{\w\.splice\(0,\w\)}", splice),
274 |
# function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
275 |
(r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}", swap),
276 |
# function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
277 |
278 |
279 |
280 |
281 |
282 |
283 |
for pattern, fn in mapper:
284 |
if, js_func):
285 |
return fn
286 |
raise RegexMatchError(caller="map_functions", pattern="multiple")
@@ -1,7 +1,6 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""A simple command line application to download youtube videos."""
3 |
from __future__ import absolute_import
4 |
from __future__ import print_function
5 |
6 |
import argparse
7 |
import datetime as dt
@@ -9,97 +8,176 @@ import gzip
9 |
import json
10 |
import logging
11 |
import os
12 |
import sys
13 |
14 |
from pytube import __version__
15 |
from pytube import YouTube
16 |
17 |
18 |
logger = logging.getLogger(__name__)
19 |
20 |
21 |
def main():
22 |
"""Command line application to download youtube videos."""
23 |
parser = argparse.ArgumentParser(description=main.__doc__)
24 |
25 |
26 |
27 |
version='%(prog)s ' + __version__,
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
logging.getLogger().setLevel(max(3 - args.verbosity, 0) * 10)
52 |
53 |
if not args.url:
54 |
55 |
56 |
57 |
if args.list:
58 |
59 |
60 |
elif args.build_playback_report:
61 |
62 |
63 |
elif args.itag:
64 |
download(args.url, args.itag)
65 |
66 |
67 |
def build_playback_report(
68 |
"""Serialize the request data to json for offline debugging.
69 |
70 |
71 |
72 |
73 |
yt = YouTube(url)
74 |
ts = int(dt.datetime.utcnow().timestamp())
75 |
fp = os.path.join(
76 |
77 |
'yt-video-{yt.video_id}-{ts}.json.gz'.format(yt=yt, ts=ts),
78 |
79 |
80 |
js =
81 |
watch_html =
82 |
vid_info =
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
return int(rows), int(columns)
100 |
101 |
102 |
def display_progress_bar(bytes_received, filesize, ch='█', scale=0.55):
103 |
"""Display a simple, pretty progress bar.
104 |
105 |
@@ -112,77 +190,285 @@ def display_progress_bar(bytes_received, filesize, ch='█', scale=0.55):
112 |
written to disk.
113 |
:param int filesize:
114 |
File size of the media stream in bytes.
115 |
:param ch
116 |
Character to use for presenting progress segment.
117 |
:param float scale:
118 |
119 |
120 |
121 |
122 |
max_width = int(columns * scale)
123 |
124 |
filled = int(round(max_width * bytes_received / float(filesize)))
125 |
remaining = max_width - filled
126 |
127 |
percent = round(100.0 * bytes_received / float(filesize), 1)
128 |
text =
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
An instance of :class:`Stream <Stream>` being downloaded.
138 |
:param file_handle:
139 |
The file handle where the media is being written to.
140 |
:type file_handle:
141 |
142 |
:param int bytes_remaining:
143 |
How many bytes remain to be downloaded.
144 |
145 |
146 |
filesize = stream.filesize
147 |
bytes_received = filesize - bytes_remaining
148 |
display_progress_bar(bytes_received, filesize)
149 |
150 |
151 |
152 |
"""Start downloading a YouTube video.
153 |
154 |
155 |
A valid YouTube
156 |
157 |
YouTube format identifier code.
158 |
159 |
160 |
# TODO(nficano): allow download target to be specified
161 |
# TODO(nficano): allow dash itags to be selected
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
except KeyboardInterrupt:
172 |
173 |
174 |
175 |
def display_streams(
176 |
"""Probe YouTube video and lists its available formats.
177 |
178 |
179 |
A valid YouTube watch URL.
180 |
181 |
182 |
183 |
for stream in yt.streams.all():
184 |
185 |
186 |
187 |
188 |
1 |
#!/usr/bin/env python3
2 |
# -*- coding: utf-8 -*-
3 |
"""A simple command line application to download youtube videos."""
4 |
5 |
import argparse
6 |
import datetime as dt
8 |
import json
9 |
import logging
10 |
import os
11 |
import shutil
12 |
import sys
13 |
import subprocess # nosec
14 |
from typing import List, Optional
15 |
16 |
from pytube import __version__, CaptionQuery, Stream, Playlist
17 |
from pytube import YouTube
18 |
from pytube.exceptions import PytubeError
19 |
from pytube.helpers import safe_filename, setup_logger
20 |
21 |
22 |
def main():
23 |
"""Command line application to download youtube videos."""
24 |
# noinspection PyTypeChecker
25 |
parser = argparse.ArgumentParser(description=main.__doc__)
26 |
args = _parse_args(parser)
27 |
if args.verbosity:
28 |
log_level = min(args.verbosity, 4) * 10
29 |
setup_logger(logging.FATAL - log_level)
30 |
31 |
if not args.url or "youtu" not in args.url:
32 |
33 |
34 |
35 |
if "/playlist" in args.url:
36 |
print("Loading playlist...")
37 |
playlist = Playlist(args.url)
38 |
if not
39 |
+ = safe_filename(playlist.title())
40 |
for youtube_video in playlist.videos:
41 |
42 |
_perform_args_on_youtube(youtube_video, args)
43 |
except PytubeError as e:
44 |
print(f"There was an error with video: {youtube_video}")
45 |
46 |
47 |
print("Loading video...")
48 |
youtube = YouTube(args.url)
49 |
_perform_args_on_youtube(youtube, args)
50 |
51 |
52 |
def _perform_args_on_youtube(youtube: YouTube, args: argparse.Namespace) -> None:
53 |
if args.list:
54 |
55 |
if args.build_playback_report:
56 |
57 |
if args.itag:
58 |
download_by_itag(youtube=youtube, itag=args.itag,
59 |
if hasattr(args, "caption_code"):
60 |
61 |
youtube=youtube, lang_code=args.caption_code,
62 |
63 |
if args.resolution:
64 |
65 |
youtube=youtube, resolution=args.resolution,
66 |
67 |
68 |
69 |
if args.ffmpeg:
70 |
ffmpeg_process(youtube=youtube, resolution=args.ffmpeg,
71 |
72 |
73 |
def _parse_args(
74 |
parser: argparse.ArgumentParser, args: Optional[List] = None
75 |
) -> argparse.Namespace:
76 |
parser.add_argument("url", help="The YouTube /watch or /playlist url", nargs="?")
77 |
78 |
"--version", action="version", version="%(prog)s " + __version__,
79 |
80 |
81 |
"--itag", type=int, help="The itag for the desired stream",
82 |
83 |
84 |
"-r", "--resolution", type=str, help="The resolution for the desired stream",
85 |
86 |
87 |
88 |
89 |
90 |
91 |
"The list option causes pytube cli to return a list of streams "
92 |
"available to download"
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
help="Verbosity level, use up to 4 to increase logging -vvvv",
102 |
103 |
104 |
105 |
106 |
help="Save the html and js to disk",
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
"Download srt captions for given language code. "
116 |
"Prints available language codes if no argument given"
117 |
118 |
119 |
120 |
121 |
122 |
123 |
"The output directory for the downloaded stream. "
124 |
"Default is current working directory"
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
"Download the audio for a given URL at the highest bitrate available"
134 |
"Defaults to mp4 format if none is specified"
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
"Downloads the audio and video stream for resolution provided"
144 |
"If no resolution is provided, downloads the best resolution"
145 |
"Runs the command line program ffmpeg to combine the audio and video"
146 |
147 |
148 |
149 |
return parser.parse_args(args)
150 |
151 |
152 |
def build_playback_report(youtube: YouTube) -> None:
153 |
"""Serialize the request data to json for offline debugging.
154 |
155 |
:param YouTube youtube:
156 |
A YouTube object.
157 |
158 |
ts = int(dt.datetime.utcnow().timestamp())
159 |
fp = os.path.join(os.getcwd(), f"yt-video-{youtube.video_id}-{ts}.json.gz")
160 |
161 |
js = youtube.js
162 |
watch_html = youtube.watch_html
163 |
vid_info = youtube.vid_info
164 |
165 |
with, "wb") as fh:
166 |
167 |
168 |
169 |
"url": youtube.watch_url,
170 |
"js": js,
171 |
"watch_html": watch_html,
172 |
"video_info": vid_info,
173 |
174 |
175 |
176 |
177 |
178 |
def display_progress_bar(
179 |
bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55
180 |
) -> None:
181 |
"""Display a simple, pretty progress bar.
182 |
183 |
190 |
written to disk.
191 |
:param int filesize:
192 |
File size of the media stream in bytes.
193 |
:param str ch:
194 |
Character to use for presenting progress segment.
195 |
:param float scale:
196 |
Scale multiplier to reduce progress bar size.
197 |
198 |
199 |
columns = shutil.get_terminal_size().columns
200 |
max_width = int(columns * scale)
201 |
202 |
filled = int(round(max_width * bytes_received / float(filesize)))
203 |
remaining = max_width - filled
204 |
progress_bar = ch * filled + " " * remaining
205 |
percent = round(100.0 * bytes_received / float(filesize), 1)
206 |
text = f" ↳ |{progress_bar}| {percent}%\r"
207 |
208 |
209 |
210 |
211 |
# noinspection PyUnusedLocal
def on_progress(
    stream: Stream, chunk: bytes, bytes_remaining: int
) -> None:  # pylint: disable=W0613
    """Redraw the progress bar from the number of bytes still outstanding.

    :param Stream stream:
        Stream object; only its ``filesize`` attribute is read.
    :param bytes chunk:
        Not used by this function.
    :param int bytes_remaining:
        Bytes still to be received; subtracted from ``stream.filesize`` to
        obtain the number of bytes received so far.
    """
    filesize = stream.filesize
    bytes_received = filesize - bytes_remaining
    display_progress_bar(bytes_received, filesize)
218 |
219 |
220 |
def _download(
221 |
stream: Stream, target: Optional[str] = None, filename: Optional[str] = None
222 |
) -> None:
223 |
filesize_megabytes = stream.filesize // 1048576
224 |
print(f"{filename or stream.default_filename} | {filesize_megabytes} MB")
225 |
file_path = stream.get_file_path(filename=filename, output_path=target)
226 |
if stream.exists_at_path(file_path):
227 |
print(f"Already downloaded at:\n{file_path}")
228 |
229 |
230 |
+, filename=filename)
231 |
232 |
233 |
234 |
def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str:
    """
    Given a base name, the file format, and the target directory, will generate
    a filename unique for that directory and file format.

    :param str base:
        The given base-name.
    :param str subtype:
        The filetype of the video which will be downloaded.
    :param str media_type:
        The media_type of the file, ie. "audio" or "video"
    :param str target:
        Target directory for download.
    :rtype: str
    :returns:
        ``{base}_{media_type}_{counter}`` (without the extension), using the
        lowest counter, starting at 0, for which
        ``{target}/{name}.{subtype}`` does not exist yet.
    """
    counter = 0
    while True:
        file_name = f"{base}_{media_type}_{counter}"
        # The extension is only used for the existence check; the returned
        # name deliberately excludes it.
        file_path = os.path.join(target, f"{file_name}.{subtype}")
        if not os.path.exists(file_path):
            return file_name
        counter += 1
254 |
255 |
256 |
def ffmpeg_process(
257 |
youtube: YouTube, resolution: str, target: Optional[str] = None
258 |
) -> None:
259 |
260 |
Decides the correct video stream to download, then calls _ffmpeg_downloader.
261 |
262 |
:param YouTube youtube:
263 |
A valid YouTube object.
264 |
:param str resolution:
265 |
YouTube video resolution.
266 |
:param str target:
267 |
Target directory for download
268 |
269 |
270 |
target = target or os.getcwd()
271 |
272 |
if resolution == "best":
273 |
highest_quality_stream = (
274 |
275 |
276 |
mp4_stream = (
277 |
youtube.streams.filter(progressive=False, subtype="mp4")
278 |
279 |
280 |
281 |
if highest_quality_stream.resolution == mp4_stream.resolution:
282 |
video_stream = mp4_stream
283 |
284 |
video_stream = highest_quality_stream
285 |
286 |
video_stream = youtube.streams.filter(
287 |
progressive=False, resolution=resolution, subtype="mp4"
288 |
289 |
if not video_stream:
290 |
video_stream = youtube.streams.filter(
291 |
progressive=False, resolution=resolution
292 |
293 |
if video_stream is None:
294 |
print(f"Could not find a stream with resolution: {resolution}")
295 |
print("Try one of these:")
296 |
297 |
298 |
299 |
audio_stream = youtube.streams.get_audio_only(video_stream.subtype)
300 |
if not audio_stream:
301 |
audio_stream = youtube.streams.filter(only_audio=True).order_by("abr").last()
302 |
if not audio_stream:
303 |
print("Could not find an audio only stream")
304 |
305 |
306 |
audio_stream=audio_stream, video_stream=video_stream, target=target
307 |
308 |
309 |
310 |
def _ffmpeg_downloader(audio_stream: Stream, video_stream: Stream, target: str) -> None:
311 |
312 |
Given a YouTube Stream object, finds the correct audio stream, downloads them both
313 |
giving them a unique name, then uses ffmpeg to create a new file with the audio
314 |
and video from the previously downloaded files. Then deletes the original adaptive
315 |
streams, leaving the combination.
316 |
317 |
:param Stream audio_stream:
318 |
A valid Stream object representing the audio to download
319 |
:param Stream video_stream:
320 |
A valid Stream object representing the video to download
321 |
:param Path target:
322 |
A valid Path object
323 |
324 |
video_unique_name = _unique_name(
325 |
safe_filename(video_stream.title), video_stream.subtype, "video", target=target
326 |
327 |
audio_unique_name = _unique_name(
328 |
safe_filename(video_stream.title), audio_stream.subtype, "audio", target=target
329 |
330 |
_download(stream=video_stream, target=target, filename=video_unique_name)
331 |
print("Loading audio...")
332 |
_download(stream=audio_stream, target=target, filename=audio_unique_name)
333 |
334 |
video_path = os.path.join(target, f"{video_unique_name}.{video_stream.subtype}")
335 |
audio_path = os.path.join(target, f"{audio_unique_name}.{audio_stream.subtype}")
336 |
final_path = os.path.join(
337 |
target, f"{safe_filename(video_stream.title)}.{video_stream.subtype}"
338 |
339 |
340 |
+ # nosec
341 |
["ffmpeg", "-i", video_path, "-i", audio_path, "-codec", "copy", final_path,]
342 |
343 |
344 |
345 |
346 |
347 |
def download_by_itag(youtube: YouTube, itag: int, target: Optional[str] = None) -> None:
348 |
"""Start downloading a YouTube video.
349 |
350 |
:param YouTube youtube:
351 |
A valid YouTube object.
352 |
:param int itag:
353 |
YouTube format identifier code.
354 |
:param str target:
355 |
Target directory for download
356 |
357 |
stream = youtube.streams.get_by_itag(itag)
358 |
if stream is None:
359 |
print(f"Could not find a stream with itag: {itag}")
360 |
print("Try one of these:")
361 |
362 |
363 |
364 |
365 |
366 |
367 |
_download(stream, target=target)
368 |
except KeyboardInterrupt:
369 |
370 |
371 |
372 |
def download_by_resolution(
373 |
youtube: YouTube, resolution: str, target: Optional[str] = None
374 |
) -> None:
375 |
"""Start downloading a YouTube video.
376 |
377 |
:param YouTube youtube:
378 |
A valid YouTube object.
379 |
:param str resolution:
380 |
YouTube video resolution.
381 |
:param str target:
382 |
Target directory for download
383 |
384 |
# TODO(nficano): allow dash itags to be selected
385 |
stream = youtube.streams.get_by_resolution(resolution)
386 |
if stream is None:
387 |
print(f"Could not find a stream with resolution: {resolution}")
388 |
print("Try one of these:")
389 |
390 |
391 |
392 |
393 |
394 |
395 |
_download(stream, target=target)
396 |
except KeyboardInterrupt:
397 |
398 |
399 |
400 |
def display_streams(youtube: YouTube) -> None:
401 |
"""Probe YouTube video and list its available formats.
402 |
403 |
:param YouTube youtube:
404 |
A valid YouTube object.
405 |
406 |
407 |
for stream in youtube.streams:
408 |
409 |
410 |
411 |
def _print_available_captions(captions: CaptionQuery) -> None:
    """Print the ``code`` of every caption in *captions* as one comma-separated line.

    :param CaptionQuery captions:
        Iterable of caption objects; only each item's ``code`` attribute is read.
    """
    print(f"Available caption codes are: {', '.join(c.code for c in captions)}")
413 |
414 |
415 |
def download_caption(
416 |
youtube: YouTube, lang_code: Optional[str], target: Optional[str] = None
417 |
) -> None:
418 |
"""Download a caption for the YouTube video.
419 |
420 |
:param YouTube youtube:
421 |
A valid YouTube object.
422 |
:param str lang_code:
423 |
Language code desired for caption file.
424 |
Prints available codes if the value is None
425 |
or the desired code is not available.
426 |
:param str target:
427 |
Target directory for download
428 |
429 |
if lang_code is None:
430 |
431 |
432 |
433 |
434 |
caption = youtube.captions[lang_code]
435 |
downloaded_path =, output_path=target)
436 |
print(f"Saved caption file to: {downloaded_path}")
437 |
except KeyError:
438 |
print(f"Unable to find caption with code: {lang_code}")
439 |
440 |
441 |
442 |
def download_audio(
443 |
youtube: YouTube, filetype: str, target: Optional[str] = None
444 |
) -> None:
445 |
446 |
Given a filetype, downloads the highest quality available audio stream for a
447 |
YouTube video.
448 |
449 |
:param YouTube youtube:
450 |
A valid YouTube object.
451 |
:param str filetype:
452 |
Desired file format to download.
453 |
:param str target:
454 |
Target directory for download
455 |
456 |
audio = (
457 |
youtube.streams.filter(only_audio=True, subtype=filetype).order_by("abr").last()
458 |
459 |
460 |
if audio is None:
461 |
print("No audio only stream found. Try one of these:")
462 |
463 |
464 |
465 |
466 |
467 |
468 |
_download(audio, target=target)
469 |
except KeyboardInterrupt:
470 |
471 |
472 |
473 |
if __name__ == "__main__":
474 |
@@ -1,70 +0,0 @@
1 |
#!/usr/bin/env python
2 |
# -*- coding: utf-8 -*-
3 |
# flake8: noqa
4 |
"""Python 2/3 compatibility support."""
5 |
import sys
6 |
7 |
8 |
PY2 = sys.version_info[0] == 2
9 |
PY3 = sys.version_info[0] == 3
10 |
PY33 = sys.version_info[0:2] >= (3, 3)
11 |
12 |
if PY2:
13 |
14 |
15 |
import urllib2
16 |
from urllib import urlencode
17 |
from urllib2 import URLError
18 |
from urllib2 import quote
19 |
from urllib2 import unquote
20 |
from urllib2 import urlopen
21 |
from urlparse import parse_qsl
22 |
from HTMLParser import HTMLParser
23 |
24 |
def install_proxy(proxy_handler):
25 |
26 |
install global proxy.
27 |
:param proxy_handler:
28 |
:samp:`{"http":"", "https":""}`
29 |
30 |
31 |
proxy_support = urllib2.ProxyHandler(proxy_handler)
32 |
opener = urllib2.build_opener(proxy_support)
33 |
34 |
35 |
def unescape(s):
36 |
"""Unescape HTML entities in a string."""
37 |
html_parser = HTMLParser()
38 |
return html_parser.unescape(s)
39 |
40 |
def unicode(s):
41 |
"""Encode a string to utf-8."""
42 |
return s.encode('utf-8')
43 |
44 |
elif PY3:
45 |
from urllib.error import URLError
46 |
from urllib.parse import parse_qsl
47 |
from urllib.parse import quote
48 |
from urllib.parse import unquote
49 |
from urllib.parse import urlencode
50 |
from urllib.request import urlopen
51 |
from urllib import request
52 |
53 |
def install_proxy(proxy_handler):
54 |
proxy_support = request.ProxyHandler(proxy_handler)
55 |
opener = request.build_opener(proxy_support)
56 |
57 |
58 |
def unicode(s):
59 |
60 |
return s
61 |
62 |
if PY33:
63 |
from html.parser import HTMLParser
64 |
65 |
def unescape(s):
66 |
"""Unescape HTML entities in a string."""
67 |
html_parser = HTMLParser()
68 |
return html_parser.unescape(s)
69 |
70 |
from html import unescape
@@ -1,107 +1,172 @@
1 |
# -*- coding: utf-8 -*-
2 |
3 |
Module to download a complete playlist from a youtube channel
4 |
5 |
import json
6 |
import logging
7 |
import re
8 |
9 |
10 |
from pytube import request
11 |
from pytube.
12 |
13 |
logger = logging.getLogger(__name__)
14 |
15 |
16 |
class Playlist(
17 |
18 |
19 |
20 |
21 |
def __init__(self, url, suppress_exception=False):
22 |
self.playlist_url = url
23 |
self.video_urls = []
24 |
self.suppress_exception = suppress_exception
25 |
26 |
27 |
28 |
29 |
is preferable to work with the later one.
30 |
31 |
:return: playlist url
32 |
33 |
34 |
if 'watch?v=' in self.playlist_url:
35 |
base_url = ''
36 |
playlist_code = self.playlist_url.split('&list=')[1]
37 |
return base_url + playlist_code
38 |
39 |
# url is already in the desired format, so just return it
40 |
return self.playlist_url
41 |
42 |
def _load_more_url(self, req):
43 |
"""Given an html page or a fragment thereof, looks for
44 |
and returns the "load more" url if found.
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
# The above only returns 100 or fewer links
69 |
# Simulating a browser request for the load more link
70 |
load_more_url = self.
71 |
72 |
73 |
req = request.get(load_more_url)
74 |
load_more = json.loads(req)
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
If the number of digits required to name a file is less than is
106 |
required to name the last file, it prepends 0s.
107 |
So if you have a playlist of 100 videos it will number them like:
@@ -116,17 +181,17 @@ class Playlist(object):
116 |
start, stop, step = (1, len(self.video_urls) + 1, 1)
117 |
return (str(i).zfill(digits) for i in range(start, stop, step))
118 |
119 |
def download_all(
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
should be added to download resolution of choice
128 |
129 |
TODO(nficano): Add option to download resolution of user's choice
130 |
131 |
:param download_path:
132 |
(optional) Output path for the playlist If one is not
@@ -139,57 +204,49 @@ class Playlist(object):
139 |
:type prefix_number: bool
140 |
:param reverse_numbering:
141 |
(optional) Lets you number playlists in reverse, since some
142 |
playlists are ordered newest ->
143 |
:type reverse_numbering: bool
144 |
145 |
146 |
147 |
logger.debug('total videos found: %d', len(self.video_urls))
148 |
logger.debug('starting download')
149 |
150 |
prefix_gen = self._path_num_prefix_generator(reverse_numbering)
151 |
152 |
for link in self.video_urls:
153 |
154 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
logger.debug('file prefix is: %s', prefix)
172 |
-, filename_prefix=prefix)
173 |
174 |
175 |
logger.debug('download complete')
176 |
177 |
def title(self):
178 |
"""return playlist title (name)
179 |
180 |
181 |
182 |
183 |
184 |
end_tag = '</title>'
185 |
matchresult = re.compile(open_tag + '(.+?)' + end_tag)
186 |
matchresult =
187 |
matchresult = matchresult.replace(open_tag, '')
188 |
matchresult = matchresult.replace(end_tag, '')
189 |
matchresult = matchresult.replace('- YouTube', '')
190 |
matchresult = matchresult.strip()
191 |
192 |
return matchresult
193 |
except Exception as e:
194 |
195 |
return None
1 |
# -*- coding: utf-8 -*-
2 |
3 |
"""Module to download a complete playlist from a youtube channel."""
4 |
5 |
import json
6 |
import logging
7 |
import re
8 |
from datetime import date, datetime
9 |
from typing import List, Optional, Iterable, Dict, Union
10 |
from urllib.parse import parse_qs
11 |
from import Sequence
12 |
13 |
from pytube import request, YouTube
14 |
from pytube.helpers import cache, deprecated, install_proxy, uniqueify
15 |
16 |
logger = logging.getLogger(__name__)
17 |
18 |
19 |
class Playlist(Sequence):
20 |
"""Load a YouTube playlist with URL or ID"""
21 |
22 |
def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None):
23 |
if proxies:
24 |
25 |
26 |
27 |
self.playlist_id: str = parse_qs(url.split("?")[1])["list"][0]
28 |
except IndexError: # assume that url is just the id
29 |
self.playlist_id = url
30 |
31 |
self.playlist_url = f"{self.playlist_id}"
32 |
self.html = request.get(self.playlist_url)
33 |
34 |
# Needs testing with non-English
35 |
self.last_update: Optional[date] = None
36 |
date_match =
37 |
r"<li>Last updated on (\w{3}) (\d{1,2}), (\d{4})</li>", self.html
38 |
39 |
if date_match:
40 |
month, day, year = date_match.groups()
41 |
self.last_update = datetime.strptime(
42 |
f"{month} {day:0>2} {year}", "%b %d %Y"
43 |
44 |
45 |
self._video_regex = re.compile(r"href=\"(/watch\?v=[\w-]*)")
46 |
47 |
48 |
def _find_load_more_url(req: str) -> Optional[str]:
49 |
"""Given an html page or fragment, returns the "load more" url if found."""
50 |
match =
51 |
r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
52 |
53 |
54 |
if match:
55 |
return f"{}"
56 |
57 |
return None
58 |
59 |
@deprecated("This function will be removed in the future, please use .video_urls")
60 |
def parse_links(self) -> List[str]: # pragma: no cover
61 |
""" Deprecated function for returning list of URLs
62 |
63 |
:return: List[str]
64 |
65 |
return self.video_urls
66 |
67 |
def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:
68 |
"""Parse the video links from the page source, yields the /watch?v= part from video link
69 |
70 |
req = self.html
71 |
videos_urls = self._extract_videos(req)
72 |
if until_watch_id:
73 |
74 |
trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
75 |
yield videos_urls[:trim_index]
76 |
77 |
except ValueError:
78 |
79 |
yield videos_urls
80 |
81 |
# The above only returns 100 or fewer links
82 |
# Simulating a browser request for the load more link
83 |
load_more_url = self._find_load_more_url(req)
84 |
85 |
while load_more_url: # there is an url found
86 |
logger.debug("load more url: %s", load_more_url)
87 |
req = request.get(load_more_url)
88 |
load_more = json.loads(req)
89 |
90 |
html = load_more["content_html"]
91 |
except KeyError:
92 |
logger.debug("Could not find content_html")
93 |
94 |
videos_urls = self._extract_videos(html)
95 |
if until_watch_id:
96 |
97 |
trim_index = videos_urls.index(f"/watch?v={until_watch_id}")
98 |
yield videos_urls[:trim_index]
99 |
100 |
except ValueError:
101 |
102 |
yield videos_urls
103 |
104 |
load_more_url = self._find_load_more_url(
105 |
106 |
107 |
108 |
109 |
110 |
def _extract_videos(self, html: str) -> List[str]:
111 |
return uniqueify(self._video_regex.findall(html))
112 |
113 |
def trimmed(self, video_id: str) -> Iterable[str]:
114 |
"""Retrieve a list of YouTube video URLs trimmed at the given video ID
115 |
116 |
i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns [1,2]
117 |
:type video_id: str
118 |
video ID to trim the returned list of playlist URLs at
119 |
:rtype: List[str]
120 |
121 |
List of video URLs from the playlist trimmed at the given ID
122 |
123 |
for page in self._paginate(until_watch_id=video_id):
124 |
yield from (self._video_url(watch_path) for watch_path in page)
125 |
126 |
@property # type: ignore
127 |
128 |
def video_urls(self) -> List[str]:
129 |
"""Complete links of all the videos in playlist
130 |
131 |
:rtype: List[str]
132 |
:returns: List of video URLs
133 |
134 |
return [
135 |
self._video_url(video) for page in list(self._paginate()) for video in page
136 |
137 |
138 |
139 |
def videos(self) -> Iterable[YouTube]:
140 |
"""Yields YouTube objects of videos in this playlist
141 |
142 |
:Yields: YouTube
143 |
144 |
yield from (YouTube(url) for url in self.video_urls)
145 |
146 |
def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]:
147 |
return self.video_urls[i]
148 |
149 |
def __len__(self) -> int:
150 |
return len(self.video_urls)
151 |
152 |
def __repr__(self) -> str:
153 |
return f"{self.video_urls}"
154 |
155 |
156 |
"This call is unnecessary, you can directly access .video_urls or .videos"
157 |
158 |
def populate_video_urls(self) -> List[str]: # pragma: no cover
159 |
"""Complete links of all the videos in playlist
160 |
161 |
:rtype: List[str]
162 |
:returns: List of video URLs
163 |
164 |
return self.video_urls
165 |
166 |
@deprecated("This function will be removed in the future.")
167 |
def _path_num_prefix_generator(self, reverse=False): # pragma: no cover
168 |
"""Generate number prefixes for the items in the playlist.
169 |
170 |
If the number of digits required to name a file is less than is
171 |
required to name the last file, it prepends 0s.
172 |
So if you have a playlist of 100 videos it will number them like:
181 |
start, stop, step = (1, len(self.video_urls) + 1, 1)
182 |
return (str(i).zfill(digits) for i in range(start, stop, step))
183 |
184 |
185 |
"This function will be removed in the future. Please iterate through .videos"
186 |
187 |
def download_all(
188 |
189 |
download_path: Optional[str] = None,
190 |
prefix_number: bool = True,
191 |
reverse_numbering: bool = False,
192 |
resolution: str = "720p",
193 |
) -> None: # pragma: no cover
194 |
"""Download all the videos in the playlist.
195 |
196 |
:param download_path:
197 |
(optional) Output path for the playlist If one is not
204 |
:type prefix_number: bool
205 |
:param reverse_numbering:
206 |
(optional) Lets you number playlists in reverse, since some
207 |
playlists are ordered newest -> oldest.
208 |
:type reverse_numbering: bool
209 |
:param resolution:
210 |
Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
211 |
:type resolution: str
212 |
213 |
logger.debug("total videos found: %d", len(self.video_urls))
214 |
logger.debug("starting download")
215 |
216 |
prefix_gen = self._path_num_prefix_generator(reverse_numbering)
217 |
218 |
for link in self.video_urls:
219 |
youtube = YouTube(link)
220 |
dl_stream = (
221 |
222 |
or youtube.streams.get_lowest_resolution()
223 |
224 |
assert dl_stream is not None
225 |
226 |
logger.debug("download path: %s", download_path)
227 |
if prefix_number:
228 |
prefix = next(prefix_gen)
229 |
logger.debug("file prefix is: %s", prefix)
230 |
+, filename_prefix=prefix)
231 |
232 |
233 |
logger.debug("download complete")
234 |
235 |
236 |
def title(self) -> Optional[str]:
237 |
"""Extract playlist title
238 |
239 |
:return: playlist title (name)
240 |
:rtype: Optional[str]
241 |
242 |
pattern = re.compile("<title>(.+?)</title>")
243 |
match =
244 |
245 |
if match is None:
246 |
return None
247 |
248 |
return"- YouTube", "").strip()
249 |
250 |
251 |
def _video_url(watch_path: str):
252 |
return f"{watch_path}"
@@ -1,6 +1,7 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""Library specific exception definitions."""
3 |
4 |
5 |
6 |
class PytubeError(Exception):
@@ -15,30 +16,47 @@ class PytubeError(Exception):
15 |
class ExtractError(PytubeError):
16 |
"""Data extraction based exception."""
17 |
18 |
def __init__(self, msg, video_id=None):
19 |
"""Construct an instance of a :class:`ExtractError <ExtractError>`.
20 |
21 |
22 |
23 |
:param str video_id:
24 |
A YouTube video identifier.
25 |
26 |
27 |
msg = '{video_id}: {msg}'.format(video_id=video_id, msg=msg)
28 |
29 |
super(ExtractError, self).__init__(msg)
30 |
31 |
self.exc_info = sys.exc_info()
32 |
self.video_id = video_id
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
"""Video is a live stream."""
41 |
42 |
43 |
44 |
1 |
# -*- coding: utf-8 -*-
2 |
3 |
"""Library specific exception definitions."""
4 |
from typing import Union, Pattern
5 |
6 |
7 |
class PytubeError(Exception):
16 |
class ExtractError(PytubeError):
    """Data extraction based exception."""
18 |
19 |
20 |
class RegexMatchError(ExtractError):
    """Regex pattern did not return any matches."""

    def __init__(self, caller: str, pattern: Union[str, Pattern]):
        """
        :param str caller:
            Calling function
        :param str pattern:
            Pattern that failed to match
        """
        super().__init__(f"{caller}: could not find match for {pattern}")
        # Kept on the instance so handlers can inspect them without parsing
        # the message.
        self.caller = caller
        self.pattern = pattern
33 |
34 |
35 |
class LiveStreamError(ExtractError):
    """Video is a live stream."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(f"{video_id} is streaming live and cannot be loaded")

        # Kept on the instance so handlers can read the id directly.
        self.video_id = video_id
46 |
47 |
48 |
class VideoUnavailable(PytubeError):
    """Video is unavailable."""

    def __init__(self, video_id: str):
        """
        :param str video_id:
            A YouTube video identifier.
        """
        super().__init__(f"{video_id} is unavailable")

        # Kept on the instance so handlers can read the id directly.
        self.video_id = video_id
59 |
60 |
61 |
class HTMLParseError(PytubeError):
    """HTML could not be parsed"""
@@ -1,43 +1,52 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""This module contains all non-cipher related data extraction logic."""
3 |
import json
4 |
from collections import OrderedDict
5 |
6 |
from pytube.
7 |
from pytube.
8 |
from pytube.compat import urlencode
9 |
from pytube.exceptions import RegexMatchError
10 |
from pytube.helpers import regex_search
11 |
12 |
13 |
class PytubeHTMLParser(HTMLParser):
14 |
in_vid_descr = False
15 |
in_vid_descr_br = False
16 |
vid_descr =
17 |
18 |
def handle_starttag(self, tag, attrs):
19 |
if tag ==
20 |
for attr in attrs:
21 |
if attr[0] ==
22 |
self.in_vid_descr = True
23 |
24 |
def handle_endtag(self, tag):
25 |
if self.in_vid_descr and tag ==
26 |
self.in_vid_descr = False
27 |
28 |
def handle_startendtag(self, tag, attrs):
29 |
if self.in_vid_descr and tag ==
30 |
self.in_vid_descr_br = True
31 |
32 |
def handle_data(self, data):
33 |
if self.in_vid_descr_br:
34 |
self.vid_descr +=
35 |
self.in_vid_descr_br = False
36 |
elif self.in_vid_descr:
37 |
self.vid_descr += data
38 |
39 |
40 |
def is_age_restricted(watch_html):
41 |
"""Check if content is age restricted.
42 |
43 |
:param str watch_html:
@@ -47,13 +56,13 @@ def is_age_restricted(watch_html):
47 |
Whether or not the content is age restricted.
48 |
49 |
50 |
51 |
except RegexMatchError:
52 |
return False
53 |
return True
54 |
55 |
56 |
def video_id(url):
57 |
"""Extract the ``video_id`` from a YouTube url.
58 |
59 |
This function supports the following patterns:
@@ -68,88 +77,74 @@ def video_id(url):
68 |
69 |
YouTube video id.
70 |
71 |
return regex_search(r
72 |
73 |
74 |
75 |
76 |
77 |
:param str video_id:
78 |
A YouTube video identifier.
79 |
:rtype: str
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
"""Construct the video_info url.
99 |
100 |
:param str video_id:
101 |
A YouTube video identifier.
102 |
:param str watch_url:
103 |
A YouTube watch url.
104 |
:param str watch_html:
105 |
The html contents of the watch page.
106 |
:param str embed_html:
107 |
The html contents of the embed page (for age restricted videos).
108 |
:param bool age_restricted:
109 |
Is video age restricted.
110 |
:rtype: str
111 |
112 |
:samp:`` with necessary GET
113 |
114 |
115 |
116 |
sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
params = OrderedDict([
126 |
('video_id', video_id),
127 |
('el', '$el'),
128 |
('ps', 'default'),
129 |
('eurl', quote(watch_url)),
130 |
('hl', 'en_US'),
131 |
132 |
return '' + urlencode(params)
133 |
134 |
135 |
136 |
"""Get the base JavaScript url.
137 |
138 |
Construct the base JavaScript url, which contains the decipher
139 |
140 |
141 |
:param str
142 |
The html contents of the watch page.
143 |
:param bool age_restricted:
144 |
Is video age restricted.
145 |
146 |
147 |
148 |
149 |
return '' + base_js
150 |
151 |
152 |
def mime_type_codec(mime_type_codec):
153 |
"""Parse the type data.
154 |
155 |
Breaks up the data in the ``type`` key of the manifest, which contains the
@@ -158,8 +153,7 @@ def mime_type_codec(mime_type_codec):
158 |
159 |
160 |
161 |
162 |
('audio/webm', ['opus'])
163 |
164 |
:param str mime_type_codec:
165 |
String containing mime type and codecs.
@@ -168,35 +162,160 @@ def mime_type_codec(mime_type_codec):
168 |
The mime type and a list of codecs.
169 |
170 |
171 |
pattern = r
172 |
173 |
174 |
175 |
176 |
def get_ytplayer_config(html
177 |
"""Get the YouTube player configuration data from the watch html.
178 |
179 |
Extract the ``ytplayer_config``, which is json data embedded within the
180 |
watch html and serves as the primary source of obtaining the stream
181 |
manifest data.
182 |
183 |
:param str
184 |
The html contents of the watch page.
185 |
:param bool age_restricted:
186 |
Is video age restricted.
187 |
:rtype: str
188 |
189 |
Substring of the html containing the encoded manifest data.
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
html_parser = PytubeHTMLParser()
201 |
202 |
return html_parser.vid_descr
1 |
# -*- coding: utf-8 -*-
2 |
"""This module contains all non-cipher related data extraction logic."""
3 |
import json
4 |
import logging
5 |
import re
6 |
from collections import OrderedDict
7 |
from html.parser import HTMLParser
8 |
from typing import Any, Optional, Tuple, List, Dict
9 |
from urllib.parse import quote, parse_qs, unquote, parse_qsl
10 |
from urllib.parse import urlencode
11 |
12 |
from pytube.cipher import Cipher
13 |
from pytube.exceptions import RegexMatchError, HTMLParseError, LiveStreamError
14 |
from pytube.helpers import regex_search
15 |
16 |
logger = logging.getLogger(__name__)
17 |
18 |
19 |
class PytubeHTMLParser(HTMLParser):
    """Collect the text found inside the ``<p id="eow-description">`` element.

    Text is accumulated in ``vid_descr``. A text run that follows a
    self-closing ``<br/>`` inside that element is prefixed with a newline.
    """

    # True while the parser is between the description ``<p>`` and its ``</p>``.
    in_vid_descr = False
    # True after a ``<br/>`` inside the description, until the next text run.
    in_vid_descr_br = False
    # Description text gathered so far.
    vid_descr = ""

    def handle_starttag(self, tag, attrs):
        """Enter description mode on ``<p id="eow-description">``."""
        if tag == "p":
            for attr in attrs:
                if attr[0] == "id" and attr[1] == "eow-description":
                    self.in_vid_descr = True

    def handle_endtag(self, tag):
        """Leave description mode on the closing ``</p>``."""
        if self.in_vid_descr and tag == "p":
            self.in_vid_descr = False

    def handle_startendtag(self, tag, attrs):
        """Remember a ``<br/>`` seen inside the description."""
        if self.in_vid_descr and tag == "br":
            self.in_vid_descr_br = True

    def handle_data(self, data):
        """Append *data* to ``vid_descr`` while in description mode."""
        if self.in_vid_descr_br:
            # The pending line break is emitted together with the text after it.
            self.vid_descr += f"\n{data}"
            self.in_vid_descr_br = False
        elif self.in_vid_descr:
            self.vid_descr += data

    def error(self, message):
        """Raise :class:`HTMLParseError` carrying *message*."""
        raise HTMLParseError(message)
47 |
48 |
49 |
def is_age_restricted(watch_html: str) -> bool:
50 |
"""Check if content is age restricted.
51 |
52 |
:param str watch_html:
56 |
Whether or not the content is age restricted.
57 |
58 |
59 |
regex_search(r"og:restrictions:age", watch_html, group=0)
60 |
except RegexMatchError:
61 |
return False
62 |
return True
63 |
64 |
65 |
def video_id(url: str) -> str:
66 |
"""Extract the ``video_id`` from a YouTube url.
67 |
68 |
This function supports the following patterns:
77 |
78 |
YouTube video id.
79 |
80 |
return regex_search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url, group=1)
81 |
82 |
83 |
def video_info_url(video_id: str, watch_url: str) -> str:
84 |
"""Construct the video_info url.
85 |
86 |
:param str video_id:
87 |
A YouTube video identifier.
88 |
:param str watch_url:
89 |
A YouTube watch url.
90 |
:rtype: str
91 |
92 |
:samp:`` with necessary GET
93 |
94 |
95 |
params = OrderedDict(
96 |
97 |
("video_id", video_id),
98 |
("el", "$el"),
99 |
("ps", "default"),
100 |
("eurl", quote(watch_url)),
101 |
("hl", "en_US"),
102 |
103 |
104 |
return _video_info_url(params)
105 |
106 |
107 |
def video_info_url_age_restricted(video_id: str, embed_html: str) -> str:
108 |
"""Construct the video_info url.
109 |
110 |
:param str video_id:
111 |
A YouTube video identifier.
112 |
:param str embed_html:
113 |
The html contents of the embed page (for age restricted videos).
114 |
:rtype: str
115 |
116 |
:samp:`` with necessary GET
117 |
118 |
119 |
120 |
sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1)
121 |
except RegexMatchError:
122 |
sts = ""
123 |
# Here we use ``OrderedDict`` so that the output is consistent between
124 |
# Python 2.7+.
125 |
eurl = f"{video_id}"
126 |
params = OrderedDict([("video_id", video_id), ("eurl", eurl), ("sts", sts),])
127 |
return _video_info_url(params)
128 |
129 |
130 |
def _video_info_url(params: OrderedDict) -> str:
131 |
return "" + urlencode(params)
132 |
133 |
134 |
def js_url(html: str) -> str:
135 |
"""Get the base JavaScript url.
136 |
137 |
Construct the base JavaScript url, which contains the decipher
138 |
139 |
140 |
:param str html:
141 |
The html contents of the watch page.
142 |
143 |
base_js = get_ytplayer_config(html)["assets"]["js"]
144 |
return "" + base_js
145 |
146 |
147 |
def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]:
148 |
"""Parse the type data.
149 |
150 |
Breaks up the data in the ``type`` key of the manifest, which contains the
153 |
154 |
155 |
156 |
mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])
157 |
158 |
:param str mime_type_codec:
159 |
String containing mime type and codecs.
162 |
The mime type and a list of codecs.
163 |
164 |
165 |
pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\""
166 |
regex = re.compile(pattern)
167 |
results =
168 |
if not results:
169 |
raise RegexMatchError(caller="mime_type_codec", pattern=pattern)
170 |
mime_type, codecs = results.groups()
171 |
return mime_type, [c.strip() for c in codecs.split(",")]
172 |
173 |
174 |
def get_ytplayer_config(html: str) -> Any:
175 |
"""Get the YouTube player configuration data from the watch html.
176 |
177 |
Extract the ``ytplayer_config``, which is json data embedded within the
178 |
watch html and serves as the primary source of obtaining the stream
179 |
manifest data.
180 |
181 |
:param str html:
182 |
The html contents of the watch page.
183 |
:rtype: str
184 |
185 |
Substring of the html containing the encoded manifest data.
186 |
187 |
config_patterns = [
188 |
189 |
190 |
191 |
r";yt\.setConfig\(\{'PLAYER_CONFIG':\s*({.*})(,'EXPERIMENT_FLAGS'|;)", # noqa: E501
192 |
193 |
logger.debug("finding initial function name")
194 |
for pattern in config_patterns:
195 |
regex = re.compile(pattern)
196 |
function_match =
197 |
if function_match:
198 |
logger.debug("finished regex search, matched: %s", pattern)
199 |
yt_player_config =
200 |
return json.loads(yt_player_config)
201 |
202 |
raise RegexMatchError(caller="get_ytplayer_config", pattern="config_patterns")
203 |
204 |
205 |
def _get_vid_descr(html: Optional[str]) -> str:
206 |
html_parser = PytubeHTMLParser()
207 |
if html:
208 |
209 |
return html_parser.vid_descr
210 |
211 |
212 |
def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
213 |
"""Apply the decrypted signature to the stream manifest.
214 |
215 |
:param dict config_args:
216 |
Details of the media streams available.
217 |
:param str fmt:
218 |
Key in stream manifests (``ytplayer_config``) containing progressive
219 |
download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
220 |
221 |
:param str js:
222 |
The contents of the base.js asset file.
223 |
224 |
225 |
cipher = Cipher(js=js)
226 |
stream_manifest = config_args[fmt]
227 |
228 |
for i, stream in enumerate(stream_manifest):
229 |
230 |
url: str = stream["url"]
231 |
except KeyError:
232 |
live_stream = (
233 |
234 |
.get("playabilityStatus", {},)
235 |
236 |
237 |
if live_stream:
238 |
raise LiveStreamError("UNKNOWN")
239 |
# 403 Forbidden fix.
240 |
if "signature" in url or (
241 |
"s" not in stream and ("&sig=" in url or "&lsig=" in url)
242 |
243 |
# For certain videos, YouTube will just provide them pre-signed, in
244 |
# which case there's no real magic to download them and we can skip
245 |
# the whole signature descrambling entirely.
246 |
logger.debug("signature found, skip decipher")
247 |
248 |
249 |
signature = cipher.get_signature(ciphered_signature=stream["s"])
250 |
251 |
logger.debug("finished descrambling signature for itag=%s", stream["itag"])
252 |
# 403 forbidden fix
253 |
stream_manifest[i]["url"] = url + "&sig=" + signature
254 |
255 |
256 |
def apply_descrambler(stream_data: Dict, key: str) -> None:
257 |
"""Apply various in-place transforms to YouTube's media stream data.
258 |
259 |
Creates a ``list`` of dictionaries by string splitting on commas, then
260 |
taking each list item, parsing it as a query string, converting it to a
261 |
``dict`` and unquoting the value.
262 |
263 |
:param dict stream_data:
264 |
Dictionary containing query string encoded values.
265 |
:param str key:
266 |
Name of the key in dictionary.
267 |
268 |
269 |
270 |
>>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
271 |
>>> apply_descrambler(d, 'foo')
272 |
>>> print(d)
273 |
{'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
274 |
275 |
276 |
277 |
278 |
if key == "url_encoded_fmt_stream_map" and not stream_data.get(
279 |
280 |
281 |
formats = json.loads(stream_data["player_response"])["streamingData"]["formats"]
282 |
283 |
284 |
285 |
286 |
287 |
288 |
stream_data[key] = [
289 |
290 |
"url": format_item["url"],
291 |
"type": format_item["mimeType"],
292 |
"quality": format_item["quality"],
293 |
"itag": format_item["itag"],
294 |
"bitrate": format_item.get("bitrate"),
295 |
"is_otf": (format_item.get("type") == otf_type),
296 |
297 |
for format_item in formats
298 |
299 |
except KeyError:
300 |
cipher_url = [
301 |
parse_qs(formats[i]["cipher"]) for i, data in enumerate(formats)
302 |
303 |
stream_data[key] = [
304 |
305 |
"url": cipher_url[i]["url"][0],
306 |
"s": cipher_url[i]["s"][0],
307 |
"type": format_item["mimeType"],
308 |
"quality": format_item["quality"],
309 |
"itag": format_item["itag"],
310 |
"bitrate": format_item.get("bitrate"),
311 |
"is_otf": (format_item.get("type") == otf_type),
312 |
313 |
for i, format_item in enumerate(formats)
314 |
315 |
316 |
stream_data[key] = [
317 |
{k: unquote(v) for k, v in parse_qsl(i)}
318 |
for i in stream_data[key].split(",")
319 |
320 |
321 |
logger.debug("applying descrambler")
@@ -1,107 +1,44 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""Various helper functions implemented by pytube."""
3 |
from __future__ import absolute_import
4 |
5 |
import logging
6 |
7 |
import re
8 |
9 |
from pytube.compat import unicode
10 |
from pytube.exceptions import RegexMatchError
11 |
12 |
13 |
logger = logging.getLogger(__name__)
14 |
15 |
16 |
def regex_search(pattern, string
17 |
"""Shortcut method to search a string for a given pattern.
18 |
19 |
:param str pattern:
20 |
A regular expression pattern.
21 |
:param str string:
22 |
A target string to search.
23 |
:param bool groups:
24 |
Should the return value be ``.groups()``.
25 |
:param int group:
26 |
Index of group to return.
27 |
:param int flags:
28 |
Expression behavior modifiers.
29 |
30 |
str or tuple
31 |
32 |
Substring pattern matches.
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
'finished regex search: %s',
46 |
47 |
48 |
'pattern': p,
49 |
50 |
}, indent=2,
51 |
52 |
53 |
if groups:
54 |
return results.groups()
55 |
elif group is not None:
56 |
57 |
58 |
return results
59 |
60 |
regex = re.compile(pattern, flags)
61 |
results =
62 |
if not results:
63 |
raise RegexMatchError(
64 |
'regex pattern ({pattern}) had zero matches'
65 |
66 |
67 |
68 |
69 |
'finished regex search: %s',
70 |
71 |
72 |
'pattern': pattern,
73 |
74 |
}, indent=2,
75 |
76 |
77 |
if groups:
78 |
return results.groups()
79 |
elif group is not None:
80 |
81 |
82 |
return results
83 |
84 |
85 |
def apply_mixin(dct, key, func, *args, **kwargs):
86 |
r"""Apply in-place data mutation to a dictionary.
87 |
88 |
:param dict dct:
89 |
Dictionary to apply mixin function to.
90 |
:param str key:
91 |
Key within dictionary to apply mixin function to.
92 |
:param callable func:
93 |
Transform function to apply to ``dct[key]``.
94 |
:param \*args:
95 |
(optional) positional arguments that ``func`` takes.
96 |
:param \*\*kwargs:
97 |
(optional) keyword arguments that ``func`` takes.
98 |
99 |
100 |
101 |
dct[key] = func(dct[key], *args, **kwargs)
102 |
103 |
104 |
def safe_filename(s, max_length=255):
105 |
"""Sanitize a string making it safe to use as a filename.
106 |
107 |
This function was based off the limitations outlined here:
@@ -116,12 +53,120 @@ def safe_filename(s, max_length=255):
116 |
A sanitized string.
117 |
118 |
# Characters in range 0-31 (0x00-0x1F) are not allowed in ntfs filenames.
119 |
120 |
121 |
122 |
123 |
124 |
pattern =
125 |
regex = re.compile(pattern, re.UNICODE)
126 |
filename = regex.sub(
127 |
1 |
# -*- coding: utf-8 -*-
2 |
3 |
"""Various helper functions implemented by pytube."""
4 |
import functools
5 |
import logging
6 |
import os
7 |
import re
8 |
import warnings
9 |
from typing import TypeVar, Callable, Optional, Dict, List, Any
10 |
from urllib import request
11 |
12 |
from pytube.exceptions import RegexMatchError
13 |
14 |
logger = logging.getLogger(__name__)
15 |
16 |
17 |
def regex_search(pattern: str, string: str, group: int) -> str:
18 |
"""Shortcut method to search a string for a given pattern.
19 |
20 |
:param str pattern:
21 |
A regular expression pattern.
22 |
:param str string:
23 |
A target string to search.
24 |
:param int group:
25 |
Index of group to return.
26 |
27 |
str or tuple
28 |
29 |
Substring pattern matches.
30 |
31 |
regex = re.compile(pattern)
32 |
results =
33 |
if not results:
34 |
raise RegexMatchError(caller="regex_search", pattern=pattern)
35 |
36 |
logger.debug("matched regex search: %s", pattern)
37 |
38 |
39 |
40 |
41 |
def safe_filename(s: str, max_length: int = 255) -> str:
42 |
"""Sanitize a string making it safe to use as a filename.
43 |
44 |
This function was based off the limitations outlined here:
53 |
A sanitized string.
54 |
55 |
# Characters in range 0-31 (0x00-0x1F) are not allowed in ntfs filenames.
56 |
ntfs_characters = [chr(i) for i in range(0, 31)]
57 |
characters = [
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
pattern = "|".join(ntfs_characters + characters)
80 |
regex = re.compile(pattern, re.UNICODE)
81 |
filename = regex.sub("", s)
82 |
return filename[:max_length].rsplit(" ", 0)[0]
83 |
84 |
85 |
def setup_logger(level: int = logging.ERROR):
86 |
"""Create a configured instance of logger.
87 |
88 |
:param int level:
89 |
Describe the severity level of the logs to handle.
90 |
91 |
fmt = "[%(asctime)s] %(levelname)s in %(module)s: %(message)s"
92 |
date_fmt = "%H:%M:%S"
93 |
formatter = logging.Formatter(fmt, datefmt=date_fmt)
94 |
95 |
handler = logging.StreamHandler()
96 |
97 |
98 |
99 |
logger = logging.getLogger("pytube")
100 |
101 |
102 |
103 |
104 |
GenericType = TypeVar("GenericType")
105 |
106 |
107 |
def cache(func: Callable[..., GenericType]) -> GenericType:
108 |
""" mypy compatible annotation wrapper for lru_cache"""
109 |
return functools.lru_cache()(func) # type: ignore
110 |
111 |
112 |
def deprecated(reason: str) -> Callable:
113 |
114 |
This is a decorator which can be used to mark functions
115 |
as deprecated. It will result in a warning being emitted
116 |
when the function is used.
117 |
118 |
119 |
def decorator(func1):
120 |
message = "Call to deprecated function {name} ({reason})."
121 |
122 |
123 |
def new_func1(*args, **kwargs):
124 |
warnings.simplefilter("always", DeprecationWarning)
125 |
126 |
message.format(name=func1.__name__, reason=reason),
127 |
128 |
129 |
130 |
warnings.simplefilter("default", DeprecationWarning)
131 |
return func1(*args, **kwargs)
132 |
133 |
return new_func1
134 |
135 |
return decorator
136 |
137 |
138 |
def target_directory(output_path: Optional[str] = None) -> str:
139 |
140 |
Function for determining target directory of a download.
141 |
Returns an absolute path (if relative one given) or the current
142 |
path (if none given). Makes directory if it does not exist.
143 |
144 |
:type output_path: str
145 |
:rtype: str
146 |
147 |
An absolute directory path as a string.
148 |
149 |
if output_path:
150 |
if not os.path.isabs(output_path):
151 |
output_path = os.path.join(os.getcwd(), output_path)
152 |
153 |
output_path = os.getcwd()
154 |
os.makedirs(output_path, exist_ok=True)
155 |
return output_path
156 |
157 |
158 |
def install_proxy(proxy_handler: Dict[str, str]) -> None:
159 |
proxy_support = request.ProxyHandler(proxy_handler)
160 |
opener = request.build_opener(proxy_support)
161 |
162 |
163 |
164 |
def uniqueify(duped_list: List) -> List:
165 |
seen: Dict[Any, bool] = {}
166 |
result = []
167 |
for item in duped_list:
168 |
if item in seen:
169 |
170 |
seen[item] = True
171 |
172 |
return result
@@ -1,92 +1,91 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""This module contains a lookup table of YouTube's itag values."""
3 |
4 |
5 |
5: (
6 |
6: (
7 |
13: (
8 |
17: (
9 |
18: (
10 |
22: (
11 |
34: (
12 |
35: (
13 |
36: (
14 |
37: (
15 |
38: (
16 |
43: (
17 |
44: (
18 |
45: (
19 |
46: (
20 |
59: (
21 |
78: (
22 |
82: (
23 |
83: (
24 |
84: (
25 |
85: (
26 |
91: (
27 |
92: (
28 |
93: (
29 |
94: (
30 |
95: (
31 |
96: (
32 |
100: (
33 |
101: (
34 |
102: (
35 |
132: (
36 |
151: (
37 |
38 |
# DASH Video
39 |
133: (
40 |
134: (
41 |
135: (
42 |
136: (
43 |
137: (
44 |
138: (
45 |
160: (
46 |
167: (
47 |
168: (
48 |
169: (
49 |
170: (
50 |
212: (
51 |
218: (
52 |
219: (
53 |
242: (
54 |
243: (
55 |
244: (
56 |
245: (
57 |
246: (
58 |
247: (
59 |
248: (
60 |
264: (
61 |
266: (
62 |
271: (
63 |
272: (
64 |
278: (
65 |
298: (
66 |
299: (
67 |
302: (
68 |
303: (
69 |
308: (
70 |
313: (
71 |
315: (
72 |
330: (
73 |
331: (
74 |
332: (
75 |
333: (
76 |
334: (
77 |
335: (
78 |
336: (
79 |
337: (
80 |
81 |
# DASH Audio
82 |
139: (None,
83 |
140: (None,
84 |
141: (None,
85 |
171: (None,
86 |
172: (None,
87 |
249: (None,
88 |
250: (None,
89 |
251: (None,
90 |
256: (None, None),
91 |
258: (None, None),
92 |
325: (None, None),
@@ -97,10 +96,36 @@ HDR = [330, 331, 332, 333, 334, 335, 336, 337]
97 |
_60FPS = [298, 299, 302, 303, 308, 315] + HDR
98 |
_3D = [82, 83, 84, 85, 100, 101, 102]
99 |
LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
100 |
101 |
102 |
def get_format_profile(itag):
103 |
104 |
105 |
:param str itag:
106 |
YouTube format identifier code.
@@ -111,10 +136,14 @@ def get_format_profile(itag):
111 |
112 |
res, bitrate = None, None
113 |
return {
114 |
115 |
116 |
117 |
118 |
119 |
120 |
1 |
# -*- coding: utf-8 -*-
2 |
"""This module contains a lookup table of YouTube's itag values."""
3 |
from typing import Dict
4 |
5 |
6 |
5: ("240p", "64kbps"),
7 |
6: ("270p", "64kbps"),
8 |
13: ("144p", None),
9 |
17: ("144p", "24kbps"),
10 |
18: ("360p", "96kbps"),
11 |
22: ("720p", "192kbps"),
12 |
34: ("360p", "128kbps"),
13 |
35: ("480p", "128kbps"),
14 |
36: ("240p", None),
15 |
37: ("1080p", "192kbps"),
16 |
38: ("3072p", "192kbps"),
17 |
43: ("360p", "128kbps"),
18 |
44: ("480p", "128kbps"),
19 |
45: ("720p", "192kbps"),
20 |
46: ("1080p", "192kbps"),
21 |
59: ("480p", "128kbps"),
22 |
78: ("480p", "128kbps"),
23 |
82: ("360p", "128kbps"),
24 |
83: ("480p", "128kbps"),
25 |
84: ("720p", "192kbps"),
26 |
85: ("1080p", "192kbps"),
27 |
91: ("144p", "48kbps"),
28 |
92: ("240p", "48kbps"),
29 |
93: ("360p", "128kbps"),
30 |
94: ("480p", "128kbps"),
31 |
95: ("720p", "256kbps"),
32 |
96: ("1080p", "256kbps"),
33 |
100: ("360p", "128kbps"),
34 |
101: ("480p", "192kbps"),
35 |
102: ("720p", "192kbps"),
36 |
132: ("240p", "48kbps"),
37 |
151: ("720p", "24kbps"),
38 |
# DASH Video
39 |
133: ("240p", None),
40 |
134: ("360p", None),
41 |
135: ("480p", None),
42 |
136: ("720p", None),
43 |
137: ("1080p", None),
44 |
138: ("2160p", None),
45 |
160: ("144p", None),
46 |
167: ("360p", None),
47 |
168: ("480p", None),
48 |
169: ("720p", None),
49 |
170: ("1080p", None),
50 |
212: ("480p", None),
51 |
218: ("480p", None),
52 |
219: ("480p", None),
53 |
242: ("240p", None),
54 |
243: ("360p", None),
55 |
244: ("480p", None),
56 |
245: ("480p", None),
57 |
246: ("480p", None),
58 |
247: ("720p", None),
59 |
248: ("1080p", None),
60 |
264: ("1440p", None),
61 |
266: ("2160p", None),
62 |
271: ("1440p", None),
63 |
272: ("2160p", None),
64 |
278: ("144p", None),
65 |
298: ("720p", None),
66 |
299: ("1080p", None),
67 |
302: ("720p", None),
68 |
303: ("1080p", None),
69 |
308: ("1440p", None),
70 |
313: ("2160p", None),
71 |
315: ("2160p", None),
72 |
330: ("144p", None),
73 |
331: ("240p", None),
74 |
332: ("360p", None),
75 |
333: ("480p", None),
76 |
334: ("720p", None),
77 |
335: ("1080p", None),
78 |
336: ("1440p", None),
79 |
337: ("2160p", None),
80 |
# DASH Audio
81 |
139: (None, "48kbps"),
82 |
140: (None, "128kbps"),
83 |
141: (None, "256kbps"),
84 |
171: (None, "128kbps"),
85 |
172: (None, "256kbps"),
86 |
249: (None, "50kbps"),
87 |
250: (None, "70kbps"),
88 |
251: (None, "160kbps"),
89 |
256: (None, None),
90 |
258: (None, None),
91 |
325: (None, None),
96 |
_60FPS = [298, 299, 302, 303, 308, 315] + HDR
97 |
_3D = [82, 83, 84, 85, 100, 101, 102]
98 |
LIVE = [91, 92, 93, 94, 95, 96, 132, 151]
99 |
DASH_MP4_VIDEO = [133, 134, 135, 136, 137, 138, 160, 212, 264, 266, 298, 299]
100 |
DASH_MP4_AUDIO = [139, 140, 141, 256, 258, 325, 328]
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
DASH_WEBM_AUDIO = [171, 172, 249, 250, 251]
125 |
126 |
127 |
def get_format_profile(itag: int) -> Dict:
128 |
"""Get additional format information for a given itag.
129 |
130 |
:param str itag:
131 |
YouTube format identifier code.
136 |
137 |
res, bitrate = None, None
138 |
return {
139 |
"resolution": res,
140 |
"abr": bitrate,
141 |
"is_live": itag in LIVE,
142 |
"is_3d": itag in _3D,
143 |
"is_hdr": itag in HDR,
144 |
"fps": 60 if itag in _60FPS else 30,
145 |
"is_dash": itag in DASH_MP4_VIDEO
146 |
or itag in DASH_MP4_AUDIO
147 |
or itag in DASH_WEBM_VIDEO
148 |
or itag in DASH_WEBM_AUDIO,
149 |
@@ -1,25 +0,0 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""This module implements a log factory."""
3 |
from __future__ import absolute_import
4 |
5 |
import logging
6 |
7 |
8 |
def create_logger(level=logging.ERROR):
9 |
"""Create a configured instance of logger.
10 |
11 |
:param int level:
12 |
Describe the severity level of the logs to handle.
13 |
14 |
fmt = '[%(asctime)s] %(levelname)s in %(module)s: %(message)s'
15 |
date_fmt = '%H:%M:%S'
16 |
formatter = logging.Formatter(fmt, datefmt=date_fmt)
17 |
18 |
handler = logging.StreamHandler()
19 |
20 |
21 |
22 |
logger = logging.getLogger('pytube')
23 |
24 |
25 |
return logger
@@ -1,101 +0,0 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""Applies in-place data mutations."""
3 |
from __future__ import absolute_import
4 |
5 |
import json
6 |
import logging
7 |
import pprint
8 |
9 |
from pytube import cipher
10 |
from pytube.compat import parse_qsl
11 |
from pytube.compat import unquote
12 |
from pytube.exceptions import LiveStreamError
13 |
14 |
15 |
logger = logging.getLogger(__name__)
16 |
17 |
18 |
def apply_signature(config_args, fmt, js):
19 |
"""Apply the decrypted signature to the stream manifest.
20 |
21 |
:param dict config_args:
22 |
Details of the media streams available.
23 |
:param str fmt:
24 |
Key in stream manifests (``ytplayer_config``) containing progressive
25 |
download or adaptive streams (e.g.: ``url_encoded_fmt_stream_map`` or
26 |
27 |
:param str js:
28 |
The contents of the base.js asset file.
29 |
30 |
31 |
stream_manifest = config_args[fmt]
32 |
live_stream = json.loads(config_args['player_response']).get(
33 |
'playabilityStatus', {},
34 |
35 |
for i, stream in enumerate(stream_manifest):
36 |
if 'url' in stream:
37 |
url = stream['url']
38 |
elif live_stream:
39 |
raise LiveStreamError('Video is currently being streamed live')
40 |
# 403 Forbidden fix.
41 |
if (
42 |
'signature' in url or (
43 |
's' not in stream and (
44 |
'&sig=' in url or '&lsig=' in url
45 |
46 |
47 |
48 |
# For certain videos, YouTube will just provide them pre-signed, in
49 |
# which case there's no real magic to download them and we can skip
50 |
# the whole signature descrambling entirely.
51 |
logger.debug('signature found, skip decipher')
52 |
53 |
54 |
if js is not None:
55 |
signature = cipher.get_signature(js, stream['s'])
56 |
57 |
# signature not present in url (line 33), need js to descramble
58 |
# TypeError caught in __main__
59 |
raise TypeError('JS is None')
60 |
61 |
62 |
'finished descrambling signature for itag=%s\n%s',
63 |
stream['itag'], pprint.pformat(
64 |
65 |
's': stream['s'],
66 |
'signature': signature,
67 |
}, indent=2,
68 |
69 |
70 |
# 403 forbidden fix
71 |
stream_manifest[i]['url'] = url + '&sig=' + signature
72 |
73 |
74 |
def apply_descrambler(stream_data, key):
75 |
"""Apply various in-place transforms to YouTube's media stream data.
76 |
77 |
Creates a ``list`` of dictionaries by string splitting on commas, then
78 |
taking each list item, parsing it as a query string, converting it to a
79 |
``dict`` and unquoting the value.
80 |
81 |
:param dict dct:
82 |
Dictionary containing query string encoded values.
83 |
:param str key:
84 |
Name of the key in dictionary.
85 |
86 |
87 |
88 |
>>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
89 |
>>> apply_descrambler(d, 'foo')
90 |
>>> print(d)
91 |
{'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}
92 |
93 |
94 |
stream_data[key] = [
95 |
{k: unquote(v) for k, v in parse_qsl(i)}
96 |
for i in stream_data[key].split(',')
97 |
98 |
99 |
'applying descrambler\n%s',
100 |
pprint.pformat(stream_data[key], indent=2),
101 |
@@ -0,0 +1,52 @@
1 |
# -*- coding: utf-8 -*-
2 |
3 |
from typing import Any, Optional
4 |
from typing_extensions import Protocol
5 |
6 |
7 |
class OnProgress(Protocol):
8 |
def __call__(self, stream: Any, chunk: bytes, bytes_remaining: int) -> None:
9 |
"""On download progress callback function.
10 |
11 |
:param stream:
12 |
An instance of :class:`Stream <Stream>` being downloaded.
13 |
:type stream:
14 |
15 |
:param bytes chunk:
16 |
Segment of media file binary data, not yet written to disk.
17 |
:param int bytes_remaining:
18 |
How many bytes have been downloaded.
19 |
20 |
21 |
22 |
23 |
24 |
class OnComplete(Protocol):
25 |
def __call__(self, stream: Any, file_path: Optional[str]) -> None:
26 |
"""On download complete handler function.
27 |
28 |
:param stream:
29 |
An instance of :class:`Stream <Stream>` being downloaded.
30 |
:type stream:
31 |
32 |
:param file_path:
33 |
The file handle where the media is being written to.
34 |
:type file_path: str
35 |
36 |
:rtype: None
37 |
38 |
39 |
40 |
41 |
class Monostate:
42 |
def __init__(
43 |
44 |
on_progress: Optional[OnProgress],
45 |
on_complete: Optional[OnComplete],
46 |
title: Optional[str] = None,
47 |
duration: Optional[int] = None,
48 |
49 |
self.on_progress = on_progress
50 |
self.on_complete = on_complete
51 |
self.title = title
52 |
self.duration = duration
@@ -1,8 +1,14 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""This module provides a query interface for media streams and captions."""
3 |
4 |
5 |
class StreamQuery:
6 |
"""Interface for querying the available media streams."""
7 |
8 |
def __init__(self, fmt_streams):
@@ -15,12 +21,24 @@ class StreamQuery:
15 |
self.itag_index = {int(s.itag): s for s in fmt_streams}
16 |
17 |
def filter(
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
"""Apply the given filtering criterion.
26 |
@@ -89,6 +107,9 @@ class StreamQuery:
89 |
Excludes progressive streams (audio and video are on separate
90 |
91 |
92 |
:param bool only_audio:
93 |
Excludes streams with video tracks.
94 |
@@ -129,16 +150,12 @@ class StreamQuery:
129 |
130 |
if only_audio:
131 |
132 |
lambda s: (
133 |
s.includes_audio_track and not s.includes_video_track
134 |
135 |
136 |
137 |
if only_video:
138 |
139 |
lambda s: (
140 |
s.includes_video_track and not s.includes_audio_track
141 |
142 |
143 |
144 |
if progressive:
@@ -148,43 +165,49 @@ class StreamQuery:
148 |
filters.append(lambda s: s.is_adaptive)
149 |
150 |
if custom_filter_functions:
151 |
152 |
153 |
154 |
fmt_streams = self.fmt_streams
155 |
156 |
fmt_streams =
157 |
return StreamQuery(fmt_streams)
158 |
159 |
def order_by(self, attribute_name):
160 |
"""Apply a sort order
161 |
162 |
:param str attribute_name:
163 |
The name of the attribute to sort by.
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
return StreamQuery(fmt_streams)
186 |
187 |
def desc(self):
188 |
"""Sort streams in descending order.
189 |
190 |
:rtype: :class:`StreamQuery <StreamQuery>`
@@ -192,7 +215,7 @@ class StreamQuery:
192 |
193 |
return StreamQuery(self.fmt_streams[::-1])
194 |
195 |
def asc(self):
196 |
"""Sort streams in ascending order.
197 |
198 |
:rtype: :class:`StreamQuery <StreamQuery>`
@@ -200,10 +223,10 @@ class StreamQuery:
200 |
201 |
return self
202 |
203 |
def get_by_itag(self, itag):
204 |
"""Get the corresponding :class:`Stream <Stream>` for a given itag.
205 |
206 |
207 |
YouTube format identifier code.
208 |
:rtype: :class:`Stream <Stream>` or None
209 |
@@ -211,12 +234,71 @@ class StreamQuery:
211 |
not found.
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
def first(self):
220 |
"""Get the first :class:`Stream <Stream>` in the results.
221 |
222 |
:rtype: :class:`Stream <Stream>` or None
@@ -228,7 +310,7 @@ class StreamQuery:
228 |
229 |
return self.fmt_streams[0]
230 |
except IndexError:
231 |
232 |
233 |
def last(self):
234 |
"""Get the last :class:`Stream <Stream>` in the results.
@@ -244,15 +326,19 @@ class StreamQuery:
244 |
except IndexError:
245 |
246 |
247 |
248 |
249 |
250 |
:rtype: int
251 |
252 |
253 |
254 |
255 |
256 |
"""Get all the results represented by this query as a list.
257 |
258 |
:rtype: list
@@ -260,21 +346,32 @@ class StreamQuery:
260 |
261 |
return self.fmt_streams
262 |
263 |
264 |
265 |
"""Interface for querying the available captions."""
266 |
267 |
def __init__(self, captions):
268 |
"""Construct a :class:`Caption <Caption>`.
269 |
270 |
param list captions:
271 |
list of :class:`Caption <Caption>` instances.
272 |
273 |
274 |
self.captions = captions
275 |
self.lang_code_index = {c.code: c for c in captions}
276 |
277 |
278 |
"""Get the :class:`Caption <Caption>` for a given ``lang_code``.
279 |
280 |
:param str lang_code:
@@ -286,10 +383,23 @@ class CaptionQuery:
286 |
287 |
return self.lang_code_index.get(lang_code)
288 |
289 |
290 |
"""Get all the results represented by this query as a list.
291 |
292 |
:rtype: list
293 |
294 |
295 |
return self.
1 |
# -*- coding: utf-8 -*-
2 |
3 |
"""This module provides a query interface for media streams and captions."""
4 |
from typing import Callable, List, Optional, Union
5 |
from import Mapping, Sequence
6 |
7 |
from pytube import Stream, Caption
8 |
from pytube.helpers import deprecated
9 |
10 |
11 |
class StreamQuery(Sequence):
12 |
"""Interface for querying the available media streams."""
13 |
14 |
def __init__(self, fmt_streams):
21 |
self.itag_index = {int(s.itag): s for s in fmt_streams}
22 |
23 |
def filter(
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
"""Apply the given filtering criterion.
44 |
107 |
Excludes progressive streams (audio and video are on separate
108 |
109 |
110 |
:param bool is_dash:
111 |
Include/exclude dash streams.
112 |
113 |
:param bool only_audio:
114 |
Excludes streams with video tracks.
115 |
150 |
151 |
if only_audio:
152 |
153 |
lambda s: (s.includes_audio_track and not s.includes_video_track),
154 |
155 |
156 |
if only_video:
157 |
158 |
lambda s: (s.includes_video_track and not s.includes_audio_track),
159 |
160 |
161 |
if progressive:
165 |
filters.append(lambda s: s.is_adaptive)
166 |
167 |
if custom_filter_functions:
168 |
169 |
170 |
if is_dash is not None:
171 |
filters.append(lambda s: s.is_dash == is_dash)
172 |
173 |
return self._filter(filters)
174 |
175 |
def _filter(self, filters: List[Callable]) -> "StreamQuery":
176 |
fmt_streams = self.fmt_streams
177 |
for filter_lambda in filters:
178 |
fmt_streams = filter(filter_lambda, fmt_streams)
179 |
return StreamQuery(list(fmt_streams))
180 |
181 |
def order_by(self, attribute_name: str) -> "StreamQuery":
182 |
"""Apply a sort order. Filters out stream the do not have the attribute.
183 |
184 |
:param str attribute_name:
185 |
The name of the attribute to sort by.
186 |
187 |
has_attribute = [
188 |
s for s in self.fmt_streams if getattr(s, attribute_name) is not None
189 |
190 |
# Check that the attributes have string values.
191 |
if has_attribute and isinstance(getattr(has_attribute[0], attribute_name), str):
192 |
# Try to return a StreamQuery sorted by the integer representations
193 |
# of the values.
194 |
195 |
return StreamQuery(
196 |
197 |
198 |
key=lambda s: int(
199 |
"".join(filter(str.isdigit, getattr(s, attribute_name)))
200 |
), # type: ignore # noqa: E501
201 |
202 |
203 |
except ValueError:
204 |
205 |
206 |
return StreamQuery(
207 |
sorted(has_attribute, key=lambda s: getattr(s, attribute_name))
208 |
209 |
210 |
def desc(self) -> "StreamQuery":
211 |
"""Sort streams in descending order.
212 |
213 |
:rtype: :class:`StreamQuery <StreamQuery>`
215 |
216 |
return StreamQuery(self.fmt_streams[::-1])
217 |
218 |
def asc(self) -> "StreamQuery":
219 |
"""Sort streams in ascending order.
220 |
221 |
:rtype: :class:`StreamQuery <StreamQuery>`
223 |
224 |
return self
225 |
226 |
def get_by_itag(self, itag: int) -> Optional[Stream]:
227 |
"""Get the corresponding :class:`Stream <Stream>` for a given itag.
228 |
229 |
:param int itag:
230 |
YouTube format identifier code.
231 |
:rtype: :class:`Stream <Stream>` or None
232 |
234 |
not found.
235 |
236 |
237 |
return self.itag_index.get(int(itag))
238 |
239 |
def get_by_resolution(self, resolution: str) -> Optional[Stream]:
240 |
"""Get the corresponding :class:`Stream <Stream>` for a given resolution.
241 |
242 |
Stream must be a progressive mp4.
243 |
244 |
:param str resolution:
245 |
Video resolution i.e. "720p", "480p", "360p", "240p", "144p"
246 |
:rtype: :class:`Stream <Stream>` or None
247 |
248 |
The :class:`Stream <Stream>` matching the given itag or None if
249 |
not found.
250 |
251 |
252 |
return self.filter(
253 |
progressive=True, subtype="mp4", resolution=resolution
254 |
255 |
256 |
def get_lowest_resolution(self) -> Optional[Stream]:
257 |
"""Get lowest resolution stream that is a progressive mp4.
258 |
259 |
:rtype: :class:`Stream <Stream>` or None
260 |
261 |
The :class:`Stream <Stream>` matching the given itag or None if
262 |
not found.
263 |
264 |
265 |
return (
266 |
self.filter(progressive=True, subtype="mp4").order_by("resolution").first()
267 |
268 |
269 |
def get_highest_resolution(self) -> Optional[Stream]:
270 |
"""Get highest resolution stream that is a progressive video.
271 |
272 |
:rtype: :class:`Stream <Stream>` or None
273 |
274 |
The :class:`Stream <Stream>` matching the given itag or None if
275 |
not found.
276 |
277 |
278 |
return self.filter(progressive=True).order_by("resolution").last()
279 |
280 |
def get_audio_only(self, subtype: str = "mp4") -> Optional[Stream]:
281 |
"""Get highest bitrate audio stream for given codec (defaults to mp4)
282 |
283 |
:param str subtype:
284 |
Audio subtype, defaults to mp4
285 |
:rtype: :class:`Stream <Stream>` or None
286 |
287 |
The :class:`Stream <Stream>` matching the given itag or None if
288 |
not found.
289 |
290 |
return self.filter(only_audio=True, subtype=subtype).order_by("abr").last()
291 |
292 |
def otf(self, is_otf: bool = False) -> "StreamQuery":
293 |
"""Filter stream by OTF, useful if some streams have 404 URLs
294 |
295 |
:param bool is_otf: Set to False to retrieve only non-OTF streams
296 |
:rtype: :class:`StreamQuery <StreamQuery>`
297 |
:returns: A StreamQuery object with otf filtered streams
298 |
299 |
return self._filter([lambda s: s.is_otf == is_otf])
300 |
301 |
def first(self) -> Optional[Stream]:
302 |
"""Get the first :class:`Stream <Stream>` in the results.
303 |
304 |
:rtype: :class:`Stream <Stream>` or None
310 |
311 |
return self.fmt_streams[0]
312 |
except IndexError:
313 |
return None
314 |
315 |
def last(self):
316 |
"""Get the last :class:`Stream <Stream>` in the results.
326 |
except IndexError:
327 |
328 |
329 |
@deprecated("Get the size of this list directly using len()")
330 |
def count(self, value: Optional[str] = None) -> int: # pragma: no cover
331 |
"""Get the count of items in the list.
332 |
333 |
:rtype: int
334 |
335 |
if value:
336 |
return self.fmt_streams.count(value)
337 |
338 |
return len(self)
339 |
340 |
@deprecated("This object can be treated as a list, all() is useless")
341 |
def all(self) -> List[Stream]: # pragma: no cover
342 |
"""Get all the results represented by this query as a list.
343 |
344 |
:rtype: list
346 |
347 |
return self.fmt_streams
348 |
349 |
def __getitem__(self, i: Union[slice, int]):
350 |
return self.fmt_streams[i]
351 |
352 |
def __len__(self) -> int:
353 |
return len(self.fmt_streams)
354 |
355 |
def __repr__(self) -> str:
356 |
return f"{self.fmt_streams}"
357 |
358 |
359 |
class CaptionQuery(Mapping):
360 |
"""Interface for querying the available captions."""
361 |
362 |
def __init__(self, captions: List[Caption]):
363 |
"""Construct a :class:`Caption <Caption>`.
364 |
365 |
param list captions:
366 |
list of :class:`Caption <Caption>` instances.
367 |
368 |
369 |
self.lang_code_index = {c.code: c for c in captions}
370 |
371 |
@deprecated("This object can be treated as a dictionary, i.e. captions['en']")
372 |
def get_by_language_code(
373 |
self, lang_code: str
374 |
) -> Optional[Caption]: # pragma: no cover
375 |
"""Get the :class:`Caption <Caption>` for a given ``lang_code``.
376 |
377 |
:param str lang_code:
383 |
384 |
return self.lang_code_index.get(lang_code)
385 |
386 |
@deprecated("This object can be treated as a dictionary")
387 |
def all(self) -> List[Caption]: # pragma: no cover
388 |
"""Get all the results represented by this query as a list.
389 |
390 |
:rtype: list
391 |
392 |
393 |
return list(self.lang_code_index.values())
394 |
395 |
def __getitem__(self, i: str):
396 |
return self.lang_code_index[i]
397 |
398 |
def __len__(self) -> int:
399 |
return len(self.lang_code_index)
400 |
401 |
def __iter__(self):
402 |
return iter(self.lang_code_index.values())
403 |
404 |
def __repr__(self) -> str:
405 |
return f"{self.lang_code_index}"
@@ -1,47 +1,89 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""Implements a simple wrapper around urlopen."""
3 |
4 |
5 |
from pytube.compat import urlopen
6 |
# 403 forbidden fix
7 |
8 |
9 |
10 |
11 |
streaming=False, chunk_size=8 * 1024,
12 |
13 |
"""Send an http GET request.
14 |
15 |
:param str url:
16 |
The URL to perform the GET request for.
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
"""Read the response in chunks."""
43 |
while True:
44 |
buf =
45 |
if not buf:
46 |
47 |
yield buf
1 |
# -*- coding: utf-8 -*-
2 |
3 |
"""Implements a simple wrapper around urlopen."""
4 |
import logging
5 |
from functools import lru_cache
6 |
from http.client import HTTPResponse
7 |
from typing import Iterable, Dict, Optional
8 |
from urllib.request import Request
9 |
from urllib.request import urlopen
10 |
11 |
logger = logging.getLogger(__name__)
12 |
13 |
14 |
def _execute_request(
15 |
url: str, method: Optional[str] = None, headers: Optional[Dict[str, str]] = None
16 |
) -> HTTPResponse:
17 |
base_headers = {"User-Agent": "Mozilla/5.0"}
18 |
if headers:
19 |
20 |
if url.lower().startswith("http"):
21 |
request = Request(url, headers=base_headers, method=method)
22 |
23 |
raise ValueError("Invalid URL")
24 |
return urlopen(request) # nosec
25 |
26 |
27 |
def get(url) -> str:
28 |
"""Send an http GET request.
29 |
30 |
:param str url:
31 |
The URL to perform the GET request for.
32 |
:rtype: str
33 |
34 |
UTF-8 encoded string of response
35 |
36 |
return _execute_request(url).read().decode("utf-8")
37 |
38 |
39 |
def stream(
40 |
url: str, chunk_size: int = 4096, range_size: int = 9437184
41 |
) -> Iterable[bytes]:
42 |
"""Read the response in chunks.
43 |
:param str url: The URL to perform the GET request for.
44 |
:param int chunk_size: The size in bytes of each chunk. Defaults to 4KB
45 |
:param int range_size: The size in bytes of each range request. Defaults to 9MB
46 |
:rtype: Iterable[bytes]
47 |
48 |
file_size: int = range_size # fake filesize to start
49 |
downloaded = 0
50 |
while downloaded < file_size:
51 |
stop_pos = min(downloaded + range_size, file_size) - 1
52 |
range_header = f"bytes={downloaded}-{stop_pos}"
53 |
response = _execute_request(url, method="GET", headers={"Range": range_header})
54 |
if file_size == range_size:
55 |
56 |
content_range =["Content-Range"]
57 |
file_size = int(content_range.split("/")[1])
58 |
except (KeyError, IndexError, ValueError) as e:
59 |
60 |
while True:
61 |
chunk =
62 |
if not chunk:
63 |
64 |
downloaded += len(chunk)
65 |
yield chunk
66 |
return # pylint: disable=R1711
67 |
68 |
69 |
70 |
def filesize(url: str) -> int:
71 |
"""Fetch size in bytes of file at given URL
72 |
73 |
:param str url: The URL to get the size of
74 |
:returns: int: size in bytes of remote file
75 |
76 |
return int(head(url)["content-length"])
77 |
78 |
79 |
def head(url: str) -> Dict:
80 |
"""Fetch headers returned http GET request.
81 |
82 |
:param str url:
83 |
The URL to perform the GET request for.
84 |
:rtype: dict
85 |
86 |
dictionary of lowercase headers
87 |
88 |
response_headers = _execute_request(url, method="HEAD").info()
89 |
return {k.lower(): v for k, v in response_headers.items()}
@@ -1,4 +1,5 @@
1 |
# -*- coding: utf-8 -*-
2 |
3 |
This module contains a container for stream manifest data.
4 |
@@ -7,26 +8,26 @@ combined). This was referred to as ``Video`` in the legacy pytube version, but
7 |
has been renamed to accommodate DASH (which serves the audio and video
8 |
9 |
10 |
from __future__ import absolute_import
11 |
12 |
13 |
import logging
14 |
import os
15 |
16 |
17 |
from pytube import extract
18 |
from pytube import request
19 |
from pytube.helpers import safe_filename
20 |
from pytube.itags import get_format_profile
21 |
22 |
23 |
logger = logging.getLogger(__name__)
24 |
25 |
26 |
class Stream
27 |
"""Container for stream manifest data."""
28 |
29 |
def __init__(self, stream, player_config_args, monostate):
30 |
"""Construct a :class:`Stream <Stream>`.
31 |
32 |
:param dict stream:
@@ -42,67 +43,52 @@ class Stream(object):
42 |
# (Borg pattern).
43 |
self._monostate = monostate
44 |
45 |
46 |
47 |
self.itag = None # stream format id (youtube nomenclature)
48 |
self.res = None # resolution (e.g.: 480p, 720p, 1080p)
49 |
self.url = None # signed download url
50 |
51 |
self._filesize = None # filesize in bytes
52 |
self.mime_type = None # content identifier (e.g.: video/mp4)
53 |
self.type = None # the part of the mime before the slash
54 |
self.subtype = None # the part of the mime after the slash
55 |
56 |
self.codecs = [] # audio/video encoders (e.g.: vp8, mp4a)
57 |
self.audio_codec = None # audio codec of the stream (e.g.: vorbis)
58 |
self.video_codec = None # video codec of the stream (e.g.: vp8)
59 |
60 |
# Iterates over the key/values of stream and sets them as class
61 |
# attributes. This is an anti-pattern and should be removed.
62 |
63 |
64 |
65 |
# frame rate, and whether the stream is live (HLS) or 3D.
66 |
self.fmt_profile = get_format_profile(self.itag)
67 |
68 |
# Same as above, except for the format profile attributes.
69 |
70 |
71 |
# The player configuration which contains information like the video
72 |
# title.
73 |
# TODO(nficano): this should be moved to the monostate.
74 |
self.player_config_args = player_config_args
75 |
76 |
# 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis']
77 |
self.mime_type, self.codecs = extract.mime_type_codec(
78 |
79 |
# 'video/webm' -> 'video', 'webm'
80 |
self.type, self.subtype = self.mime_type.split(
81 |
82 |
# ['vp8', 'vorbis'] -> video_codec: vp8, audio_codec: vorbis. DASH
83 |
# streams return NoneType for audio/video depending.
84 |
self.video_codec, self.audio_codec = self.parse_codecs()
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
def is_adaptive(self):
96 |
"""Whether the stream is DASH.
97 |
98 |
:rtype: bool
99 |
100 |
# if codecs has two elements (e.g.: ['vp8', 'vorbis']): 2 % 2 = 0
101 |
# if codecs has one element (e.g.: ['vp8']) 1 % 2 = 1
102 |
return len(self.codecs) % 2
103 |
104 |
105 |
def is_progressive(self):
106 |
"""Whether the stream is progressive.
107 |
108 |
:rtype: bool
@@ -110,26 +96,22 @@ class Stream(object):
110 |
return not self.is_adaptive
111 |
112 |
113 |
def includes_audio_track(self):
114 |
"""Whether the stream only contains audio.
115 |
116 |
:rtype: bool
117 |
118 |
119 |
return True
120 |
return self.type == 'audio'
121 |
122 |
123 |
def includes_video_track(self):
124 |
"""Whether the stream only contains video.
125 |
126 |
:rtype: bool
127 |
128 |
129 |
return True
130 |
return self.type == 'video'
131 |
132 |
def parse_codecs(self):
133 |
"""Get the video/audio codecs from list of codecs.
134 |
135 |
Parse a variable length sized list of codecs and returns a
@@ -153,7 +135,7 @@ class Stream(object):
153 |
return video, audio
154 |
155 |
156 |
def filesize(self):
157 |
"""File size of the media stream in bytes.
158 |
159 |
:rtype: int
@@ -161,45 +143,57 @@ class Stream(object):
161 |
Filesize (in bytes) of the stream.
162 |
163 |
if self._filesize is None:
164 |
165 |
self._filesize = int(headers['content-length'])
166 |
return self._filesize
167 |
168 |
169 |
def title(self):
170 |
"""Get title of video
171 |
172 |
:rtype: str
173 |
174 |
Youtube video title
175 |
176 |
177 |
178 |
179 |
return player_config_args['title']
180 |
181 |
182 |
183 |
184 |
185 |
186 |
return details['title']
187 |
188 |
189 |
190 |
191 |
def default_filename(self):
192 |
"""Generate filename based on the video title.
193 |
194 |
:rtype: str
195 |
196 |
An os file system compatible filename.
197 |
198 |
199 |
filename = safe_filename(self.title)
200 |
201 |
202 |
def download(
203 |
"""Write the media stream to disk.
204 |
205 |
:param output_path:
@@ -214,71 +208,82 @@ class Stream(object):
214 |
(optional) A string that will be prepended to the filename.
215 |
For example a number in a playlist or the name of a series.
216 |
If one is not specified, nothing will be prepended
217 |
This is
218 |
filename but still add a prefix.
219 |
:type filename_prefix: str or None
220 |
221 |
:rtype: str
222 |
223 |
224 |
225 |
226 |
227 |
filename = '{filename}.{s.subtype}'.format(filename=safe, s=self)
228 |
filename = filename or self.default_filename
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
# file path
238 |
fp = os.path.join(output_path, filename)
239 |
bytes_remaining = self.filesize
240 |
241 |
242 |
self.filesize, fp,
243 |
244 |
245 |
with open(
246 |
for chunk in request.
247 |
# reduce the (bytes) remainder by the length of the chunk.
248 |
bytes_remaining -= len(chunk)
249 |
# send to the on_progress callback.
250 |
self.on_progress(chunk, fh, bytes_remaining)
251 |
252 |
253 |
254 |
255 |
"""Write the media stream to buffer
256 |
257 |
:rtype: io.BytesIO buffer
258 |
259 |
buffer = io.BytesIO()
260 |
bytes_remaining = self.filesize
261 |
262 |
263 |
264 |
265 |
266 |
for chunk in request.
267 |
# reduce the (bytes) remainder by the length of the chunk.
268 |
bytes_remaining -= len(chunk)
269 |
# send to the on_progress callback.
270 |
self.on_progress(chunk, buffer, bytes_remaining)
271 |
272 |
return buffer
273 |
274 |
def on_progress(self, chunk, file_handler, bytes_remaining):
275 |
"""On progress callback function.
276 |
277 |
This function writes the binary data to the file, then checks if an
278 |
additional callback is defined in the monostate. This is exposed to
279 |
allow things like displaying a progress bar.
280 |
281 |
282 |
Segment of media file binary data, not yet written to disk.
283 |
:param file_handler:
284 |
The file handle where the media is being written to.
@@ -292,56 +297,43 @@ class Stream(object):
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
'chunk_size': len(chunk),
300 |
'bytes_remaining': bytes_remaining,
301 |
}, indent=2,
302 |
303 |
304 |
on_progress = self._monostate['on_progress']
305 |
if on_progress:
306 |
logger.debug('calling on_progress callback %s', on_progress)
307 |
on_progress(self, chunk, file_handler, bytes_remaining)
308 |
309 |
def on_complete(self,
310 |
"""On download complete handler function.
311 |
312 |
313 |
The file handle where the media is being written to.
314 |
315 |
316 |
317 |
:rtype: None
318 |
319 |
320 |
321 |
on_complete = self._monostate
322 |
if on_complete:
323 |
324 |
325 |
326 |
def __repr__(self):
327 |
"""Printable object representation.
328 |
329 |
:rtype: str
330 |
331 |
A string representation of a :class:`Stream <Stream>` object.
332 |
333 |
# TODO(nficano): this can probably be written better.
334 |
parts = ['itag="{s.itag}"', 'mime_type="{s.mime_type}"']
335 |
if self.includes_video_track:
336 |
parts.extend(['res="{s.resolution}"', 'fps="{s.fps}fps"'])
337 |
if not self.is_adaptive:
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
parts.extend(['abr="{s.abr}"', 'acodec="{s.audio_codec}"'])
346 |
347 |
1 |
# -*- coding: utf-8 -*-
2 |
3 |
4 |
This module contains a container for stream manifest data.
5 |
8 |
has been renamed to accommodate DASH (which serves the audio and video
9 |
10 |
11 |
12 |
from datetime import datetime
13 |
import logging
14 |
import os
15 |
from typing import Dict, Tuple, Optional, BinaryIO
16 |
from urllib.parse import parse_qs
17 |
18 |
from pytube import extract
19 |
from pytube import request
20 |
from pytube.helpers import safe_filename, target_directory
21 |
from pytube.itags import get_format_profile
22 |
from pytube.monostate import Monostate
23 |
24 |
logger = logging.getLogger(__name__)
25 |
26 |
27 |
class Stream:
28 |
"""Container for stream manifest data."""
29 |
30 |
def __init__(self, stream: Dict, player_config_args: Dict, monostate: Monostate):
31 |
"""Construct a :class:`Stream <Stream>`.
32 |
33 |
:param dict stream:
43 |
# (Borg pattern).
44 |
self._monostate = monostate
45 |
46 |
self.url = stream["url"] # signed download url
47 |
self.itag = int(stream["itag"]) # stream format id (youtube nomenclature)
48 |
49 |
# set type and codec info
50 |
51 |
# 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis']
52 |
self.mime_type, self.codecs = extract.mime_type_codec(stream["type"])
53 |
54 |
# 'video/webm' -> 'video', 'webm'
55 |
self.type, self.subtype = self.mime_type.split("/")
56 |
57 |
# ['vp8', 'vorbis'] -> video_codec: vp8, audio_codec: vorbis. DASH
58 |
# streams return NoneType for audio/video depending.
59 |
self.video_codec, self.audio_codec = self.parse_codecs()
60 |
61 |
self.is_otf: bool = stream["is_otf"]
62 |
self.bitrate: Optional[int] = stream["bitrate"]
63 |
64 |
self._filesize: Optional[int] = None # filesize in bytes
65 |
66 |
# Additional information about the stream format, such as resolution,
67 |
# frame rate, and whether the stream is live (HLS) or 3D.
68 |
itag_profile = get_format_profile(self.itag)
69 |
self.is_dash = itag_profile["is_dash"]
70 |
self.abr = itag_profile["abr"] # average bitrate (audio streams only)
71 |
self.fps = itag_profile["fps"] # frames per second (video streams only)
72 |
self.resolution = itag_profile["resolution"] # resolution (e.g.: "480p")
73 |
self.is_3d = itag_profile["is_3d"]
74 |
self.is_hdr = itag_profile["is_hdr"]
75 |
self.is_live = itag_profile["is_live"]
76 |
77 |
# The player configuration, contains info like the video title.
78 |
self.player_config_args = player_config_args
79 |
80 |
81 |
def is_adaptive(self) -> bool:
82 |
"""Whether the stream is DASH.
83 |
84 |
:rtype: bool
85 |
86 |
# if codecs has two elements (e.g.: ['vp8', 'vorbis']): 2 % 2 = 0
87 |
# if codecs has one element (e.g.: ['vp8']) 1 % 2 = 1
88 |
return bool(len(self.codecs) % 2)
89 |
90 |
91 |
def is_progressive(self) -> bool:
92 |
"""Whether the stream is progressive.
93 |
94 |
:rtype: bool
96 |
return not self.is_adaptive
97 |
98 |
99 |
def includes_audio_track(self) -> bool:
100 |
"""Whether the stream only contains audio.
101 |
102 |
:rtype: bool
103 |
104 |
return self.is_progressive or self.type == "audio"
105 |
106 |
107 |
def includes_video_track(self) -> bool:
108 |
"""Whether the stream only contains video.
109 |
110 |
:rtype: bool
111 |
112 |
return self.is_progressive or self.type == "video"
113 |
114 |
def parse_codecs(self) -> Tuple[Optional[str], Optional[str]]:
115 |
"""Get the video/audio codecs from list of codecs.
116 |
117 |
Parse a variable length sized list of codecs and returns a
135 |
return video, audio
136 |
137 |
138 |
def filesize(self) -> int:
139 |
"""File size of the media stream in bytes.
140 |
141 |
:rtype: int
143 |
Filesize (in bytes) of the stream.
144 |
145 |
if self._filesize is None:
146 |
self._filesize = request.filesize(self.url)
147 |
return self._filesize
148 |
149 |
150 |
def title(self) -> str:
151 |
"""Get title of video
152 |
153 |
:rtype: str
154 |
155 |
Youtube video title
156 |
157 |
return self._monostate.title or "Unknown YouTube Video Title"
158 |
159 |
160 |
def filesize_approx(self) -> int:
161 |
"""Get approximate filesize of the video
162 |
163 |
Falls back to HTTP call if there is not sufficient information to approximate
164 |
165 |
:rtype: int
166 |
:returns: size of video in bytes
167 |
168 |
if self._monostate.duration and self.bitrate:
169 |
bits_in_byte = 8
170 |
return int((self._monostate.duration * self.bitrate) / bits_in_byte)
171 |
172 |
return self.filesize
173 |
174 |
175 |
def expiration(self) -> datetime:
176 |
expire = parse_qs(self.url.split("?")[1])["expire"][0]
177 |
return datetime.utcfromtimestamp(int(expire))
178 |
179 |
180 |
def default_filename(self) -> str:
181 |
"""Generate filename based on the video title.
182 |
183 |
:rtype: str
184 |
185 |
An os file system compatible filename.
186 |
187 |
filename = safe_filename(self.title)
188 |
return f"{filename}.{self.subtype}"
189 |
190 |
def download(
191 |
192 |
output_path: Optional[str] = None,
193 |
filename: Optional[str] = None,
194 |
filename_prefix: Optional[str] = None,
195 |
skip_existing: bool = True,
196 |
) -> str:
197 |
"""Write the media stream to disk.
198 |
199 |
:param output_path:
208 |
(optional) A string that will be prepended to the filename.
209 |
For example a number in a playlist or the name of a series.
210 |
If one is not specified, nothing will be prepended
211 |
This is separate from filename so you can use the default
212 |
filename but still add a prefix.
213 |
:type filename_prefix: str or None
214 |
:param skip_existing:
215 |
(optional) skip existing files, defaults to True
216 |
:type skip_existing: bool
217 |
218 |
Path to the saved video
219 |
:rtype: str
220 |
221 |
222 |
file_path = self.get_file_path(
223 |
filename=filename, output_path=output_path, filename_prefix=filename_prefix
224 |
225 |
226 |
if skip_existing and self.exists_at_path(file_path):
227 |
logger.debug("file %s already exists, skipping", file_path)
228 |
229 |
return file_path
230 |
231 |
bytes_remaining = self.filesize
232 |
233 |
"downloading (%s total bytes) file to %s", self.filesize, file_path,
234 |
235 |
236 |
with open(file_path, "wb") as fh:
237 |
for chunk in
238 |
# reduce the (bytes) remainder by the length of the chunk.
239 |
bytes_remaining -= len(chunk)
240 |
# send to the on_progress callback.
241 |
self.on_progress(chunk, fh, bytes_remaining)
242 |
243 |
return file_path
244 |
245 |
def get_file_path(
246 |
247 |
filename: Optional[str],
248 |
output_path: Optional[str],
249 |
filename_prefix: Optional[str] = None,
250 |
) -> str:
251 |
if filename:
252 |
filename = f"{safe_filename(filename)}.{self.subtype}"
253 |
254 |
filename = self.default_filename
255 |
if filename_prefix:
256 |
filename = f"{safe_filename(filename_prefix)}{filename}"
257 |
return os.path.join(target_directory(output_path), filename)
258 |
259 |
def exists_at_path(self, file_path: str) -> bool:
260 |
return os.path.isfile(file_path) and os.path.getsize(file_path) == self.filesize
261 |
262 |
def stream_to_buffer(self, buffer: BinaryIO) -> None:
263 |
"""Write the media stream to buffer
264 |
265 |
:rtype: io.BytesIO buffer
266 |
267 |
bytes_remaining = self.filesize
268 |
269 |
"downloading (%s total bytes) file to buffer", self.filesize,
270 |
271 |
272 |
for chunk in
273 |
# reduce the (bytes) remainder by the length of the chunk.
274 |
bytes_remaining -= len(chunk)
275 |
# send to the on_progress callback.
276 |
self.on_progress(chunk, buffer, bytes_remaining)
277 |
278 |
279 |
def on_progress(self, chunk: bytes, file_handler: BinaryIO, bytes_remaining: int):
280 |
"""On progress callback function.
281 |
282 |
This function writes the binary data to the file, then checks if an
283 |
additional callback is defined in the monostate. This is exposed to
284 |
allow things like displaying a progress bar.
285 |
286 |
:param bytes chunk:
287 |
Segment of media file binary data, not yet written to disk.
288 |
:param file_handler:
289 |
The file handle where the media is being written to.
297 |
298 |
299 |
300 |
logger.debug("download remaining: %s", bytes_remaining)
301 |
if self._monostate.on_progress:
302 |
self._monostate.on_progress(self, chunk, bytes_remaining)
303 |
304 |
def on_complete(self, file_path: Optional[str]):
305 |
"""On download complete handler function.
306 |
307 |
:param file_path:
308 |
The file handle where the media is being written to.
309 |
:type file_path: str
310 |
311 |
:rtype: None
312 |
313 |
314 |
logger.debug("download finished")
315 |
on_complete = self._monostate.on_complete
316 |
if on_complete:
317 |
logger.debug("calling on_complete callback %s", on_complete)
318 |
on_complete(self, file_path)
319 |
320 |
def __repr__(self) -> str:
321 |
"""Printable object representation.
322 |
323 |
:rtype: str
324 |
325 |
A string representation of a :class:`Stream <Stream>` object.
326 |
327 |
parts = ['itag="{s.itag}"', 'mime_type="{s.mime_type}"']
328 |
if self.includes_video_track:
329 |
parts.extend(['res="{s.resolution}"', 'fps="{s.fps}fps"'])
330 |
if not self.is_adaptive:
331 |
332 |
['vcodec="{s.video_codec}"', 'acodec="{s.audio_codec}"',]
333 |
334 |
335 |
336 |
337 |
parts.extend(['abr="{s.abr}"', 'acodec="{s.audio_codec}"'])
338 |
parts.extend(['progressive="{s.is_progressive}"', 'type="{s.type}"'])
339 |
return f"<Stream: {' '.join(parts).format(s=self)}>"
@@ -0,0 +1,6 @@
1 |
# -*- coding: utf-8 -*-
2 |
3 |
__version__ = "9.6.4"
4 |
5 |
if __name__ == "__main__":
6 |
@@ -1,23 +0,0 @@
1 |
2 |
commit = True
3 |
tag = True
4 |
current_version = 9.5.3
5 |
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+))?
6 |
serialize =
7 |
8 |
9 |
10 |
description-file =
11 |
12 |
13 |
14 |
15 |
16 |
17 |
source = pytube
18 |
omit =
19 |
20 |
21 |
22 |
ignore = W605
23 |
@@ -1,92 +1,58 @@
1 |
#!/usr/bin/env python
2 |
# -*- coding: utf-8 -*-
3 |
"""This module contains setup instructions for
4 |
import codecs
5 |
import os
6 |
import sys
7 |
from shutil import rmtree
8 |
9 |
from setuptools import Command
10 |
from setuptools import setup
11 |
12 |
here = os.path.abspath(os.path.dirname(__file__))
13 |
14 |
15 |
long_description =
16 |
17 |
18 |
class UploadCommand(Command):
19 |
"""Support publish."""
20 |
21 |
description = 'Build and publish the package.'
22 |
user_options = []
23 |
24 |
25 |
def status(s):
26 |
"""Prints things in bold."""
27 |
28 |
29 |
def initialize_options(self):
30 |
31 |
32 |
def finalize_options(self):
33 |
34 |
35 |
def run(self):
36 |
37 |
self.status('Removing previous builds ...')
38 |
rmtree(os.path.join(here, 'dist'))
39 |
except Exception:
40 |
41 |
self.status('Building Source distribution ...')
42 |
os.system('{0} sdist bdist_wheel'.format(sys.executable))
43 |
self.status('Uploading the package to PyPI via Twine ...')
44 |
os.system('twine upload dist/*')
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
'console_scripts': [
61 |
'pytube = pytube.cli:main',
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
'Topic :: Terminals',
84 |
'Topic :: Utilities',
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
1 |
#!/usr/bin/env python
2 |
# -*- coding: utf-8 -*-
3 |
"""This module contains setup instructions for pytube3."""
4 |
import codecs
5 |
import os
6 |
from setuptools import setup
7 |
8 |
here = os.path.abspath(os.path.dirname(__file__))
9 |
10 |
with, ""), encoding="utf-8") as fh:
11 |
long_description = "\n" +
12 |
13 |
with open(os.path.join(here, "pytube", "")) as fp:
14 |
15 |
16 |
17 |
18 |
version=__version__, # noqa: F821
19 |
author="Nick Ficano, Harold Martin",
20 |
21 |
packages=["pytube", "pytube.contrib"],
22 |
package_data={"": ["LICENSE"],},
23 |
24 |
25 |
entry_points={"console_scripts": ["pytube3 = pytube.cli:main",],},
26 |
27 |
28 |
"Development Status :: 5 - Production/Stable",
29 |
"Environment :: Console",
30 |
"Intended Audience :: Developers",
31 |
"License :: OSI Approved :: MIT License",
32 |
"Natural Language :: English",
33 |
"Operating System :: MacOS",
34 |
"Operating System :: Microsoft",
35 |
"Operating System :: POSIX",
36 |
"Operating System :: Unix",
37 |
"Programming Language :: Python :: 3.6",
38 |
"Programming Language :: Python :: 3.7",
39 |
"Programming Language :: Python :: 3.8",
40 |
"Programming Language :: Python",
41 |
"Topic :: Internet",
42 |
"Topic :: Multimedia :: Video",
43 |
"Topic :: Software Development :: Libraries :: Python Modules",
44 |
"Topic :: Terminals",
45 |
"Topic :: Utilities",
46 |
47 |
description=("Python 3 library for downloading YouTube Videos."),
48 |
49 |
50 |
51 |
52 |
53 |
54 |
"Bug Reports": "",
55 |
"Read the Docs": "",
56 |
57 |
keywords=["youtube", "download", "video", "stream",],
58 |
@@ -1,6 +1,5 @@
1 |
# -*- coding: utf-8 -*-
2 |
"""Reusable dependency injected testing components."""
3 |
from __future__ import unicode_literals
4 |
5 |
import gzip
6 |
import json
@@ -15,39 +14,61 @@ def load_playback_file(filename):
15 |
"""Load a gzip json playback file."""
16 |
cur_fp = os.path.realpath(__file__)
17 |
cur_dir = os.path.dirname(cur_fp)
18 |
fp = os.path.join(cur_dir,
19 |
20 |
content =
21 |
return json.loads(content)
22 |
23 |
24 |
def load_and_init_from_playback_file(filename):
25 |
"""Load a gzip json playback file and create YouTube instance."""
26 |
pb = load_playback_file(filename)
27 |
yt = YouTube(pb[
28 |
yt.watch_html = pb[
29 |
yt.js = pb[
30 |
yt.vid_info = pb[
31 |
32 |
return yt
33 |
34 |
35 |
36 |
def cipher_signature():
37 |
"""Youtube instance initialized with video id 9bZkp7q19f0."""
38 |
filename =
39 |
return load_and_init_from_playback_file(filename)
40 |
41 |
42 |
43 |
def presigned_video():
44 |
"""Youtube instance initialized with video id QRS8MkLhQmM."""
45 |
filename =
46 |
return load_and_init_from_playback_file(filename)
47 |
48 |
49 |
50 |
def age_restricted():
51 |
"""Youtube instance initialized with video id zRbsm3e2ltw."""
52 |
filename =
53 |
return load_playback_file(filename)
1 |
# -*- coding: utf-8 -*-
2 |
"""Reusable dependency injected testing components."""
3 |
4 |
import gzip
5 |
import json
14 |
"""Load a gzip json playback file."""
15 |
cur_fp = os.path.realpath(__file__)
16 |
cur_dir = os.path.dirname(cur_fp)
17 |
fp = os.path.join(cur_dir, "mocks", filename)
18 |
with, "rb") as fh:
19 |
content ="utf-8")
20 |
return json.loads(content)
21 |
22 |
23 |
def load_and_init_from_playback_file(filename):
24 |
"""Load a gzip json playback file and create YouTube instance."""
25 |
pb = load_playback_file(filename)
26 |
yt = YouTube(pb["url"], defer_prefetch_init=True)
27 |
yt.watch_html = pb["watch_html"]
28 |
yt.js = pb["js"]
29 |
yt.vid_info = pb["video_info"]
30 |
31 |
return yt
32 |
33 |
34 |
35 |
def cipher_signature():
36 |
"""Youtube instance initialized with video id 9bZkp7q19f0."""
37 |
filename = "yt-video-9bZkp7q19f0.json.gz"
38 |
return load_and_init_from_playback_file(filename)
39 |
40 |
41 |
42 |
def presigned_video():
43 |
"""Youtube instance initialized with video id QRS8MkLhQmM."""
44 |
filename = "yt-video-QRS8MkLhQmM.json.gz"
45 |
return load_and_init_from_playback_file(filename)
46 |
47 |
48 |
49 |
def age_restricted():
50 |
"""Youtube instance initialized with video id zRbsm3e2ltw."""
51 |
filename = "yt-video-irauhITDrsE.json.gz"
52 |
return load_playback_file(filename)
53 |
54 |
55 |
56 |
def playlist_html():
57 |
"""Youtube playlist HTML loaded on 2020-01-25 from
58 |
59 |
file_path = os.path.join(
60 |
os.path.dirname(os.path.realpath(__file__)), "mocks", "playlist.html.gz"
61 |
62 |
with, "rb") as f:
63 |
64 |
65 |
66 |
67 |
def playlist_long_html():
68 |
"""Youtube playlist HTML loaded on 2020-01-25 from
69 |
70 |
file_path = os.path.join(
71 |
os.path.dirname(os.path.realpath(__file__)), "mocks", "playlist_long.html.gz"
72 |
73 |
with, "rb") as f:
74 |
Binary file (1.36 kB)
@@ -1,10 +1,220 @@
1 |
# -*- coding: utf-8 -*-
2 |
from pytube import Playlist
3 |
4 |
5 |
6 |
7 |
8 |
pl = Playlist(url)
9 |
pl_title = pl.title()
10 |
assert pl_title ==
1 |
# -*- coding: utf-8 -*-
2 |
import datetime
3 |
from unittest import mock
4 |
from unittest.mock import MagicMock
5 |
6 |
from pytube import Playlist
7 |
8 |
9 |
10 |
def test_title(request_get):
11 |
request_get.return_value = (
12 |
"<title>(149) Python Tutorial for Beginners "
13 |
"(For Absolute Beginners) - YouTube</title>"
14 |
15 |
url = ""
16 |
pl = Playlist(url)
17 |
pl_title = pl.title()
18 |
assert pl_title == "(149) Python Tutorial for Beginners (For Absolute Beginners)"
19 |
20 |
21 |
22 |
def test_init_with_playlist_url(request_get):
23 |
request_get.return_value = ""
24 |
url = ""
25 |
playlist = Playlist(url)
26 |
assert playlist.playlist_url == url
27 |
28 |
29 |
30 |
def test_init_with_watch_url(request_get):
31 |
request_get.return_value = ""
32 |
url = (
33 |
34 |
35 |
36 |
playlist = Playlist(url)
37 |
assert (
38 |
39 |
== ""
40 |
41 |
42 |
43 |
44 |
def test_last_update(request_get, playlist_html):
45 |
expected =, 3, 7)
46 |
request_get.return_value = playlist_html
47 |
playlist = Playlist("url")
48 |
assert playlist.last_update == expected
49 |
50 |
51 |
52 |
def test_init_with_watch_id(request_get):
53 |
request_get.return_value = ""
54 |
playlist = Playlist("PLS1QulWo1RIaJECMeUT4LFwJ-ghgoSH6n")
55 |
assert (
56 |
57 |
== ""
58 |
59 |
60 |
61 |
62 |
def test_video_urls(request_get, playlist_html):
63 |
url = ""
64 |
request_get.return_value = playlist_html
65 |
playlist = Playlist(url)
66 |
playlist._find_load_more_url = MagicMock(return_value=None)
67 |
68 |
assert playlist.video_urls == [
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
def test_repr(request_get, playlist_html):
86 |
url = ""
87 |
request_get.return_value = playlist_html
88 |
playlist = Playlist(url)
89 |
playlist._find_load_more_url = MagicMock(return_value=None)
90 |
91 |
assert (
92 |
repr(playlist) == "['', "
93 |
"'', "
94 |
"'', "
95 |
"'', "
96 |
"'', "
97 |
"'', "
98 |
"'', "
99 |
"'', "
100 |
"'', "
101 |
"'', "
102 |
"'', "
103 |
104 |
105 |
106 |
107 |
108 |
def test_sequence(request_get, playlist_html):
109 |
url = ""
110 |
request_get.return_value = playlist_html
111 |
playlist = Playlist(url)
112 |
playlist._find_load_more_url = MagicMock(return_value=None)
113 |
assert playlist[0] == ""
114 |
assert len(playlist) == 12
115 |
116 |
117 |
118 |
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
119 |
def test_videos(youtube, request_get, playlist_html):
120 |
url = ""
121 |
request_get.return_value = playlist_html
122 |
playlist = Playlist(url)
123 |
playlist._find_load_more_url = MagicMock(return_value=None)
124 |
125 |
assert len(list(playlist.videos)) == 12
126 |
127 |
128 |
129 |
@mock.patch("pytube.cli.YouTube.__init__", return_value=None)
130 |
def test_load_more(youtube, request_get, playlist_html):
131 |
url = ""
132 |
request_get.side_effect = [
133 |
134 |
'{"content_html":"", "load_more_widget_html":""}',
135 |
136 |
playlist = Playlist(url)
137 |
playlist._find_load_more_url = MagicMock(side_effect=["dummy", None])
138 |
139 |
assert len(list(playlist.videos)) == 12
140 |
141 |
142 |
143 |
@mock.patch("pytube.contrib.playlist.install_proxy", return_value=None)
144 |
def test_proxy(install_proxy, request_get):
145 |
url = ""
146 |
request_get.return_value = ""
147 |
Playlist(url, proxies={"http": "things"})
148 |
install_proxy.assert_called_with({"http": "things"})
149 |
150 |
151 |
152 |
def test_trimmed(request_get, playlist_html):
153 |
url = ""
154 |
request_get.return_value = playlist_html
155 |
playlist = Playlist(url)
156 |
playlist._find_load_more_url = MagicMock(return_value=None)
157 |
assert request_get.call_count == 1
158 |
trimmed = list(playlist.trimmed("1BYu65vLKdA"))
159 |
assert trimmed == [
160 |
161 |
162 |
163 |
164 |
165 |
166 |
def test_playlist_failed_pagination(request_get, playlist_long_html):
167 |
url = ""
168 |
request_get.side_effect = [
169 |
170 |
171 |
172 |
playlist = Playlist(url)
173 |
video_urls = playlist.video_urls
174 |
assert len(video_urls) == 100
175 |
assert request_get.call_count == 2
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
def test_playlist_pagination(request_get, playlist_html, playlist_long_html):
184 |
url = ""
185 |
request_get.side_effect = [
186 |
187 |
'{"content_html":"<a href=\\"/watch?v=BcWz41-4cDk&feature=plpp_video&ved'
188 |
'=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", "load_more_widget_html":""}',
189 |
190 |
191 |
playlist = Playlist(url)
192 |
assert len(playlist.video_urls) == 101
193 |
assert request_get.call_count == 2
194 |
195 |
196 |
197 |
def test_trimmed_pagination(request_get, playlist_html, playlist_long_html):
198 |
url = ""
199 |
request_get.side_effect = [
200 |
201 |
'{"content_html":"<a href=\\"/watch?v=BcWz41-4cDk&feature=plpp_video&ved'
202 |
'=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", "load_more_widget_html":""}',
203 |
204 |
205 |
playlist = Playlist(url)
206 |
assert len(list(playlist.trimmed("FN9vC8aR7Yk"))) == 3
207 |
assert request_get.call_count == 1
208 |
209 |
210 |
211 |
def test_trimmed_pagination_not_found(request_get, playlist_html, playlist_long_html):
212 |
url = ""
213 |
request_get.side_effect = [
214 |
215 |
'{"content_html":"<a href=\\"/watch?v=BcWz41-4cDk&feature=plpp_video&ved'
216 |
'=CCYQxjQYACITCO33n5-pn-cCFUG3xAodLogN2yj6LA\\">}", "load_more_widget_html":""}',
217 |
218 |
219 |
playlist = Playlist(url)
220 |
assert len(list(playlist.trimmed("wont-be-found"))) == 101
@@ -0,0 +1,28 @@
1 |
#!/usr/bin/env python3
2 |
3 |
# flake8: noqa: E402
4 |
5 |
from os import path
6 |
import sys
7 |
import json
8 |
9 |
currentdir = path.dirname(path.realpath(__file__))
10 |
parentdir = path.dirname(currentdir)
11 |
12 |
13 |
from pytube import YouTube
14 |
15 |
yt = YouTube(sys.argv[1], defer_prefetch_init=True)
16 |
17 |
output = {
18 |
"url": sys.argv[1],
19 |
"watch_html": yt.watch_html,
20 |
"video_info": yt.vid_info,
21 |
"js": yt.js,
22 |
"embed_html": yt.embed_html,
23 |
24 |
25 |
outpath = path.join(currentdir, "mocks", "yt-video-" + yt.video_id + ".json")
26 |
print("Writing to: " + outpath)
27 |
with open(outpath, "w") as f:
28 |
json.dump(output, f)
Binary file (33.2 kB). View file
Binary file (47.7 kB). View file