Merge pull request #453 from bigg215/master
Browse files
Fixed 'title' KeyError and 403 Forbidden issues on signed videos
- pytube/__main__.py +14 -0
- pytube/cipher.py +13 -5
- pytube/mixins.py +6 -3
- pytube/request.py +8 -1
pytube/__main__.py
CHANGED
@@ -117,6 +117,20 @@ class YouTube(object):
|
|
117 |
self.watch_html,
|
118 |
)['args']
|
119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
self.vid_descr = extract.get_vid_descr(self.watch_html)
|
121 |
# https://github.com/nficano/pytube/issues/165
|
122 |
stream_maps = ['url_encoded_fmt_stream_map']
|
|
|
117 |
self.watch_html,
|
118 |
)['args']
|
119 |
|
120 |
+
# Fix for KeyError: 'title' issue #434
|
121 |
+
if 'title' not in self.player_config_args:
|
122 |
+
try:
|
123 |
+
from bs4 import BeautifulSoup
|
124 |
+
soup = BeautifulSoup(self.watch_html, 'lxml')
|
125 |
+
title = soup.title.get_text().strip()
|
126 |
+
except ModuleNotFoundError:
|
127 |
+
i_start = self.watch_html.lower().index('<title>') + len('<title>')
|
128 |
+
i_end = self.watch_html.lower().index('</title>')
|
129 |
+
title = self.watch_html[i_start:i_end].strip()
|
130 |
+
index = title.lower().rfind(' - youtube')
|
131 |
+
title = title[:index] if index > 0 else title
|
132 |
+
self.player_config_args['title'] = title
|
133 |
+
|
134 |
self.vid_descr = extract.get_vid_descr(self.watch_html)
|
135 |
# https://github.com/nficano/pytube/issues/165
|
136 |
stream_maps = ['url_encoded_fmt_stream_map']
|
pytube/cipher.py
CHANGED
@@ -35,14 +35,22 @@ def get_initial_function_name(js):
|
|
35 |
|
36 |
"""
|
37 |
# c&&d.set("signature", EE(c));
|
|
|
|
|
38 |
pattern = [
|
39 |
-
r'
|
40 |
-
r'
|
41 |
-
r'
|
|
|
42 |
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
43 |
-
r'\
|
44 |
-
r'\s
|
|
|
|
|
|
|
|
|
45 |
]
|
|
|
46 |
logger.debug('finding initial function name')
|
47 |
return regex_search(pattern, js, group=1)
|
48 |
|
|
|
35 |
|
36 |
"""
|
37 |
# c&&d.set("signature", EE(c));
|
38 |
+
|
39 |
+
#403 Forbidden fix.
|
40 |
pattern = [
|
41 |
+
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
42 |
+
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
43 |
+
r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
|
44 |
+
r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
45 |
r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
|
46 |
+
r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<si$',
|
47 |
+
r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
48 |
+
r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
49 |
+
r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
50 |
+
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
|
51 |
+
r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('
|
52 |
]
|
53 |
+
|
54 |
logger.debug('finding initial function name')
|
55 |
return regex_search(pattern, js, group=1)
|
56 |
|
pytube/mixins.py
CHANGED
@@ -37,8 +37,10 @@ def apply_signature(config_args, fmt, js):
|
|
37 |
url = stream['url']
|
38 |
elif live_stream:
|
39 |
raise LiveStreamError('Video is currently being streamed live')
|
40 |
-
|
41 |
-
if
|
|
|
|
|
42 |
# For certain videos, YouTube will just provide them pre-signed, in
|
43 |
# which case there's no real magic to download them and we can skip
|
44 |
# the whole signature descrambling entirely.
|
@@ -61,7 +63,8 @@ def apply_signature(config_args, fmt, js):
|
|
61 |
}, indent=2,
|
62 |
),
|
63 |
)
|
64 |
-
|
|
|
65 |
|
66 |
|
67 |
def apply_descrambler(stream_data, key):
|
|
|
37 |
url = stream['url']
|
38 |
elif live_stream:
|
39 |
raise LiveStreamError('Video is currently being streamed live')
|
40 |
+
#403 Forbidden fix.
|
41 |
+
if('signature' in url or
|
42 |
+
('s' not in stream and
|
43 |
+
('&sig=' in url or '&lsig=' in url))):
|
44 |
# For certain videos, YouTube will just provide them pre-signed, in
|
45 |
# which case there's no real magic to download them and we can skip
|
46 |
# the whole signature descrambling entirely.
|
|
|
63 |
}, indent=2,
|
64 |
),
|
65 |
)
|
66 |
+
#403 forbidden fix
|
67 |
+
stream_manifest[i]['url'] = url + '&sig=' + signature
|
68 |
|
69 |
|
70 |
def apply_descrambler(stream_data, key):
|
pytube/request.py
CHANGED
@@ -2,6 +2,8 @@
|
|
2 |
"""Implements a simple wrapper around urlopen."""
|
3 |
from pytube.compat import urlopen
|
4 |
|
|
|
|
|
5 |
|
6 |
def get(
|
7 |
url=None, headers=False,
|
@@ -18,7 +20,12 @@ def get(
|
|
18 |
:param int chunk_size:
|
19 |
The size in bytes of each chunk.
|
20 |
"""
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
22 |
if streaming:
|
23 |
return stream_response(response, chunk_size)
|
24 |
elif headers:
|
|
|
2 |
"""Implements a simple wrapper around urlopen."""
|
3 |
from pytube.compat import urlopen
|
4 |
|
5 |
+
#403 forbidden fix
|
6 |
+
import urllib.request
|
7 |
|
8 |
def get(
|
9 |
url=None, headers=False,
|
|
|
20 |
:param int chunk_size:
|
21 |
The size in bytes of each chunk.
|
22 |
"""
|
23 |
+
|
24 |
+
#403 forbidden fix
|
25 |
+
req = urllib.request.Request(url, headers = {"User-Agent": "Mozilla/5.0"})
|
26 |
+
#response = urlopen(url)
|
27 |
+
response = urlopen(req)
|
28 |
+
|
29 |
if streaming:
|
30 |
return stream_response(response, chunk_size)
|
31 |
elif headers:
|