nficano committed on
Commit
edd7be4
·
unverified ·
2 Parent(s): 9dbec36 fac934b

Merge pull request #453 from bigg215/master

Browse files

Fixed 'title' KeyError and 403 forbidden issues on signed videos

Files changed (4) hide show
  1. pytube/__main__.py +14 -0
  2. pytube/cipher.py +13 -5
  3. pytube/mixins.py +6 -3
  4. pytube/request.py +8 -1
pytube/__main__.py CHANGED
@@ -117,6 +117,20 @@ class YouTube(object):
117
  self.watch_html,
118
  )['args']
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  self.vid_descr = extract.get_vid_descr(self.watch_html)
121
  # https://github.com/nficano/pytube/issues/165
122
  stream_maps = ['url_encoded_fmt_stream_map']
 
117
  self.watch_html,
118
  )['args']
119
 
120
+ # Fix for KeyError: 'title' issue #434
121
+ if 'title' not in self.player_config_args:
122
+ try:
123
+ from bs4 import BeautifulSoup
124
+ soup = BeautifulSoup(self.watch_html, 'lxml')
125
+ title = soup.title.get_text().strip()
126
+ except ModuleNotFoundError:
127
+ i_start = self.watch_html.lower().index('<title>') + len('<title>')
128
+ i_end = self.watch_html.lower().index('</title>')
129
+ title = self.watch_html[i_start:i_end].strip()
130
+ index = title.lower().rfind(' - youtube')
131
+ title = title[:index] if index > 0 else title
132
+ self.player_config_args['title'] = title
133
+
134
  self.vid_descr = extract.get_vid_descr(self.watch_html)
135
  # https://github.com/nficano/pytube/issues/165
136
  stream_maps = ['url_encoded_fmt_stream_map']
pytube/cipher.py CHANGED
@@ -35,14 +35,22 @@ def get_initial_function_name(js):
35
 
36
  """
37
  # c&&d.set("signature", EE(c));
 
 
38
  pattern = [
39
- r'yt\.akamaized\.net/\)\s*\|\|\s*'
40
- r'.*?\s*c\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent'
41
- r'\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
 
42
  r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
43
- r'\bc\s*&&\s*d\.set\([^,]+\s*,\s*(?:encodeURIComponent'
44
- r'\s*\()?(?P<sig>[a-zA-Z0-9$]+)\(',
 
 
 
 
45
  ]
 
46
  logger.debug('finding initial function name')
47
  return regex_search(pattern, js, group=1)
48
 
 
35
 
36
  """
37
  # c&&d.set("signature", EE(c));
38
+
39
+ #403 Forbidden fix.
40
  pattern = [
41
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
42
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
43
+ r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
44
+ r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
45
  r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
46
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<si$',
47
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
48
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
49
+ r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
50
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
51
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('
52
  ]
53
+
54
  logger.debug('finding initial function name')
55
  return regex_search(pattern, js, group=1)
56
 
pytube/mixins.py CHANGED
@@ -37,8 +37,10 @@ def apply_signature(config_args, fmt, js):
37
  url = stream['url']
38
  elif live_stream:
39
  raise LiveStreamError('Video is currently being streamed live')
40
-
41
- if any([x in url for x in ['signature=', 'sig=']]):
 
 
42
  # For certain videos, YouTube will just provide them pre-signed, in
43
  # which case there's no real magic to download them and we can skip
44
  # the whole signature descrambling entirely.
@@ -61,7 +63,8 @@ def apply_signature(config_args, fmt, js):
61
  }, indent=2,
62
  ),
63
  )
64
- stream_manifest[i]['url'] = url + '&signature=' + signature
 
65
 
66
 
67
  def apply_descrambler(stream_data, key):
 
37
  url = stream['url']
38
  elif live_stream:
39
  raise LiveStreamError('Video is currently being streamed live')
40
+ #403 Forbidden fix.
41
+ if('signature' in url or
42
+ ('s' not in stream and
43
+ ('&sig=' in url or '&lsig=' in url))):
44
  # For certain videos, YouTube will just provide them pre-signed, in
45
  # which case there's no real magic to download them and we can skip
46
  # the whole signature descrambling entirely.
 
63
  }, indent=2,
64
  ),
65
  )
66
+ #403 forbidden fix
67
+ stream_manifest[i]['url'] = url + '&sig=' + signature
68
 
69
 
70
  def apply_descrambler(stream_data, key):
pytube/request.py CHANGED
@@ -2,6 +2,8 @@
2
  """Implements a simple wrapper around urlopen."""
3
  from pytube.compat import urlopen
4
 
 
 
5
 
6
  def get(
7
  url=None, headers=False,
@@ -18,7 +20,12 @@ def get(
18
  :param int chunk_size:
19
  The size in bytes of each chunk.
20
  """
21
- response = urlopen(url)
 
 
 
 
 
22
  if streaming:
23
  return stream_response(response, chunk_size)
24
  elif headers:
 
2
  """Implements a simple wrapper around urlopen."""
3
  from pytube.compat import urlopen
4
 
5
+ #403 forbidden fix
6
+ import urllib.request
7
 
8
  def get(
9
  url=None, headers=False,
 
20
  :param int chunk_size:
21
  The size in bytes of each chunk.
22
  """
23
+
24
+ #403 forbidden fix
25
+ req = urllib.request.Request(url, headers = {"User-Agent": "Mozilla/5.0"})
26
+ #response = urlopen(url)
27
+ response = urlopen(req)
28
+
29
  if streaming:
30
  return stream_response(response, chunk_size)
31
  elif headers: