Alexander Czyrny committed on
Commit
1776b2f
·
unverified ·
1 Parent(s): 67e6144

Extracting publish date (#794)

Browse files

* Extracting publish date

* Added unit test for .publish_date attribute.

Files changed (3) hide show
  1. pytube/__main__.py +9 -0
  2. pytube/extract.py +19 -0
  3. tests/test_extract.py +7 -0
pytube/__main__.py CHANGED
@@ -262,6 +262,15 @@ class YouTube:
262
 
263
  return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"
264
 
 
 
 
 
 
 
 
 
 
265
  @property
266
  def title(self) -> str:
267
  """Get the video title.
 
262
 
263
  return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"
264
 
265
+ @property
266
+ def publish_date(self):
267
+ """Get the publish date.
268
+
269
+ :rtype: datetime
270
+
271
+ """
272
+ return extract.publish_date(self.watch_html)
273
+
274
  @property
275
  def title(self) -> str:
276
  """Get the video title.
pytube/extract.py CHANGED
@@ -4,6 +4,7 @@ import json
4
  import logging
5
  import re
6
  from collections import OrderedDict
 
7
  from typing import Any
8
  from typing import Dict
9
  from typing import List
@@ -22,6 +23,24 @@ from pytube.helpers import regex_search
22
  logger = logging.getLogger(__name__)
23
 
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def recording_available(watch_html):
26
  """Check if live stream recording is available.
27
 
 
4
  import logging
5
  import re
6
  from collections import OrderedDict
7
+ from datetime import datetime
8
  from typing import Any
9
  from typing import Dict
10
  from typing import List
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
+ def publish_date(watch_html: str):
27
+ """Extract publish date
28
+ :param str watch_html:
29
+ The html contents of the watch page.
30
+ :rtype: datetime or None
31
+ :returns:
32
+ Publish date of the video.
33
+ """
34
+ try:
35
+ result = regex_search(
36
+ r"(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}",
37
+ watch_html, group=0
38
+ )
39
+ except RegexMatchError:
40
+ return None
41
+ return datetime.strptime(result, '%Y-%m-%d')
42
+
43
+
44
  def recording_available(watch_html):
45
  """Check if live stream recording is available.
46
 
tests/test_extract.py CHANGED
@@ -1,5 +1,6 @@
1
  # -*- coding: utf-8 -*-
2
  """Unit tests for the :module:`extract <extract>` module."""
 
3
  import pytest
4
 
5
  from pytube import extract
@@ -64,6 +65,12 @@ def test_recording_available(cipher_signature):
64
  assert extract.recording_available(cipher_signature.watch_html)
65
 
66
 
 
 
 
 
 
 
67
  def test_not_recording_available(missing_recording):
68
  assert not extract.recording_available(missing_recording['watch_html'])
69
 
 
1
  # -*- coding: utf-8 -*-
2
  """Unit tests for the :module:`extract <extract>` module."""
3
+ from datetime import datetime
4
  import pytest
5
 
6
  from pytube import extract
 
65
  assert extract.recording_available(cipher_signature.watch_html)
66
 
67
 
68
+ def test_publish_date(cipher_signature):
69
+ expected = datetime(2019, 12, 5)
70
+ assert cipher_signature.publish_date == expected
71
+ assert extract.publish_date('') is None
72
+
73
+
74
  def test_not_recording_available(missing_recording):
75
  assert not extract.recording_available(missing_recording['watch_html'])
76