nficano committed on
Commit
c5d74e1
·
1 Parent(s): 4463e34

rearranged caption helpers

Browse files
Files changed (2) hide show
  1. pytube/captions.py +51 -2
  2. pytube/helpers.py +0 -51
pytube/captions.py CHANGED
@@ -1,7 +1,11 @@
1
  # -*- coding: utf-8 -*-
2
  """This module contrains a container for caption tracks."""
 
 
 
 
3
  from pytube import request
4
- from pytube.helpers import xml_caption_to_srt
5
 
6
 
7
  class Caption:
@@ -28,7 +32,52 @@ class Caption:
28
  Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
29
  recompiles them into the "SubRip Subtitle" format.
30
  """
31
- return xml_caption_to_srt(self.xml_captions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  def __repr__(self):
34
  """Printable object representation."""
 
1
  # -*- coding: utf-8 -*-
2
  """This module contrains a container for caption tracks."""
3
+ import math
4
+ import time
5
+ import xml.etree.ElementTree as ElementTree
6
+
7
  from pytube import request
8
+ from pytube.compat import unescape
9
 
10
 
11
  class Caption:
 
32
  Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
33
  recompiles them into the "SubRip Subtitle" format.
34
  """
35
+ return self.xml_caption_to_srt(self.xml_captions)
36
+
37
+ def float_to_srt_time_format(self, d):
38
+ """Convert decimal durations into proper srt format.
39
+
40
+ :rtype: str
41
+ :returns:
42
+ SubRip Subtitle (str) formatted time duration.
43
+
44
+ >>> float_to_srt_time_format(3.89)
45
+ '00:00:03,890'
46
+ """
47
+ frac, whole = math.modf(d)
48
+ time_fmt = time.strftime('0%H:0%M:%S,', time.gmtime(whole))
49
+ ms = '{:.3f}'.format(frac).replace('0.', '')
50
+ return time_fmt + ms
51
+
52
+ def xml_caption_to_srt(self, xml_captions):
53
+ """Convert xml caption tracks to "SubRip Subtitle (srt)".
54
+
55
+ :param str xml_captions:
56
+ XML formatted caption tracks.
57
+ """
58
+ segments = []
59
+ root = ElementTree.fromstring(xml_captions)
60
+ for i, child in enumerate(root.getchildren()):
61
+ text = child.text or ''
62
+ caption = unescape(
63
+ text
64
+ .replace('\n', ' ')
65
+ .replace(' ', ' '),
66
+ )
67
+ duration = float(child.attrib['dur'])
68
+ start = float(child.attrib['start'])
69
+ end = start + duration
70
+ sequence_number = i + 1 # convert from 0-indexed to 1.
71
+ line = (
72
+ '{seq}\n{start} --> {end}\n{text}\n'.format(
73
+ seq=sequence_number,
74
+ start=self.float_to_srt_time_format(start),
75
+ end=self.float_to_srt_time_format(end),
76
+ text=caption,
77
+ )
78
+ )
79
+ segments.append(line)
80
+ return '\n'.join(segments).strip()
81
 
82
  def __repr__(self):
83
  """Printable object representation."""
pytube/helpers.py CHANGED
@@ -3,13 +3,9 @@
3
  from __future__ import absolute_import
4
 
5
  import logging
6
- import math
7
  import pprint
8
  import re
9
- import time
10
- import xml.etree.ElementTree as ElementTree
11
 
12
- from pytube.compat import unescape
13
  from pytube.compat import unicode
14
  from pytube.exceptions import RegexMatchError
15
 
@@ -92,50 +88,3 @@ def safe_filename(s, max_length=255):
92
  regex = re.compile(pattern, re.UNICODE)
93
  filename = regex.sub('', s)
94
  return unicode(filename[:max_length].rsplit(' ', 0)[0])
95
-
96
-
97
- def float_to_srt_time_format(d):
98
- """Convert decimal durations into proper srt format.
99
-
100
- :rtype: str
101
- :returns:
102
- SubRip Subtitle (str) formatted time duration.
103
-
104
- >>> float_to_srt_time_format(3.89)
105
- '00:00:03,890'
106
- """
107
- frac, whole = math.modf(d)
108
- time_fmt = time.strftime('0%H:0%M:%S,', time.gmtime(whole))
109
- ms = '{:.3f}'.format(frac).replace('0.', '')
110
- return time_fmt + ms
111
-
112
-
113
- def xml_caption_to_srt(xml_captions):
114
- """Convert xml caption tracks to "SubRip Subtitle (srt)".
115
-
116
- :param str xml_captions:
117
- XML formatted caption tracks.
118
- """
119
- segments = []
120
- root = ElementTree.fromstring(xml_captions)
121
- for i, child in enumerate(root.getchildren()):
122
- text = child.text or ''
123
- caption = unescape(
124
- text
125
- .replace('\n', ' ')
126
- .replace(' ', ' '),
127
- )
128
- duration = float(child.attrib['dur'])
129
- start = float(child.attrib['start'])
130
- end = start + duration
131
- sequence_number = i + 1 # convert from 0-indexed to 1.
132
- line = (
133
- '{seq}\n{start} --> {end}\n{text}\n'.format(
134
- seq=sequence_number,
135
- start=float_to_srt_time_format(start),
136
- end=float_to_srt_time_format(end),
137
- text=caption,
138
- )
139
- )
140
- segments.append(line)
141
- return '\n'.join(segments).strip()
 
3
  from __future__ import absolute_import
4
 
5
  import logging
 
6
  import pprint
7
  import re
 
 
8
 
 
9
  from pytube.compat import unicode
10
  from pytube.exceptions import RegexMatchError
11
 
 
88
  regex = re.compile(pattern, re.UNICODE)
89
  filename = regex.sub('', s)
90
  return unicode(filename[:max_length].rsplit(' ', 0)[0])