rearranged caption helpers
Browse files- pytube/captions.py +51 -2
- pytube/helpers.py +0 -51
pytube/captions.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""This module contains a container for caption tracks."""
|
|
|
|
|
|
|
|
|
3 |
from pytube import request
|
4 |
-
from pytube.helpers import xml_caption_to_srt
|
5 |
|
6 |
|
7 |
class Caption:
|
@@ -28,7 +32,52 @@ class Caption:
|
|
28 |
Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
|
29 |
recompiles them into the "SubRip Subtitle" format.
|
30 |
"""
|
31 |
-
return xml_caption_to_srt(self.xml_captions)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
def __repr__(self):
|
34 |
"""Printable object representation."""
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
"""This module contains a container for caption tracks."""
|
3 |
+
import math
|
4 |
+
import time
|
5 |
+
import xml.etree.ElementTree as ElementTree
|
6 |
+
|
7 |
from pytube import request
|
8 |
+
from pytube.compat import unescape
|
9 |
|
10 |
|
11 |
class Caption:
|
|
|
32 |
Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
|
33 |
recompiles them into the "SubRip Subtitle" format.
|
34 |
"""
|
35 |
+
return self.xml_caption_to_srt(self.xml_captions)
|
36 |
+
|
37 |
+
def float_to_srt_time_format(self, d):
    """Convert decimal durations into proper srt format.

    :param float d:
        Time offset or duration in seconds; may carry a fractional part.
    :rtype: str
    :returns:
        SubRip Subtitle (str) formatted time duration, ``HH:MM:SS,mmm``.
        For example ``3.89`` is rendered as ``'00:00:03,890'``.
    """
    # Work in whole milliseconds so that a fraction which rounds up to a
    # full second carries into the seconds field (instead of producing a
    # malformed ',1.000' suffix).
    total_ms = int(round(d * 1000))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    seconds, millis = divmod(remainder, 1000)
    # Each field is zero-padded exactly once; hours are not wrapped at 24.
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(
        hours, minutes, seconds, millis,
    )
|
51 |
+
|
52 |
+
def xml_caption_to_srt(self, xml_captions):
    """Convert xml caption tracks to "SubRip Subtitle (srt)".

    :param str xml_captions:
        XML formatted caption tracks.
    :rtype: str
    :returns:
        The caption segments in srt form: a 1-based sequence number, a
        ``start --> end`` time line and the caption text, with segments
        separated by a blank line.
    :raises KeyError:
        If a child element lacks a ``start`` or ``dur`` attribute.
    """
    segments = []
    root = ElementTree.fromstring(xml_captions)
    # Iterate the element itself: ``Element.getchildren()`` was removed in
    # Python 3.9. Counting from 1 gives the srt sequence number directly.
    for sequence_number, child in enumerate(root, 1):
        text = child.text or ''
        # Flatten each caption onto one line, then collapse the doubled
        # spaces that the newline replacement can leave behind.
        caption = unescape(text.replace('\n', ' ').replace('  ', ' '))
        start = float(child.attrib['start'])
        end = start + float(child.attrib['dur'])
        segments.append(
            '{seq}\n{start} --> {end}\n{text}\n'.format(
                seq=sequence_number,
                start=self.float_to_srt_time_format(start),
                end=self.float_to_srt_time_format(end),
                text=caption,
            ),
        )
    return '\n'.join(segments).strip()
|
81 |
|
82 |
def __repr__(self):
|
83 |
"""Printable object representation."""
|
pytube/helpers.py
CHANGED
@@ -3,13 +3,9 @@
|
|
3 |
from __future__ import absolute_import
|
4 |
|
5 |
import logging
|
6 |
-
import math
|
7 |
import pprint
|
8 |
import re
|
9 |
-
import time
|
10 |
-
import xml.etree.ElementTree as ElementTree
|
11 |
|
12 |
-
from pytube.compat import unescape
|
13 |
from pytube.compat import unicode
|
14 |
from pytube.exceptions import RegexMatchError
|
15 |
|
@@ -92,50 +88,3 @@ def safe_filename(s, max_length=255):
|
|
92 |
regex = re.compile(pattern, re.UNICODE)
|
93 |
filename = regex.sub('', s)
|
94 |
return unicode(filename[:max_length].rsplit(' ', 0)[0])
|
95 |
-
|
96 |
-
|
97 |
-
def float_to_srt_time_format(d):
    """Convert decimal durations into proper srt format.

    :param float d:
        Time offset or duration in seconds; may carry a fractional part.
    :rtype: str
    :returns:
        SubRip Subtitle (str) formatted time duration, ``HH:MM:SS,mmm``.

    >>> float_to_srt_time_format(3.89)
    '00:00:03,890'
    """
    # Work in whole milliseconds so that a fraction which rounds up to a
    # full second carries into the seconds field (instead of producing a
    # malformed ',1.000' suffix).
    total_ms = int(round(d * 1000))
    hours, remainder = divmod(total_ms, 3600 * 1000)
    minutes, remainder = divmod(remainder, 60 * 1000)
    seconds, millis = divmod(remainder, 1000)
    # Each field is zero-padded exactly once; hours are not wrapped at 24.
    return '{:02d}:{:02d}:{:02d},{:03d}'.format(
        hours, minutes, seconds, millis,
    )
|
111 |
-
|
112 |
-
|
113 |
-
def xml_caption_to_srt(xml_captions):
    """Convert xml caption tracks to "SubRip Subtitle (srt)".

    :param str xml_captions:
        XML formatted caption tracks.
    :rtype: str
    :returns:
        The caption segments in srt form: a 1-based sequence number, a
        ``start --> end`` time line and the caption text, with segments
        separated by a blank line.
    :raises KeyError:
        If a child element lacks a ``start`` or ``dur`` attribute.
    """
    segments = []
    root = ElementTree.fromstring(xml_captions)
    # Iterate the element itself: ``Element.getchildren()`` was removed in
    # Python 3.9. Counting from 1 gives the srt sequence number directly.
    for sequence_number, child in enumerate(root, 1):
        text = child.text or ''
        # Flatten each caption onto one line, then collapse the doubled
        # spaces that the newline replacement can leave behind.
        caption = unescape(text.replace('\n', ' ').replace('  ', ' '))
        start = float(child.attrib['start'])
        end = start + float(child.attrib['dur'])
        segments.append(
            '{seq}\n{start} --> {end}\n{text}\n'.format(
                seq=sequence_number,
                start=float_to_srt_time_format(start),
                end=float_to_srt_time_format(end),
                text=caption,
            ),
        )
    return '\n'.join(segments).strip()
|
|
|
3 |
from __future__ import absolute_import
|
4 |
|
5 |
import logging
|
|
|
6 |
import pprint
|
7 |
import re
|
|
|
|
|
8 |
|
|
|
9 |
from pytube.compat import unicode
|
10 |
from pytube.exceptions import RegexMatchError
|
11 |
|
|
|
88 |
regex = re.compile(pattern, re.UNICODE)
|
89 |
filename = regex.sub('', s)
|
90 |
return unicode(filename[:max_length].rsplit(' ', 0)[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|