Spaces:
Running
Running
# -*- coding: utf-8 -*- | |
""" | |
This module offers a generic date/time string parser which is able to parse | |
most known formats to represent a date and/or time. | |
This module attempts to be forgiving with regards to unlikely input formats, | |
returning a datetime object even for dates which are ambiguous. If an element | |
of a date/time stamp is omitted, the following rules are applied: | |
- If AM or PM is left unspecified, a 24-hour clock is assumed, however, an hour | |
on a 12-hour clock (``0 <= hour <= 12``) *must* be specified if AM or PM is | |
specified. | |
- If a time zone is omitted, a timezone-naive datetime is returned. | |
If any other elements are missing, they are taken from the | |
:class:`datetime.datetime` object passed to the parameter ``default``. If this | |
results in a day number exceeding the valid number of days per month, the | |
value falls back to the end of the month. | |
Additional resources about date/time string formats can be found below: | |
- `A summary of the international standard date and time notation | |
<https://www.cl.cam.ac.uk/~mgk25/iso-time.html>`_ | |
- `W3C Date and Time Formats <https://www.w3.org/TR/NOTE-datetime>`_ | |
- `Time Formats (Planetary Rings Node) <https://pds-rings.seti.org:443/tools/time_formats.html>`_ | |
- `CPAN ParseDate module | |
<https://metacpan.org/pod/release/MUIR/Time-modules-2013.0912/lib/Time/ParseDate.pm>`_ | |
- `Java SimpleDateFormat Class | |
<https://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html>`_ | |
""" | |
from __future__ import unicode_literals | |
import datetime | |
import re | |
import string | |
import time | |
import warnings | |
from calendar import monthrange | |
from io import StringIO | |
import six | |
from six import integer_types, text_type | |
from decimal import Decimal | |
from warnings import warn | |
from .. import relativedelta | |
from .. import tz | |
__all__ = ["parse", "parserinfo", "ParserError"] | |
# TODO: pandas.core.tools.datetimes imports this explicitly. Might be worth | |
# making public and/or figuring out if there is something we can | |
# take off their plate. | |
class _timelex(object): | |
# Fractional seconds are sometimes split by a comma | |
_split_decimal = re.compile("([.,])") | |
def __init__(self, instream): | |
if isinstance(instream, (bytes, bytearray)): | |
instream = instream.decode() | |
if isinstance(instream, text_type): | |
instream = StringIO(instream) | |
elif getattr(instream, 'read', None) is None: | |
raise TypeError('Parser must be a string or character stream, not ' | |
'{itype}'.format(itype=instream.__class__.__name__)) | |
self.instream = instream | |
self.charstack = [] | |
self.tokenstack = [] | |
self.eof = False | |
def get_token(self): | |
""" | |
This function breaks the time string into lexical units (tokens), which | |
can be parsed by the parser. Lexical units are demarcated by changes in | |
the character set, so any continuous string of letters is considered | |
one unit, any continuous string of numbers is considered one unit. | |
The main complication arises from the fact that dots ('.') can be used | |
both as separators (e.g. "Sep.20.2009") or decimal points (e.g. | |
"4:30:21.447"). As such, it is necessary to read the full context of | |
any dot-separated strings before breaking it into tokens; as such, this | |
function maintains a "token stack", for when the ambiguous context | |
demands that multiple tokens be parsed at once. | |
""" | |
if self.tokenstack: | |
return self.tokenstack.pop(0) | |
seenletters = False | |
token = None | |
state = None | |
while not self.eof: | |
# We only realize that we've reached the end of a token when we | |
# find a character that's not part of the current token - since | |
# that character may be part of the next token, it's stored in the | |
# charstack. | |
if self.charstack: | |
nextchar = self.charstack.pop(0) | |
else: | |
nextchar = self.instream.read(1) | |
while nextchar == '\x00': | |
nextchar = self.instream.read(1) | |
if not nextchar: | |
self.eof = True | |
break | |
elif not state: | |
# First character of the token - determines if we're starting | |
# to parse a word, a number or something else. | |
token = nextchar | |
if self.isword(nextchar): | |
state = 'a' | |
elif self.isnum(nextchar): | |
state = '0' | |
elif self.isspace(nextchar): | |
token = ' ' | |
break # emit token | |
else: | |
break # emit token | |
elif state == 'a': | |
# If we've already started reading a word, we keep reading | |
# letters until we find something that's not part of a word. | |
seenletters = True | |
if self.isword(nextchar): | |
token += nextchar | |
elif nextchar == '.': | |
token += nextchar | |
state = 'a.' | |
else: | |
self.charstack.append(nextchar) | |
break # emit token | |
elif state == '0': | |
# If we've already started reading a number, we keep reading | |
# numbers until we find something that doesn't fit. | |
if self.isnum(nextchar): | |
token += nextchar | |
elif nextchar == '.' or (nextchar == ',' and len(token) >= 2): | |
token += nextchar | |
state = '0.' | |
else: | |
self.charstack.append(nextchar) | |
break # emit token | |
elif state == 'a.': | |
# If we've seen some letters and a dot separator, continue | |
# parsing, and the tokens will be broken up later. | |
seenletters = True | |
if nextchar == '.' or self.isword(nextchar): | |
token += nextchar | |
elif self.isnum(nextchar) and token[-1] == '.': | |
token += nextchar | |
state = '0.' | |
else: | |
self.charstack.append(nextchar) | |
break # emit token | |
elif state == '0.': | |
# If we've seen at least one dot separator, keep going, we'll | |
# break up the tokens later. | |
if nextchar == '.' or self.isnum(nextchar): | |
token += nextchar | |
elif self.isword(nextchar) and token[-1] == '.': | |
token += nextchar | |
state = 'a.' | |
else: | |
self.charstack.append(nextchar) | |
break # emit token | |
if (state in ('a.', '0.') and (seenletters or token.count('.') > 1 or | |
token[-1] in '.,')): | |
l = self._split_decimal.split(token) | |
token = l[0] | |
for tok in l[1:]: | |
if tok: | |
self.tokenstack.append(tok) | |
if state == '0.' and token.count('.') == 0: | |
token = token.replace(',', '.') | |
return token | |
def __iter__(self): | |
return self | |
def __next__(self): | |
token = self.get_token() | |
if token is None: | |
raise StopIteration | |
return token | |
def next(self): | |
return self.__next__() # Python 2.x support | |
def split(cls, s): | |
return list(cls(s)) | |
def isword(cls, nextchar): | |
""" Whether or not the next character is part of a word """ | |
return nextchar.isalpha() | |
def isnum(cls, nextchar): | |
""" Whether the next character is part of a number """ | |
return nextchar.isdigit() | |
def isspace(cls, nextchar): | |
""" Whether the next character is whitespace """ | |
return nextchar.isspace() | |
class _resultbase(object): | |
def __init__(self): | |
for attr in self.__slots__: | |
setattr(self, attr, None) | |
def _repr(self, classname): | |
l = [] | |
for attr in self.__slots__: | |
value = getattr(self, attr) | |
if value is not None: | |
l.append("%s=%s" % (attr, repr(value))) | |
return "%s(%s)" % (classname, ", ".join(l)) | |
def __len__(self): | |
return (sum(getattr(self, attr) is not None | |
for attr in self.__slots__)) | |
def __repr__(self): | |
return self._repr(self.__class__.__name__) | |
class parserinfo(object):
    """
    Vocabulary and conventions used by the parser to recognize tokens.

    Subclass and override the class-level word lists to customize the
    language and the accepted values for each kind of token.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. Default is ``False``.

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        Default is ``False``.
    """

    # m from a.m/p.m, t from ISO T separator
    JUMP = [" ", ".", ",", ";", "-", "/", "'",
            "at", "on", "and", "ad", "m", "t", "of",
            "st", "nd", "rd", "th"]

    WEEKDAYS = [("Mon", "Monday"),
                ("Tue", "Tuesday"),     # TODO: "Tues"
                ("Wed", "Wednesday"),
                ("Thu", "Thursday"),    # TODO: "Thurs"
                ("Fri", "Friday"),
                ("Sat", "Saturday"),
                ("Sun", "Sunday")]
    MONTHS = [("Jan", "January"),
              ("Feb", "February"),      # TODO: "Febr"
              ("Mar", "March"),
              ("Apr", "April"),
              ("May", "May"),
              ("Jun", "June"),
              ("Jul", "July"),
              ("Aug", "August"),
              ("Sep", "Sept", "September"),
              ("Oct", "October"),
              ("Nov", "November"),
              ("Dec", "December")]
    HMS = [("h", "hour", "hours"),
           ("m", "minute", "minutes"),
           ("s", "second", "seconds")]
    AMPM = [("am", "a"),
            ("pm", "p")]
    UTCZONE = ["UTC", "GMT", "Z", "z"]
    PERTAIN = ["of"]
    TZOFFSET = {}
    # TODO: ERA = ["AD", "BC", "CE", "BCE", "Stardate",
    #              "Anno Domini", "Year of Our Lord"]

    def __init__(self, dayfirst=False, yearfirst=False):
        # Each word list becomes a lower-cased {word: index} lookup table.
        self._jump = self._convert(self.JUMP)
        self._weekdays = self._convert(self.WEEKDAYS)
        self._months = self._convert(self.MONTHS)
        self._hms = self._convert(self.HMS)
        self._ampm = self._convert(self.AMPM)
        self._utczone = self._convert(self.UTCZONE)
        self._pertain = self._convert(self.PERTAIN)

        self.dayfirst = dayfirst
        self.yearfirst = yearfirst

        # Reference point for expanding two-digit years.
        self._year = time.localtime().tm_year
        self._century = self._year // 100 * 100

    def _convert(self, lst):
        """
        Build a ``{lowercase word: position}`` table from ``lst``, where
        every spelling inside a tuple entry maps to that entry's position.
        """
        table = {}
        for position, entry in enumerate(lst):
            spellings = entry if isinstance(entry, tuple) else (entry,)
            for word in spellings:
                table[word.lower()] = position
        return table

    def jump(self, name):
        """ Whether ``name`` is a filler token that may be skipped """
        return name.lower() in self._jump

    def weekday(self, name):
        """ Index of the weekday called ``name`` (Monday is 0), else ``None`` """
        return self._weekdays.get(name.lower())

    def month(self, name):
        """ Number (1-12) of the month called ``name``, else ``None`` """
        position = self._months.get(name.lower())
        if position is None:
            return None
        return position + 1

    def hms(self, name):
        """ 0, 1 or 2 for an hour, minute or second label, else ``None`` """
        return self._hms.get(name.lower())

    def ampm(self, name):
        """ 0 for an AM marker, 1 for a PM marker, else ``None`` """
        return self._ampm.get(name.lower())

    def pertain(self, name):
        """ Whether ``name`` is a "pertaining to" word such as "of" """
        return name.lower() in self._pertain

    def utczone(self, name):
        """ Whether ``name`` (case-insensitively) denotes UTC """
        return name.lower() in self._utczone

    def tzoffset(self, name):
        """
        Offset registered for the zone ``name``, or ``None`` if unknown.

        Note that ``name`` is looked up as given (no case folding) against
        the lower-cased UTC table before falling back to ``TZOFFSET``.
        """
        if name in self._utczone:
            return 0

        return self.TZOFFSET.get(name)

    def convertyear(self, year, century_specified=False):
        """
        Converts two-digit years to year within [-50, 49]
        range of self._year (current local time)
        """
        # Function contract is that the year is always positive
        assert year >= 0

        if century_specified or year >= 100:
            return year

        # assume current century to start
        year += self._century

        if year >= self._year + 50:    # if too far in future
            year -= 100
        elif year < self._year - 50:   # if too far in past
            year += 100

        return year

    def validate(self, res):
        """
        Normalize the year and UTC-related fields of ``res`` in place.

        :return:
            Always ``True``.
        """
        # move to info
        if res.year is not None:
            res.year = self.convertyear(res.year, res.century_specified)

        zero_offset_unnamed = res.tzoffset == 0 and not res.tzname
        zulu_name = res.tzname == 'Z' or res.tzname == 'z'

        if zero_offset_unnamed or zulu_name:
            res.tzname = "UTC"
            res.tzoffset = 0
        elif res.tzoffset != 0 and res.tzname and self.utczone(res.tzname):
            res.tzoffset = 0
        return True
class _ymd(list): | |
def __init__(self, *args, **kwargs): | |
super(self.__class__, self).__init__(*args, **kwargs) | |
self.century_specified = False | |
self.dstridx = None | |
self.mstridx = None | |
self.ystridx = None | |
def has_year(self): | |
return self.ystridx is not None | |
def has_month(self): | |
return self.mstridx is not None | |
def has_day(self): | |
return self.dstridx is not None | |
def could_be_day(self, value): | |
if self.has_day: | |
return False | |
elif not self.has_month: | |
return 1 <= value <= 31 | |
elif not self.has_year: | |
# Be permissive, assume leap year | |
month = self[self.mstridx] | |
return 1 <= value <= monthrange(2000, month)[1] | |
else: | |
month = self[self.mstridx] | |
year = self[self.ystridx] | |
return 1 <= value <= monthrange(year, month)[1] | |
def append(self, val, label=None): | |
if hasattr(val, '__len__'): | |
if val.isdigit() and len(val) > 2: | |
self.century_specified = True | |
if label not in [None, 'Y']: # pragma: no cover | |
raise ValueError(label) | |
label = 'Y' | |
elif val > 100: | |
self.century_specified = True | |
if label not in [None, 'Y']: # pragma: no cover | |
raise ValueError(label) | |
label = 'Y' | |
super(self.__class__, self).append(int(val)) | |
if label == 'M': | |
if self.has_month: | |
raise ValueError('Month is already set') | |
self.mstridx = len(self) - 1 | |
elif label == 'D': | |
if self.has_day: | |
raise ValueError('Day is already set') | |
self.dstridx = len(self) - 1 | |
elif label == 'Y': | |
if self.has_year: | |
raise ValueError('Year is already set') | |
self.ystridx = len(self) - 1 | |
def _resolve_from_stridxs(self, strids): | |
""" | |
Try to resolve the identities of year/month/day elements using | |
ystridx, mstridx, and dstridx, if enough of these are specified. | |
""" | |
if len(self) == 3 and len(strids) == 2: | |
# we can back out the remaining stridx value | |
missing = [x for x in range(3) if x not in strids.values()] | |
key = [x for x in ['y', 'm', 'd'] if x not in strids] | |
assert len(missing) == len(key) == 1 | |
key = key[0] | |
val = missing[0] | |
strids[key] = val | |
assert len(self) == len(strids) # otherwise this should not be called | |
out = {key: self[strids[key]] for key in strids} | |
return (out.get('y'), out.get('m'), out.get('d')) | |
def resolve_ymd(self, yearfirst, dayfirst): | |
len_ymd = len(self) | |
year, month, day = (None, None, None) | |
strids = (('y', self.ystridx), | |
('m', self.mstridx), | |
('d', self.dstridx)) | |
strids = {key: val for key, val in strids if val is not None} | |
if (len(self) == len(strids) > 0 or | |
(len(self) == 3 and len(strids) == 2)): | |
return self._resolve_from_stridxs(strids) | |
mstridx = self.mstridx | |
if len_ymd > 3: | |
raise ValueError("More than three YMD values") | |
elif len_ymd == 1 or (mstridx is not None and len_ymd == 2): | |
# One member, or two members with a month string | |
if mstridx is not None: | |
month = self[mstridx] | |
# since mstridx is 0 or 1, self[mstridx-1] always | |
# looks up the other element | |
other = self[mstridx - 1] | |
else: | |
other = self[0] | |
if len_ymd > 1 or mstridx is None: | |
if other > 31: | |
year = other | |
else: | |
day = other | |
elif len_ymd == 2: | |
# Two members with numbers | |
if self[0] > 31: | |
# 99-01 | |
year, month = self | |
elif self[1] > 31: | |
# 01-99 | |
month, year = self | |
elif dayfirst and self[1] <= 12: | |
# 13-01 | |
day, month = self | |
else: | |
# 01-13 | |
month, day = self | |
elif len_ymd == 3: | |
# Three members | |
if mstridx == 0: | |
if self[1] > 31: | |
# Apr-2003-25 | |
month, year, day = self | |
else: | |
month, day, year = self | |
elif mstridx == 1: | |
if self[0] > 31 or (yearfirst and self[2] <= 31): | |
# 99-Jan-01 | |
year, month, day = self | |
else: | |
# 01-Jan-01 | |
# Give precedence to day-first, since | |
# two-digit years is usually hand-written. | |
day, month, year = self | |
elif mstridx == 2: | |
# WTF!? | |
if self[1] > 31: | |
# 01-99-Jan | |
day, year, month = self | |
else: | |
# 99-01-Jan | |
year, day, month = self | |
else: | |
if (self[0] > 31 or | |
self.ystridx == 0 or | |
(yearfirst and self[1] <= 12 and self[2] <= 31)): | |
# 99-01-01 | |
if dayfirst and self[2] <= 12: | |
year, day, month = self | |
else: | |
year, month, day = self | |
elif self[0] > 12 or (dayfirst and self[1] <= 12): | |
# 13-01-01 | |
day, month, year = self | |
else: | |
# 01-13-01 | |
month, day, year = self | |
return year, month, day | |
class parser(object): | |
def __init__(self, info=None): | |
self.info = info or parserinfo() | |
def parse(self, timestr, default=None,
          ignoretz=False, tzinfos=None, **kwargs):
    """
    Turn a date/time string into a :class:`datetime.datetime` object.

    :param timestr:
        Any date/time string using the supported formats.

    :param default:
        A datetime supplying the value of every element which is not
        present in ``timestr``. When ``None``, the current local date at
        midnight is used.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a
        naive :class:`datetime.datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. Either a dictionary mapping time zone names to time zones,
        or a function taking two parameters (``tzname`` and ``tzoffset``)
        and returning a time zone.

        A time zone may be given as an integer offset from UTC in seconds
        or as a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param \\*\\*kwargs:
        Keyword arguments as passed to ``_parse()``.

    :return:
        A :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, a tuple whose first
        element is the :class:`datetime.datetime` object and whose second
        element is a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string format, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date
        would be created.

    :raises TypeError:
        Raised for non-string or character stream input.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    if default is None:
        now = datetime.datetime.now()
        default = now.replace(hour=0, minute=0, second=0, microsecond=0)

    parsed, skipped_tokens = self._parse(timestr, **kwargs)

    if parsed is None:
        raise ParserError("Unknown string format: %s", timestr)

    if len(parsed) == 0:
        raise ParserError("String does not contain a date: %s", timestr)

    try:
        result = self._build_naive(parsed, default)
    except ValueError as e:
        # Re-raise as ParserError while keeping the original as the cause.
        six.raise_from(ParserError(str(e) + ": %s", timestr), e)

    if not ignoretz:
        result = self._build_tzaware(result, parsed, tzinfos)

    if kwargs.get('fuzzy_with_tokens', False):
        return result, skipped_tokens
    return result
class _result(_resultbase):
    # Fields filled in while parsing; each starts as None and only these
    # fields are counted by len() and shown by repr().
    __slots__ = [
        "year", "month", "day", "weekday",
        "hour", "minute", "second", "microsecond",
        "tzname", "tzoffset",
        "ampm",
        "any_unused_tokens",
    ]
def _parse(self, timestr, dayfirst=None, yearfirst=None, fuzzy=False,
           fuzzy_with_tokens=False):
    """
    Private method which performs the heavy lifting of parsing, called from
    ``parse()``, which passes on its ``kwargs`` to this function.

    :param timestr:
        The string to parse.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM
        and YMD. If set to ``None``, this value is retrieved from the
        current :class:`parserinfo` object (which itself defaults to
        ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken
        to be the year, otherwise the last number is taken to be the year.
        If this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at ')))

    :return:
        A ``(res, skipped_tokens)`` tuple. ``res`` is the populated
        ``_result`` object and ``skipped_tokens`` is a tuple when
        ``fuzzy_with_tokens`` is set, otherwise ``None``. ``(None, None)``
        is returned when an ``IndexError`` or ``ValueError`` occurs while
        consuming tokens, or when ``info.validate`` rejects the result.
    """
    if fuzzy_with_tokens:
        fuzzy = True

    info = self.info

    if dayfirst is None:
        dayfirst = info.dayfirst

    if yearfirst is None:
        yearfirst = info.yearfirst

    res = self._result()
    l = _timelex.split(timestr)         # Splits the timestr into tokens

    # Indexes of tokens which were not interpreted (jump words or, in
    # fuzzy mode, anything unrecognized).
    skipped_idxs = []

    # year/month/day list
    ymd = _ymd()

    len_l = len(l)
    i = 0
    try:
        # Each branch consumes the token at ``i`` plus any look-ahead
        # tokens it used; the shared ``i += 1`` at the bottom of the loop
        # accounts for the current token itself.
        while i < len_l:

            # Check if it's a number
            value_repr = l[i]
            try:
                value = float(value_repr)
            except ValueError:
                value = None

            if value is not None:
                # Numeric token
                i = self._parse_numeric_token(l, i, info, ymd, res, fuzzy)

            # Check weekday
            elif info.weekday(l[i]) is not None:
                value = info.weekday(l[i])
                res.weekday = value

            # Check month name
            elif info.month(l[i]) is not None:
                value = info.month(l[i])
                ymd.append(value, 'M')

                if i + 1 < len_l:
                    if l[i + 1] in ('-', '/'):
                        # Jan-01[-99]
                        sep = l[i + 1]
                        ymd.append(l[i + 2])

                        if i + 3 < len_l and l[i + 3] == sep:
                            # Jan-01-99
                            ymd.append(l[i + 4])
                            i += 2

                        i += 2

                    elif (i + 4 < len_l and l[i + 1] == l[i + 3] == ' ' and
                          info.pertain(l[i + 2])):
                        # Jan of 01
                        # In this case, 01 is clearly year
                        if l[i + 4].isdigit():
                            # Convert it here to become unambiguous
                            value = int(l[i + 4])
                            year = str(info.convertyear(value))
                            ymd.append(year, 'Y')
                        else:
                            # Wrong guess
                            pass
                            # TODO: not hit in tests
                        i += 4

            # Check am/pm
            elif info.ampm(l[i]) is not None:
                value = info.ampm(l[i])
                val_is_ampm = self._ampm_valid(res.hour, res.ampm, fuzzy)

                if val_is_ampm:
                    res.hour = self._adjust_ampm(res.hour, value)
                    res.ampm = value

                elif fuzzy:
                    skipped_idxs.append(i)

            # Check for a timezone name
            elif self._could_be_tzname(res.hour, res.tzname, res.tzoffset, l[i]):
                res.tzname = l[i]
                res.tzoffset = info.tzoffset(res.tzname)

                # Check for something like GMT+3, or BRST+3. Notice
                # that it doesn't mean "I am 3 hours after GMT", but
                # "my time +3 is GMT". If found, we reverse the
                # logic so that timezone parsing code will get it
                # right.
                if i + 1 < len_l and l[i + 1] in ('+', '-'):
                    # Swap the sign token in place: '+' becomes '-' and
                    # vice versa.
                    l[i + 1] = ('+', '-')[l[i + 1] == '+']
                    res.tzoffset = None
                    if info.utczone(res.tzname):
                        # With something like GMT+3, the timezone
                        # is *not* GMT.
                        res.tzname = None

            # Check for a numbered timezone
            elif res.hour is not None and l[i] in ('+', '-'):
                signal = (-1, 1)[l[i] == '+']
                len_li = len(l[i + 1])

                # TODO: check that l[i + 1] is integer?
                if len_li == 4:
                    # -0300
                    hour_offset = int(l[i + 1][:2])
                    min_offset = int(l[i + 1][2:])
                elif i + 2 < len_l and l[i + 2] == ':':
                    # -03:00
                    hour_offset = int(l[i + 1])
                    min_offset = int(l[i + 3])  # TODO: Check that l[i+3] is minute-like?
                    i += 2
                elif len_li <= 2:
                    # -[0]3
                    hour_offset = int(l[i + 1][:2])
                    min_offset = 0
                else:
                    raise ValueError(timestr)

                # Offset is stored in seconds.
                res.tzoffset = signal * (hour_offset * 3600 + min_offset * 60)

                # Look for a timezone name between parenthesis
                if (i + 5 < len_l and
                        info.jump(l[i + 2]) and l[i + 3] == '(' and
                        l[i + 5] == ')' and
                        3 <= len(l[i + 4]) and
                        self._could_be_tzname(res.hour, res.tzname,
                                              None, l[i + 4])):
                    # -0300 (BRST)
                    res.tzname = l[i + 4]
                    i += 4

                i += 1

            # Check jumps
            elif not (info.jump(l[i]) or fuzzy):
                raise ValueError(timestr)

            else:
                skipped_idxs.append(i)
            i += 1

        # Process year/month/day
        year, month, day = ymd.resolve_ymd(yearfirst, dayfirst)

        res.century_specified = ymd.century_specified
        res.year = year
        res.month = month
        res.day = day

    except (IndexError, ValueError):
        # Running off the end of the token list during look-ahead and any
        # malformed value both mean the string could not be parsed.
        return None, None

    if not info.validate(res):
        return None, None

    if fuzzy_with_tokens:
        # NOTE(review): _recombine_skipped is defined outside this view;
        # presumably it joins adjacent skipped tokens - confirm.
        skipped_tokens = self._recombine_skipped(l, skipped_idxs)
        return res, tuple(skipped_tokens)
    else:
        return res, None
def _parse_numeric_token(self, tokens, idx, info, ymd, res, fuzzy):
    """
    Interpret the numeric token at ``tokens[idx]`` and record it.

    Depending on the token's length and on the neighbouring tokens, the
    value is stored on ``res`` (hour/minute/second/microsecond) or
    appended to ``ymd`` as a year, month or day candidate.

    :param tokens:
        The full list of tokens being parsed.

    :param idx:
        Index of the numeric token to interpret.

    :param info:
        Object queried through ``hms``, ``jump``, ``ampm`` and ``month``
        to classify the neighbouring tokens.

    :param ymd:
        The year/month/day accumulator; modified in place.

    :param res:
        The result object; modified in place.

    :param fuzzy:
        When false, a number which matches none of the patterns raises
        ``ValueError``; when true, it is silently left unused.

    :return:
        The index of the last token consumed (``idx`` itself when no
        look-ahead tokens were used).

    :raises ValueError:
        Raised if the token cannot be converted to a decimal, if a token
        following a date separator is neither digits nor a month name, or
        if the number matches no pattern and ``fuzzy`` is false.
    """
    # Token is a number
    value_repr = tokens[idx]
    try:
        value = self._to_decimal(value_repr)
    except Exception as e:
        six.raise_from(ValueError('Unknown numeric token'), e)

    len_li = len(value_repr)

    len_l = len(tokens)

    # The order of the branches below matters: earlier, more specific
    # patterns take precedence over the later, more general ones.
    if (len(ymd) == 3 and len_li in (2, 4) and
        res.hour is None and
        (idx + 1 >= len_l or
         (tokens[idx + 1] != ':' and
          info.hms(tokens[idx + 1]) is None))):
        # 19990101T23[59]
        s = tokens[idx]
        res.hour = int(s[:2])

        if len_li == 4:
            res.minute = int(s[2:])

    elif len_li == 6 or (len_li > 6 and tokens[idx].find('.') == 6):
        # YYMMDD or HHMMSS[.ss]
        s = tokens[idx]

        if not ymd and '.' not in tokens[idx]:
            ymd.append(s[:2])
            ymd.append(s[2:4])
            ymd.append(s[4:])
        else:
            # 19990101T235959[.59]

            # TODO: Check if res attributes already set.
            res.hour = int(s[:2])
            res.minute = int(s[2:4])
            res.second, res.microsecond = self._parsems(s[4:])

    elif len_li in (8, 12, 14):
        # YYYYMMDD
        s = tokens[idx]
        ymd.append(s[:4], 'Y')
        ymd.append(s[4:6])
        ymd.append(s[6:8])

        if len_li > 8:
            # YYYYMMDDhhmm[ss]
            res.hour = int(s[8:10])
            res.minute = int(s[10:12])

            if len_li > 12:
                res.second = int(s[12:])

    elif self._find_hms_idx(idx, tokens, info, allow_jump=True) is not None:
        # HH[ ]h or MM[ ]m or SS[.ss][ ]s
        hms_idx = self._find_hms_idx(idx, tokens, info, allow_jump=True)
        (idx, hms) = self._parse_hms(idx, tokens, info, hms_idx)
        if hms is not None:
            # TODO: checking that hour/minute/second are not
            # already set?
            self._assign_hms(res, value_repr, hms)

    elif idx + 2 < len_l and tokens[idx + 1] == ':':
        # HH:MM[:SS[.ss]]
        res.hour = int(value)
        value = self._to_decimal(tokens[idx + 2])  # TODO: try/except for this?
        (res.minute, res.second) = self._parse_min_sec(value)

        if idx + 4 < len_l and tokens[idx + 3] == ':':
            res.second, res.microsecond = self._parsems(tokens[idx + 4])

            idx += 2

        idx += 2

    elif idx + 1 < len_l and tokens[idx + 1] in ('-', '/', '.'):
        # Separated date; a third member is only accepted when it is
        # preceded by the same separator as the second.
        sep = tokens[idx + 1]
        ymd.append(value_repr)

        if idx + 2 < len_l and not info.jump(tokens[idx + 2]):
            if tokens[idx + 2].isdigit():
                # 01-01[-01]
                ymd.append(tokens[idx + 2])
            else:
                # 01-Jan[-01]
                value = info.month(tokens[idx + 2])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    raise ValueError()

            if idx + 3 < len_l and tokens[idx + 3] == sep:
                # We have three members
                value = info.month(tokens[idx + 4])

                if value is not None:
                    ymd.append(value, 'M')
                else:
                    ymd.append(tokens[idx + 4])
                idx += 2

            idx += 1
        idx += 1

    elif idx + 1 >= len_l or info.jump(tokens[idx + 1]):
        # Last token, or followed by a skippable token.
        if idx + 2 < len_l and info.ampm(tokens[idx + 2]) is not None:
            # 12 am
            hour = int(value)
            res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 2]))
            idx += 1
        else:
            # Year, month or day
            ymd.append(value)
        idx += 1

    elif info.ampm(tokens[idx + 1]) is not None and (0 <= value < 24):
        # 12am
        hour = int(value)
        res.hour = self._adjust_ampm(hour, info.ampm(tokens[idx + 1]))
        idx += 1

    elif ymd.could_be_day(value):
        ymd.append(value)

    elif not fuzzy:
        raise ValueError()

    return idx
def _find_hms_idx(self, idx, tokens, info, allow_jump): | |
len_l = len(tokens) | |
if idx+1 < len_l and info.hms(tokens[idx+1]) is not None: | |
# There is an "h", "m", or "s" label following this token. We take | |
# assign the upcoming label to the current token. | |
# e.g. the "12" in 12h" | |
hms_idx = idx + 1 | |
elif (allow_jump and idx+2 < len_l and tokens[idx+1] == ' ' and | |
info.hms(tokens[idx+2]) is not None): | |
# There is a space and then an "h", "m", or "s" label. | |
# e.g. the "12" in "12 h" | |
hms_idx = idx + 2 | |
elif idx > 0 and info.hms(tokens[idx-1]) is not None: | |
# There is a "h", "m", or "s" preceding this token. Since neither | |
# of the previous cases was hit, there is no label following this | |
# token, so we use the previous label. | |
# e.g. the "04" in "12h04" | |
hms_idx = idx-1 | |
elif (1 < idx == len_l-1 and tokens[idx-1] == ' ' and | |
info.hms(tokens[idx-2]) is not None): | |
# If we are looking at the final token, we allow for a | |
# backward-looking check to skip over a space. | |
# TODO: Are we sure this is the right condition here? | |
hms_idx = idx - 2 | |
else: | |
hms_idx = None | |
return hms_idx | |
def _assign_hms(self, res, value_repr, hms): | |
# See GH issue #427, fixing float rounding | |
value = self._to_decimal(value_repr) | |
if hms == 0: | |
# Hour | |
res.hour = int(value) | |
if value % 1: | |
res.minute = int(60*(value % 1)) | |
elif hms == 1: | |
(res.minute, res.second) = self._parse_min_sec(value) | |
elif hms == 2: | |
(res.second, res.microsecond) = self._parsems(value_repr) | |
def _could_be_tzname(self, hour, tzname, tzoffset, token): | |
return (hour is not None and | |
tzname is None and | |
tzoffset is None and | |
len(token) <= 5 and | |
(all(x in string.ascii_uppercase for x in token) | |
or token in self.info.UTCZONE)) | |
def _ampm_valid(self, hour, ampm, fuzzy): | |
""" | |
For fuzzy parsing, 'a' or 'am' (both valid English words) | |
may erroneously trigger the AM/PM flag. Deal with that | |
here. | |
""" | |
val_is_ampm = True | |
# If there's already an AM/PM flag, this one isn't one. | |
if fuzzy and ampm is not None: | |
val_is_ampm = False | |
# If AM/PM is found and hour is not, raise a ValueError | |
if hour is None: | |
if fuzzy: | |
val_is_ampm = False | |
else: | |
raise ValueError('No hour specified with AM or PM flag.') | |
elif not 0 <= hour <= 12: | |
# If AM/PM is found, it's a 12 hour clock, so raise | |
# an error for invalid range | |
if fuzzy: | |
val_is_ampm = False | |
else: | |
raise ValueError('Invalid hour specified for 12-hour clock.') | |
return val_is_ampm | |
def _adjust_ampm(self, hour, ampm): | |
if hour < 12 and ampm == 1: | |
hour += 12 | |
elif hour == 12 and ampm == 0: | |
hour = 0 | |
return hour | |
def _parse_min_sec(self, value): | |
# TODO: Every usage of this function sets res.second to the return | |
# value. Are there any cases where second will be returned as None and | |
# we *don't* want to set res.second = None? | |
minute = int(value) | |
second = None | |
sec_remainder = value % 1 | |
if sec_remainder: | |
second = int(60 * sec_remainder) | |
return (minute, second) | |
def _parse_hms(self, idx, tokens, info, hms_idx): | |
# TODO: Is this going to admit a lot of false-positives for when we | |
# just happen to have digits and "h", "m" or "s" characters in non-date | |
# text? I guess hex hashes won't have that problem, but there's plenty | |
# of random junk out there. | |
if hms_idx is None: | |
hms = None | |
new_idx = idx | |
elif hms_idx > idx: | |
hms = info.hms(tokens[hms_idx]) | |
new_idx = hms_idx | |
else: | |
# Looking backwards, increment one. | |
hms = info.hms(tokens[hms_idx]) + 1 | |
new_idx = idx | |
return (new_idx, hms) | |
# ------------------------------------------------------------------ | |
# Handling for individual tokens. These are kept as methods instead | |
# of functions for the sake of customizability via subclassing. | |
def _parsems(self, value): | |
"""Parse a I[.F] seconds value into (seconds, microseconds).""" | |
if "." not in value: | |
return int(value), 0 | |
else: | |
i, f = value.split(".") | |
return int(i), int(f.ljust(6, "0")[:6]) | |
def _to_decimal(self, val): | |
try: | |
decimal_value = Decimal(val) | |
# See GH 662, edge case, infinite value should not be converted | |
# via `_to_decimal` | |
if not decimal_value.is_finite(): | |
raise ValueError("Converted decimal value is infinite or NaN") | |
except Exception as e: | |
msg = "Could not convert %s to decimal" % val | |
six.raise_from(ValueError(msg), e) | |
else: | |
return decimal_value | |
# ------------------------------------------------------------------ | |
# Post-Parsing construction of datetime output. These are kept as | |
# methods instead of functions for the sake of customizability via | |
# subclassing. | |
def _build_tzinfo(self, tzinfos, tzname, tzoffset): | |
if callable(tzinfos): | |
tzdata = tzinfos(tzname, tzoffset) | |
else: | |
tzdata = tzinfos.get(tzname) | |
# handle case where tzinfo is paased an options that returns None | |
# eg tzinfos = {'BRST' : None} | |
if isinstance(tzdata, datetime.tzinfo) or tzdata is None: | |
tzinfo = tzdata | |
elif isinstance(tzdata, text_type): | |
tzinfo = tz.tzstr(tzdata) | |
elif isinstance(tzdata, integer_types): | |
tzinfo = tz.tzoffset(tzname, tzdata) | |
else: | |
raise TypeError("Offset must be tzinfo subclass, tz string, " | |
"or int offset.") | |
return tzinfo | |
def _build_tzaware(self, naive, res, tzinfos): | |
if (callable(tzinfos) or (tzinfos and res.tzname in tzinfos)): | |
tzinfo = self._build_tzinfo(tzinfos, res.tzname, res.tzoffset) | |
aware = naive.replace(tzinfo=tzinfo) | |
aware = self._assign_tzname(aware, res.tzname) | |
elif res.tzname and res.tzname in time.tzname: | |
aware = naive.replace(tzinfo=tz.tzlocal()) | |
# Handle ambiguous local datetime | |
aware = self._assign_tzname(aware, res.tzname) | |
# This is mostly relevant for winter GMT zones parsed in the UK | |
if (aware.tzname() != res.tzname and | |
res.tzname in self.info.UTCZONE): | |
aware = aware.replace(tzinfo=tz.UTC) | |
elif res.tzoffset == 0: | |
aware = naive.replace(tzinfo=tz.UTC) | |
elif res.tzoffset: | |
aware = naive.replace(tzinfo=tz.tzoffset(res.tzname, res.tzoffset)) | |
elif not res.tzname and not res.tzoffset: | |
# i.e. no timezone information was found. | |
aware = naive | |
elif res.tzname: | |
# tz-like string was parsed but we don't know what to do | |
# with it | |
warnings.warn("tzname {tzname} identified but not understood. " | |
"Pass `tzinfos` argument in order to correctly " | |
"return a timezone-aware datetime. In a future " | |
"version, this will raise an " | |
"exception.".format(tzname=res.tzname), | |
category=UnknownTimezoneWarning) | |
aware = naive | |
return aware | |
def _build_naive(self, res, default): | |
repl = {} | |
for attr in ("year", "month", "day", "hour", | |
"minute", "second", "microsecond"): | |
value = getattr(res, attr) | |
if value is not None: | |
repl[attr] = value | |
if 'day' not in repl: | |
# If the default day exceeds the last day of the month, fall back | |
# to the end of the month. | |
cyear = default.year if res.year is None else res.year | |
cmonth = default.month if res.month is None else res.month | |
cday = default.day if res.day is None else res.day | |
if cday > monthrange(cyear, cmonth)[1]: | |
repl['day'] = monthrange(cyear, cmonth)[1] | |
naive = default.replace(**repl) | |
if res.weekday is not None and not res.day: | |
naive = naive + relativedelta.relativedelta(weekday=res.weekday) | |
return naive | |
def _assign_tzname(self, dt, tzname): | |
if dt.tzname() != tzname: | |
new_dt = tz.enfold(dt, fold=1) | |
if new_dt.tzname() == tzname: | |
return new_dt | |
return dt | |
def _recombine_skipped(self, tokens, skipped_idxs): | |
""" | |
>>> tokens = ["foo", " ", "bar", " ", "19June2000", "baz"] | |
>>> skipped_idxs = [0, 1, 2, 5] | |
>>> _recombine_skipped(tokens, skipped_idxs) | |
["foo bar", "baz"] | |
""" | |
skipped_tokens = [] | |
for i, idx in enumerate(sorted(skipped_idxs)): | |
if i > 0 and idx - 1 == skipped_idxs[i - 1]: | |
skipped_tokens[-1] = skipped_tokens[-1] + tokens[idx] | |
else: | |
skipped_tokens.append(tokens[idx]) | |
return skipped_tokens | |
# Shared module-level parser instance, constructed with no arguments.
# The module-level parse() function uses it whenever the caller does not
# supply a parserinfo.
DEFAULTPARSER = parser()
def parse(timestr, parserinfo=None, **kwargs):
    """

    Parse a date/time string in any of the supported formats, honouring
    the settings carried by ``parserinfo``.

    :param timestr:
        A string containing a date/time stamp.

    :param parserinfo:
        A :class:`parserinfo` object containing parameters for the parser.
        If ``None``, the default arguments to the :class:`parserinfo`
        constructor are used.

    The ``**kwargs`` parameter takes the following keyword arguments:

    :param default:
        The default datetime object, if this is a datetime object and not
        ``None``, elements specified in ``timestr`` replace elements in the
        default object.

    :param ignoretz:
        If set ``True``, time zones in parsed strings are ignored and a naive
        :class:`datetime` object is returned.

    :param tzinfos:
        Additional time zone names / aliases which may be present in the
        string. This argument maps time zone names (and optionally offsets
        from those time zones) to time zones. This parameter can be a
        dictionary with timezone aliases mapping time zone names to time
        zones or a function taking two parameters (``tzname`` and
        ``tzoffset``) and returning a time zone.

        The timezones to which the names are mapped can be an integer
        offset from UTC in seconds or a :class:`tzinfo` object.

        .. doctest::
           :options: +NORMALIZE_WHITESPACE

            >>> from dateutil.parser import parse
            >>> from dateutil.tz import gettz
            >>> tzinfos = {"BRST": -7200, "CST": gettz("America/Chicago")}
            >>> parse("2012-01-19 17:21:00 BRST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21, tzinfo=tzoffset(u'BRST', -7200))
            >>> parse("2012-01-19 17:21:00 CST", tzinfos=tzinfos)
            datetime.datetime(2012, 1, 19, 17, 21,
                              tzinfo=tzfile('/usr/share/zoneinfo/America/Chicago'))

        This parameter is ignored if ``ignoretz`` is set.

    :param dayfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the day (``True``) or month (``False``). If
        ``yearfirst`` is set to ``True``, this distinguishes between YDM and
        YMD. If set to ``None``, this value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param yearfirst:
        Whether to interpret the first value in an ambiguous 3-integer date
        (e.g. 01/05/09) as the year. If ``True``, the first number is taken to
        be the year, otherwise the last number is taken to be the year. If
        this is set to ``None``, the value is retrieved from the current
        :class:`parserinfo` object (which itself defaults to ``False``).

    :param fuzzy:
        Whether to allow fuzzy parsing, allowing for string like "Today is
        January 1, 2047 at 8:21:00AM".

    :param fuzzy_with_tokens:
        If ``True``, ``fuzzy`` is automatically set to True, and the parser
        will return a tuple where the first element is the parsed
        :class:`datetime.datetime` datetimestamp and the second element is
        a tuple containing the portions of the string which were ignored:

        .. doctest::

            >>> from dateutil.parser import parse
            >>> parse("Today is January 1, 2047 at 8:21:00AM", fuzzy_with_tokens=True)
            (datetime.datetime(2047, 1, 1, 8, 21), (u'Today is ', u' ', u'at '))

    :return:
        Returns a :class:`datetime.datetime` object or, if the
        ``fuzzy_with_tokens`` option is ``True``, returns a tuple, the
        first element being a :class:`datetime.datetime` object, the second
        a tuple containing the fuzzy tokens.

    :raises ParserError:
        Raised for invalid or unknown string formats, if the provided
        :class:`tzinfo` is not in a valid format, or if an invalid date would
        be created.

    :raises OverflowError:
        Raised if the parsed date exceeds the largest valid C integer on
        your system.
    """
    # Build a dedicated parser only when custom settings were supplied;
    # otherwise reuse the shared module-level instance.
    active_parser = parser(parserinfo) if parserinfo else DEFAULTPARSER
    return active_parser.parse(timestr, **kwargs)
class _tzparser(object):
    """Parser for ``TZ``-variable-style time zone strings.

    Handles strings of the form ``BRST+3[BRDT[+2]]``, optionally followed
    by DST transition rules, either in the deprecated dateutil-specific
    comma-separated form (``GMT0BST,3,0,30,3600,10,0,26,7200[,3600]``) or
    as ``J``/``M``/day-of-year rules with an optional ``/time`` suffix.
    """

    class _result(_resultbase):
        """Holds the abbreviations, offsets and transition rules found."""

        __slots__ = ["stdabbr", "stdoffset", "dstabbr", "dstoffset",
                     "start", "end"]

        class _attr(_resultbase):
            """One DST transition rule; used for ``start`` and ``end``."""
            __slots__ = ["month", "week", "weekday",
                         "yday", "jyday", "day", "time"]

        def __repr__(self):
            return self._repr("")

        def __init__(self):
            _resultbase.__init__(self)
            self.start = self._attr()
            self.end = self._attr()

    def parse(self, tzstr):
        """Parse ``tzstr`` into a :class:`_result`.

        :param tzstr:
            The time zone string to parse.

        :return:
            A ``_result`` whose offsets are expressed in seconds and whose
            ``any_unused_tokens`` attribute tells whether tokens other than
            ``,`` and ``:`` were left unconsumed, or ``None`` if the string
            does not match any supported format.
        """
        res = self._result()
        # Tokenize into separators, alphabetic runs and digit runs; the
        # text between matches (e.g. "+", "-", "/") becomes tokens as well.
        l = [x for x in re.split(r'([,:.]|[a-zA-Z]+|[0-9]+)', tzstr) if x]
        used_idxs = list()
        try:

            len_l = len(l)

            i = 0
            while i < len_l:
                # BRST+3[BRDT[+2]]
                j = i
                # Advance j over tokens that contain no digit/sign/separator
                # characters, i.e. the abbreviation.
                while j < len_l and not [x for x in l[j]
                                         if x in "0123456789:,-+"]:
                    j += 1
                if j != i:
                    # First abbreviation is the standard one, the second
                    # the DST one.
                    if not res.stdabbr:
                        offattr = "stdoffset"
                        res.stdabbr = "".join(l[i:j])
                    else:
                        offattr = "dstoffset"
                        res.dstabbr = "".join(l[i:j])

                    for ii in range(j):
                        used_idxs.append(ii)
                    i = j
                    if (i < len_l and (l[i] in ('+', '-') or l[i][0] in
                                       "0123456789")):
                        if l[i] in ('+', '-'):
                            # Yes, that's right.  See the TZ variable
                            # documentation.
                            signal = (1, -1)[l[i] == '+']
                            used_idxs.append(i)
                            i += 1
                        else:
                            signal = -1
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            setattr(res, offattr, (int(l[i][:2]) * 3600 +
                                                   int(l[i][2:]) * 60) * signal)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            setattr(res, offattr,
                                    (int(l[i]) * 3600 +
                                     int(l[i + 2]) * 60) * signal)
                            used_idxs.append(i)
                            i += 2
                        elif len_li <= 2:
                            # -[0]3
                            setattr(res, offattr,
                                    int(l[i][:2]) * 3600 * signal)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1
                    if res.dstabbr:
                        break
                else:
                    break

            if i < len_l:
                # Accept ';' as an alternative rule separator.
                for j in range(i, len_l):
                    if l[j] == ';':
                        l[j] = ','

                # Explicit check instead of ``assert`` so that validation
                # still happens when Python runs with -O.
                if l[i] != ',':
                    raise ValueError("expected ',' before transition rules")

                i += 1

            if i >= len_l:
                pass
            elif (8 <= l.count(',') <= 9 and
                  not [y for x in l[i:] if x != ','
                       for y in x if y not in "0123456789+-"]):
                # GMT0BST,3,0,30,3600,10,0,26,7200[,3600]
                for x in (res.start, res.end):
                    x.month = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    if l[i] == '-':
                        value = int(l[i + 1]) * -1
                        used_idxs.append(i)
                        i += 1
                    else:
                        value = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    # A non-zero value is a week number followed by a
                    # weekday; zero means the next field is a day.
                    if value:
                        x.week = value
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        x.day = int(l[i])
                    used_idxs.append(i)
                    i += 2
                    x.time = int(l[i])
                    used_idxs.append(i)
                    i += 2
                if i < len_l:
                    # Optional trailing field: DST offset relative to the
                    # standard offset.
                    if l[i] in ('-', '+'):
                        signal = (-1, 1)[l[i] == "+"]
                        used_idxs.append(i)
                        i += 1
                    else:
                        signal = 1
                    used_idxs.append(i)
                    res.dstoffset = (res.stdoffset + int(l[i]) * signal)

                # This was a made-up format that is not in normal use
                warn(('Parsed time zone "%s" ' % tzstr) +
                     'is in a non-standard dateutil-specific format, which ' +
                     'is now deprecated; support for parsing this format ' +
                     'will be removed in future versions. It is recommended ' +
                     'that you switch to a standard format like the GNU ' +
                     'TZ variable format.', tz.DeprecatedTzFormatWarning)
            elif (l.count(',') == 2 and l[i:].count('/') <= 2 and
                  not [y for x in l[i:] if x not in (',', '/', 'J', 'M',
                                                     '.', '-', ':')
                       for y in x if y not in "0123456789"]):
                for x in (res.start, res.end):
                    if l[i] == 'J':
                        # non-leap year day (1 based)
                        used_idxs.append(i)
                        i += 1
                        x.jyday = int(l[i])
                    elif l[i] == 'M':
                        # month[-.]week[-.]weekday
                        used_idxs.append(i)
                        i += 1
                        x.month = int(l[i])
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after month")
                        used_idxs.append(i)
                        i += 1
                        x.week = int(l[i])
                        # Week 5 is stored as -1.
                        if x.week == 5:
                            x.week = -1
                        used_idxs.append(i)
                        i += 1
                        if l[i] not in ('-', '.'):
                            raise ValueError("expected '-' or '.' after week")
                        used_idxs.append(i)
                        i += 1
                        x.weekday = (int(l[i]) - 1) % 7
                    else:
                        # year day (zero based)
                        x.yday = int(l[i]) + 1

                    used_idxs.append(i)
                    i += 1

                    if i < len_l and l[i] == '/':
                        used_idxs.append(i)
                        i += 1
                        # start time
                        len_li = len(l[i])
                        if len_li == 4:
                            # -0300
                            x.time = (int(l[i][:2]) * 3600 +
                                      int(l[i][2:]) * 60)
                        elif i + 1 < len_l and l[i + 1] == ':':
                            # -03:00
                            x.time = int(l[i]) * 3600 + int(l[i + 2]) * 60
                            used_idxs.append(i)
                            i += 2
                            if i + 1 < len_l and l[i + 1] == ':':
                                used_idxs.append(i)
                                i += 2
                                x.time += int(l[i])
                        elif len_li <= 2:
                            # -[0]3
                            x.time = (int(l[i][:2]) * 3600)
                        else:
                            return None
                        used_idxs.append(i)
                        i += 1

                    if i != len_l and l[i] != ',':
                        raise ValueError("expected ',' between rules")

                    i += 1

                if i < len_l:
                    raise ValueError("trailing tokens after transition rules")

        except (IndexError, ValueError):
            # Any structural mismatch means the string is not a tz string.
            return None

        unused_idxs = set(range(len_l)).difference(used_idxs)
        res.any_unused_tokens = not {l[n] for n in unused_idxs}.issubset({",", ":"})
        return res
# Shared module-level _tzparser instance used by _parsetz().
DEFAULTTZPARSER = _tzparser()
def _parsetz(tzstr):
    """Parse the time zone string ``tzstr`` with ``DEFAULTTZPARSER``.

    Returns whatever ``DEFAULTTZPARSER.parse`` returns for ``tzstr``.
    """
    return DEFAULTTZPARSER.parse(tzstr)
class ParserError(ValueError):
    """Exception subclass used for any failure to parse a datetime string.

    This is a subclass of :py:exc:`ValueError`, and should be raised any time
    earlier versions of ``dateutil`` would have raised ``ValueError``.

    The first argument is treated as a ``%``-style format string and the
    remaining arguments as its values when the exception is rendered.

    .. versionadded:: 2.8.1
    """
    def __str__(self):
        """Return ``args[0] % args[1:]``, or the plain text on failure."""
        try:
            return self.args[0] % self.args[1:]
        except (TypeError, IndexError, ValueError):
            # TypeError: argument count/type mismatch or non-string args[0];
            # IndexError: no arguments at all;
            # ValueError: args[0] holds a stray or malformed '%' directive
            # (e.g. "100%").  str() must never raise, so fall back.
            return super(ParserError, self).__str__()

    def __repr__(self):
        """Return ``ClassName('arg1', 'arg2', ...)``."""
        # Wrap each argument in a 1-tuple so that an argument which is
        # itself a tuple is formatted as a single value, not unpacked.
        args = ", ".join("'%s'" % (arg,) for arg in self.args)
        return "%s(%s)" % (self.__class__.__name__, args)
class UnknownTimezoneWarning(RuntimeWarning):
    """Raised when the parser finds a timezone it cannot parse into a tzinfo.

    The parser issues this warning (via :func:`warnings.warn`) when a time
    zone name was identified in the string but could not be resolved; the
    datetime is then returned without time zone information.

    .. versionadded:: 2.7.0
    """
# vim:ts=4:sw=4:et | |