|
from __future__ import annotations |
|
|
|
import contextlib |
|
import re |
|
from dataclasses import dataclass |
|
from typing import Iterator, NoReturn |
|
|
|
from .specifiers import Specifier |
|
|
|
|
|
@dataclass
class Token:
    """A single token produced while scanning the source text."""

    # Name of the rule that matched (a key of the tokenizer's rule table).
    name: str
    # Exact text that the rule matched.
    text: str
    # Offset in the source string at which the match starts.
    position: int
|
|
|
|
|
class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: tuple[int, int],
    ) -> None:
        """Record what went wrong and where.

        ``message`` describes the problem, ``source`` is the full text being
        parsed, and ``span`` is the ``(start, end)`` offset pair within
        ``source`` that the error refers to.
        """
        # The base class is initialised without arguments; the rendered text
        # comes entirely from __str__ below.
        super().__init__()

        self.message = message
        self.source = source
        self.span = span

    def __str__(self) -> str:
        """Render the message, the source, and a marker line under the span."""
        start = self.span[0]
        end = self.span[1]

        # Pad up to the start of the span, draw one "~" per spanned character,
        # and finish with a caret at the end offset.
        padding = " " * start
        underline = "~" * (end - start)
        marker = f"{padding}{underline}^"

        return "\n ".join((self.message, self.source, marker))
|
|
|
|
|
# Default token table: maps a token name to the regular expression that
# recognises it. Values may be plain pattern strings or pre-compiled patterns;
# Tokenizer compiles every entry when it is constructed.
DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
    # Single-character punctuation.
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    # A single- or double-quoted string that does not contain its own quote
    # character (there is no escape handling in the pattern).
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    # Comparison operators; longer alternatives are listed before their
    # prefixes so that e.g. "===" is not matched as "==".
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    # Word-bounded keywords.
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    # Recognised variable names. Where the pattern uses `[._]`, both the
    # dotted and the underscored spelling are accepted.
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extra
            )\b
        """,
        re.VERBOSE,
    ),
    # Built by concatenating Specifier's operator and version regex strings;
    # compiled in verbose, case-insensitive mode.
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    # Any run of characters other than space or tab.
    "URL": r"[^ \t]+",
    # Starts with an ASCII letter or digit, then letters, digits, ".", "_"
    # or "-", bounded by word boundaries.
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    # A literal ".*" suffix.
    "VERSION_PREFIX_TRAIL": r"\.\*",
    # "+" followed by lowercase-alphanumeric segments separated by "-", "_"
    # or ".".
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    # One or more spaces or tabs.
    "WS": r"[ \t]+",
    # End of input.
    "END": r"$",
}
|
|
|
|
|
class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches.

    Tokens are not produced ahead of time: the caller names the token it is
    looking for and the matching rule is tried at the current position. At
    most one matched token is held in ``next_token`` until it is read.
    """

    def __init__(
        self,
        source: str,
        *,
        rules: dict[str, str | re.Pattern[str]],
    ) -> None:
        """Create a tokenizer over ``source`` using the given rule table.

        ``rules`` maps token names to patterns, given either as pattern
        strings or as already-compiled patterns.
        """
        self.source = source
        # re.compile() returns an already-compiled pattern unchanged, so both
        # kinds of rule value end up as compiled patterns here.
        self.rules: dict[str, re.Pattern[str]] = {
            name: re.compile(pattern) for name, pattern in rules.items()
        }
        # Token matched by the last successful non-peek check(), not yet read.
        self.next_token: Token | None = None
        # Offset in ``source`` at which the next match is attempted.
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        assert (
            self.next_token is None
        ), f"Cannot check for {name!r}, already have {self.next_token!r}"
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

        # Anchored match starting exactly at the current position.
        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        On success the token is read (the position moves past it) and
        returned. ``expected`` is the human-readable description used in the
        "Expected ..." error message.
        """
        if not self.check(name):
            # raise_syntax_error() never returns, so no explicit `raise` is
            # needed here.
            self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it."""
        token = self.next_token
        assert token is not None

        # Advance past the matched text and release the pending-token slot so
        # that check() may be called again.
        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: int | None = None,
        span_end: int | None = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position.

        ``span_start`` and ``span_end`` each default to the current position
        when omitted.
        """
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
        """Parse the ``with`` body inside an optional open/close token pair.

        If ``open_token`` is present at the current position it is read before
        the body runs, and ``close_token`` is then required (and read) after
        it. If ``open_token`` is absent, nothing is read before or after the
        body. ``around`` describes the enclosed construct for the error
        message raised when the closing token is missing.
        """
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            # Report the error as a span from the opening token to the
            # current position.
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()
|
|