"""Handwritten parser of dependency specifiers.

The docstring for each _parse_* function contains an EBNF-inspired grammar
representing the implementation.
"""

from __future__ import annotations

import ast
from typing import NamedTuple, Sequence, Tuple, Union

from ._tokenizer import DEFAULT_RULES, Tokenizer


class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError


class Variable(Node):
    def serialize(self) -> str:
        return str(self)


class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    def serialize(self) -> str:
        return str(self)
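
# Illustrative round-trip behavior (not part of the public API): serialize()
# renders a node back into marker-source form, re-quoting only Value nodes:
#
#   Variable("python_version").serialize()  # -> python_version
#   Op(">=").serialize()                    # -> >=
#   Value("3.9").serialize()                # -> "3.9"  (re-quoted)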


MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
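
# Illustrative shape of a MarkerList: a flat sequence of marker items and boolean
# operator strings, with a nested list wherever the source had parentheses, e.g.
#
#   "os_name == 'nt' and (sys_platform == 'win32' or python_version < '3.9')"
#
# is parsed (by _parse_marker below) into
#
#   [(<Variable('os_name')>, <Op('==')>, <Value('nt')>), 'and',
#    [(<Variable('sys_platform')>, <Op('==')>, <Value('win32')>), 'or',
#     (<Variable('python_version')>, <Op('<')>, <Value('3.9')>)]]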


class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: list[str]
    specifier: str
    marker: MarkerList | None


def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
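
# Illustrative usage (reprs follow the Node classes above; malformed input raises
# the tokenizer's syntax error instead):
#
#   parse_requirement("name[extra]>=1.0; python_version < '3.9'")
#   -> ParsedRequirement(
#          name="name",
#          url="",
#          extras=["extra"],
#          specifier=">=1.0",
#          marker=[(<Variable('python_version')>, <Op('<')>, <Value('3.9')>)],
#      )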


def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? requirement_marker?
    """
    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        # URL form: "name @ <url>", optionally followed by a marker.
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        # A URL can contain almost any character, so a marker must be set off
        # from it by whitespace before the semicolon.
        tokenizer.expect("WS", expected="whitespace after URL")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        # Specifier form: "name >=1.0,<2.0", optionally followed by a marker.
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)
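
# Branch sketch with illustrative inputs:
#
#   pkg @ https://example.com/pkg.whl ; os_name == "nt"   -> URL branch
#   pkg >=1.0,<2.0 ; os_name == "nt"                      -> specifier branch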


def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """
    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker


def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET WS? extras_list? WS? RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = IDENTIFIER (WS? COMMA WS? IDENTIFIER)*
    """
    extras: list[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            # Two identifiers in a row means the separating comma was omitted.
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
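
# Illustrative results for _parse_extras, which wraps the list parser above:
#
#   "[]"                -> []
#   "[security]"        -> ["security"]
#   "[security, tests]" -> ["security", "tests"]
#   "[security tests]"  -> syntax error ("Expected comma between extra names")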


def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        # Reject trailing constructs that are only legal after == or != operators.
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
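
# Concatenation sketch: whitespace around commas is consumed, so only specifier
# texts and the commas themselves are kept, e.g.
#
#   ">=1.0 , <2.0" -> ">=1.0,<2.0"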


def parse_marker(source: str) -> MarkerList:
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
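
# Illustrative usage (a chain without parentheses stays flat; compare the nested
# MarkerList example above):
#
#   parse_marker("python_version < '3.9' and os_name == 'posix'")
#   -> [(<Variable('python_version')>, <Op('<')>, <Value('3.9')>), 'and',
#       (<Variable('os_name')>, <Op('==')>, <Value('posix')>)]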


def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    retval = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return retval


def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression
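
# Note: the BOOLOP chain is kept deliberately flat, so mixed chains come out as
# [atom, 'and', atom, 'or', atom]; 'and'/'or' precedence is presumably resolved
# by whatever evaluates the MarkerList, not here.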


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """
    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        # Dotted legacy spellings such as "os.name" normalize to "os_name".
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )


def process_env_var(env_var: str) -> Variable:
    if env_var in ("platform_python_implementation", "python_implementation"):
        # Both spellings are accepted and normalized to the canonical name.
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)
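
# Normalization sketch (the dot replacement happens in _parse_marker_var above):
#
#   "python_implementation" -> Variable("platform_python_implementation")
#   "os.name"               -> already read as "os_name" before reaching here
#   "os_name"               -> Variable("os_name")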


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
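
# Evaluation sketch: ast.literal_eval strips the quoting safely, so
#
#   process_python_str("'3.9'")  -> <Value('3.9')>
#   process_python_str('"nt"')   -> <Value('nt')>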


def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        # "not in" is two tokens; whitespace between them is required.
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of "
            "<=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )