|
from __future__ import annotations |
|
|
|
import typing |
|
from urllib.parse import parse_qs, unquote |
|
|
|
import idna |
|
|
|
from ._types import QueryParamTypes, RawURL, URLTypes |
|
from ._urlparse import urlencode, urlparse |
|
from ._utils import primitive_value_to_str |
|
|
|
|
|
class URL: |
|
""" |
|
url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink") |
|
|
|
assert url.scheme == "https" |
|
assert url.username == "jo@email.com" |
|
assert url.password == "a secret" |
|
assert url.userinfo == b"jo%40email.com:a%20secret" |
|
assert url.host == "müller.de" |
|
assert url.raw_host == b"xn--mller-kva.de" |
|
assert url.port == 1234 |
|
assert url.netloc == b"xn--mller-kva.de:1234" |
|
assert url.path == "/pa th" |
|
assert url.query == b"?search=ab" |
|
assert url.raw_path == b"/pa%20th?search=ab" |
|
assert url.fragment == "anchorlink" |
|
|
|
The components of a URL are broken down like this: |
|
|
|
https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink |
|
[scheme] [ username ] [password] [ host ][port][ path ] [ query ] [fragment] |
|
[ userinfo ] [ netloc ][ raw_path ] |
|
|
|
Note that: |
|
|
|
* `url.scheme` is normalized to always be lowercased. |
|
|
|
* `url.host` is normalized to always be lowercased. Internationalized domain |
|
names are represented in unicode, without IDNA encoding applied. For instance: |
|
|
|
url = httpx.URL("http://中国.icom.museum") |
|
assert url.host == "中国.icom.museum" |
|
url = httpx.URL("http://xn--fiqs8s.icom.museum") |
|
assert url.host == "中国.icom.museum" |
|
|
|
* `url.raw_host` is normalized to always be lowercased, and is IDNA encoded. |
|
|
|
url = httpx.URL("http://中国.icom.museum") |
|
assert url.raw_host == b"xn--fiqs8s.icom.museum" |
|
url = httpx.URL("http://xn--fiqs8s.icom.museum") |
|
assert url.raw_host == b"xn--fiqs8s.icom.museum" |
|
|
|
* `url.port` is either None or an integer. URLs that include the default port for |
|
"http", "https", "ws", "wss", and "ftp" schemes have their port |
|
normalized to `None`. |
|
|
|
assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80") |
|
assert httpx.URL("http://example.com").port is None |
|
assert httpx.URL("http://example.com:80").port is None |
|
|
|
* `url.userinfo` is raw bytes, without URL escaping. Usually you'll want to work |
|
with `url.username` and `url.password` instead, which handle the URL escaping. |
|
|
|
* `url.raw_path` is raw bytes of both the path and query, without URL escaping. |
|
This portion is used as the target when constructing HTTP requests. Usually you'll |
|
want to work with `url.path` instead. |
|
|
|
* `url.query` is raw bytes, without URL escaping. A URL query string portion can |
|
only be properly URL escaped when decoding the parameter names and values |
|
themselves. |
|
""" |
|
|
|
def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None: |
|
if kwargs: |
|
allowed = { |
|
"scheme": str, |
|
"username": str, |
|
"password": str, |
|
"userinfo": bytes, |
|
"host": str, |
|
"port": int, |
|
"netloc": bytes, |
|
"path": str, |
|
"query": bytes, |
|
"raw_path": bytes, |
|
"fragment": str, |
|
"params": object, |
|
} |
|
|
|
|
|
for key, value in kwargs.items(): |
|
if key not in allowed: |
|
message = f"{key!r} is an invalid keyword argument for URL()" |
|
raise TypeError(message) |
|
if value is not None and not isinstance(value, allowed[key]): |
|
expected = allowed[key].__name__ |
|
seen = type(value).__name__ |
|
message = f"Argument {key!r} must be {expected} but got {seen}" |
|
raise TypeError(message) |
|
if isinstance(value, bytes): |
|
kwargs[key] = value.decode("ascii") |
|
|
|
if "params" in kwargs: |
|
|
|
|
|
|
|
|
|
|
|
params = kwargs.pop("params") |
|
kwargs["query"] = None if not params else str(QueryParams(params)) |
|
|
|
if isinstance(url, str): |
|
self._uri_reference = urlparse(url, **kwargs) |
|
elif isinstance(url, URL): |
|
self._uri_reference = url._uri_reference.copy_with(**kwargs) |
|
else: |
|
raise TypeError( |
|
"Invalid type for url. Expected str or httpx.URL," |
|
f" got {type(url)}: {url!r}" |
|
) |
|
|
|
@property |
|
def scheme(self) -> str: |
|
""" |
|
The URL scheme, such as "http", "https". |
|
Always normalised to lowercase. |
|
""" |
|
return self._uri_reference.scheme |
|
|
|
@property |
|
def raw_scheme(self) -> bytes: |
|
""" |
|
The raw bytes representation of the URL scheme, such as b"http", b"https". |
|
Always normalised to lowercase. |
|
""" |
|
return self._uri_reference.scheme.encode("ascii") |
|
|
|
@property |
|
def userinfo(self) -> bytes: |
|
""" |
|
The URL userinfo as a raw bytestring. |
|
For example: b"jo%40email.com:a%20secret". |
|
""" |
|
return self._uri_reference.userinfo.encode("ascii") |
|
|
|
@property |
|
def username(self) -> str: |
|
""" |
|
The URL username as a string, with URL decoding applied. |
|
For example: "jo@email.com" |
|
""" |
|
userinfo = self._uri_reference.userinfo |
|
return unquote(userinfo.partition(":")[0]) |
|
|
|
@property |
|
def password(self) -> str: |
|
""" |
|
The URL password as a string, with URL decoding applied. |
|
For example: "a secret" |
|
""" |
|
userinfo = self._uri_reference.userinfo |
|
return unquote(userinfo.partition(":")[2]) |
|
|
|
@property |
|
def host(self) -> str: |
|
""" |
|
The URL host as a string. |
|
Always normalized to lowercase, with IDNA hosts decoded into unicode. |
|
|
|
Examples: |
|
|
|
url = httpx.URL("http://www.EXAMPLE.org") |
|
assert url.host == "www.example.org" |
|
|
|
url = httpx.URL("http://中国.icom.museum") |
|
assert url.host == "中国.icom.museum" |
|
|
|
url = httpx.URL("http://xn--fiqs8s.icom.museum") |
|
assert url.host == "中国.icom.museum" |
|
|
|
url = httpx.URL("https://[::ffff:192.168.0.1]") |
|
assert url.host == "::ffff:192.168.0.1" |
|
""" |
|
host: str = self._uri_reference.host |
|
|
|
if host.startswith("xn--"): |
|
host = idna.decode(host) |
|
|
|
return host |
|
|
|
@property |
|
def raw_host(self) -> bytes: |
|
""" |
|
The raw bytes representation of the URL host. |
|
Always normalized to lowercase, and IDNA encoded. |
|
|
|
Examples: |
|
|
|
url = httpx.URL("http://www.EXAMPLE.org") |
|
assert url.raw_host == b"www.example.org" |
|
|
|
url = httpx.URL("http://中国.icom.museum") |
|
assert url.raw_host == b"xn--fiqs8s.icom.museum" |
|
|
|
url = httpx.URL("http://xn--fiqs8s.icom.museum") |
|
assert url.raw_host == b"xn--fiqs8s.icom.museum" |
|
|
|
url = httpx.URL("https://[::ffff:192.168.0.1]") |
|
assert url.raw_host == b"::ffff:192.168.0.1" |
|
""" |
|
return self._uri_reference.host.encode("ascii") |
|
|
|
@property |
|
def port(self) -> int | None: |
|
""" |
|
The URL port as an integer. |
|
|
|
Note that the URL class performs port normalization as per the WHATWG spec. |
|
Default ports for "http", "https", "ws", "wss", and "ftp" schemes are always |
|
treated as `None`. |
|
|
|
For example: |
|
|
|
assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80") |
|
assert httpx.URL("http://www.example.com:80").port is None |
|
""" |
|
return self._uri_reference.port |
|
|
|
@property |
|
def netloc(self) -> bytes: |
|
""" |
|
Either `<host>` or `<host>:<port>` as bytes. |
|
Always normalized to lowercase, and IDNA encoded. |
|
|
|
This property may be used for generating the value of a request |
|
"Host" header. |
|
""" |
|
return self._uri_reference.netloc.encode("ascii") |
|
|
|
@property |
|
def path(self) -> str: |
|
""" |
|
The URL path as a string. Excluding the query string, and URL decoded. |
|
|
|
For example: |
|
|
|
url = httpx.URL("https://example.com/pa%20th") |
|
assert url.path == "/pa th" |
|
""" |
|
path = self._uri_reference.path or "/" |
|
return unquote(path) |
|
|
|
@property |
|
def query(self) -> bytes: |
|
""" |
|
The URL query string, as raw bytes, excluding the leading b"?". |
|
|
|
This is necessarily a bytewise interface, because we cannot |
|
perform URL decoding of this representation until we've parsed |
|
the keys and values into a QueryParams instance. |
|
|
|
For example: |
|
|
|
url = httpx.URL("https://example.com/?filter=some%20search%20terms") |
|
assert url.query == b"filter=some%20search%20terms" |
|
""" |
|
query = self._uri_reference.query or "" |
|
return query.encode("ascii") |
|
|
|
@property |
|
def params(self) -> QueryParams: |
|
""" |
|
The URL query parameters, neatly parsed and packaged into an immutable |
|
multidict representation. |
|
""" |
|
return QueryParams(self._uri_reference.query) |
|
|
|
@property |
|
def raw_path(self) -> bytes: |
|
""" |
|
The complete URL path and query string as raw bytes. |
|
Used as the target when constructing HTTP requests. |
|
|
|
For example: |
|
|
|
GET /users?search=some%20text HTTP/1.1 |
|
Host: www.example.org |
|
Connection: close |
|
""" |
|
path = self._uri_reference.path or "/" |
|
if self._uri_reference.query is not None: |
|
path += "?" + self._uri_reference.query |
|
return path.encode("ascii") |
|
|
|
@property |
|
def fragment(self) -> str: |
|
""" |
|
The URL fragments, as used in HTML anchors. |
|
As a string, without the leading '#'. |
|
""" |
|
return unquote(self._uri_reference.fragment or "") |
|
|
|
@property |
|
def raw(self) -> RawURL: |
|
""" |
|
Provides the (scheme, host, port, target) for the outgoing request. |
|
|
|
In older versions of `httpx` this was used in the low-level transport API. |
|
We no longer use `RawURL`, and this property will be deprecated |
|
in a future release. |
|
""" |
|
return RawURL( |
|
self.raw_scheme, |
|
self.raw_host, |
|
self.port, |
|
self.raw_path, |
|
) |
|
|
|
@property |
|
def is_absolute_url(self) -> bool: |
|
""" |
|
Return `True` for absolute URLs such as 'http://example.com/path', |
|
and `False` for relative URLs such as '/path'. |
|
""" |
|
|
|
|
|
|
|
|
|
return bool(self._uri_reference.scheme and self._uri_reference.host) |
|
|
|
@property |
|
def is_relative_url(self) -> bool: |
|
""" |
|
Return `False` for absolute URLs such as 'http://example.com/path', |
|
and `True` for relative URLs such as '/path'. |
|
""" |
|
return not self.is_absolute_url |
|
|
|
def copy_with(self, **kwargs: typing.Any) -> URL: |
|
""" |
|
Copy this URL, returning a new URL with some components altered. |
|
Accepts the same set of parameters as the components that are made |
|
available via properties on the `URL` class. |
|
|
|
For example: |
|
|
|
url = httpx.URL("https://www.example.com").copy_with( |
|
username="jo@gmail.com", password="a secret" |
|
) |
|
assert url == "https://jo%40email.com:a%20secret@www.example.com" |
|
""" |
|
return URL(self, **kwargs) |
|
|
|
def copy_set_param(self, key: str, value: typing.Any = None) -> URL: |
|
return self.copy_with(params=self.params.set(key, value)) |
|
|
|
def copy_add_param(self, key: str, value: typing.Any = None) -> URL: |
|
return self.copy_with(params=self.params.add(key, value)) |
|
|
|
def copy_remove_param(self, key: str) -> URL: |
|
return self.copy_with(params=self.params.remove(key)) |
|
|
|
def copy_merge_params(self, params: QueryParamTypes) -> URL: |
|
return self.copy_with(params=self.params.merge(params)) |
|
|
|
def join(self, url: URLTypes) -> URL: |
|
""" |
|
Return an absolute URL, using this URL as the base. |
|
|
|
Eg. |
|
|
|
url = httpx.URL("https://www.example.com/test") |
|
url = url.join("/new/path") |
|
assert url == "https://www.example.com/new/path" |
|
""" |
|
from urllib.parse import urljoin |
|
|
|
return URL(urljoin(str(self), str(URL(url)))) |
|
|
|
def __hash__(self) -> int: |
|
return hash(str(self)) |
|
|
|
def __eq__(self, other: typing.Any) -> bool: |
|
return isinstance(other, (URL, str)) and str(self) == str(URL(other)) |
|
|
|
def __str__(self) -> str: |
|
return str(self._uri_reference) |
|
|
|
def __repr__(self) -> str: |
|
scheme, userinfo, host, port, path, query, fragment = self._uri_reference |
|
|
|
if ":" in userinfo: |
|
|
|
userinfo = f'{userinfo.split(":")[0]}:[secure]' |
|
|
|
authority = "".join( |
|
[ |
|
f"{userinfo}@" if userinfo else "", |
|
f"[{host}]" if ":" in host else host, |
|
f":{port}" if port is not None else "", |
|
] |
|
) |
|
url = "".join( |
|
[ |
|
f"{self.scheme}:" if scheme else "", |
|
f"//{authority}" if authority else "", |
|
path, |
|
f"?{query}" if query is not None else "", |
|
f"#{fragment}" if fragment is not None else "", |
|
] |
|
) |
|
|
|
return f"{self.__class__.__name__}({url!r})" |
|
|
|
|
|
class QueryParams(typing.Mapping[str, str]): |
|
""" |
|
URL query parameters, as a multi-dict. |
|
""" |
|
|
|
def __init__(self, *args: QueryParamTypes | None, **kwargs: typing.Any) -> None: |
|
assert len(args) < 2, "Too many arguments." |
|
assert not (args and kwargs), "Cannot mix named and unnamed arguments." |
|
|
|
value = args[0] if args else kwargs |
|
|
|
if value is None or isinstance(value, (str, bytes)): |
|
value = value.decode("ascii") if isinstance(value, bytes) else value |
|
self._dict = parse_qs(value, keep_blank_values=True) |
|
elif isinstance(value, QueryParams): |
|
self._dict = {k: list(v) for k, v in value._dict.items()} |
|
else: |
|
dict_value: dict[typing.Any, list[typing.Any]] = {} |
|
if isinstance(value, (list, tuple)): |
|
|
|
|
|
|
|
|
|
for item in value: |
|
dict_value.setdefault(item[0], []).append(item[1]) |
|
else: |
|
|
|
|
|
|
|
|
|
dict_value = { |
|
k: list(v) if isinstance(v, (list, tuple)) else [v] |
|
for k, v in value.items() |
|
} |
|
|
|
|
|
|
|
|
|
self._dict = { |
|
str(k): [primitive_value_to_str(item) for item in v] |
|
for k, v in dict_value.items() |
|
} |
|
|
|
def keys(self) -> typing.KeysView[str]: |
|
""" |
|
Return all the keys in the query params. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123&a=456&b=789") |
|
assert list(q.keys()) == ["a", "b"] |
|
""" |
|
return self._dict.keys() |
|
|
|
def values(self) -> typing.ValuesView[str]: |
|
""" |
|
Return all the values in the query params. If a key occurs more than once |
|
only the first item for that key is returned. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123&a=456&b=789") |
|
assert list(q.values()) == ["123", "789"] |
|
""" |
|
return {k: v[0] for k, v in self._dict.items()}.values() |
|
|
|
def items(self) -> typing.ItemsView[str, str]: |
|
""" |
|
Return all items in the query params. If a key occurs more than once |
|
only the first item for that key is returned. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123&a=456&b=789") |
|
assert list(q.items()) == [("a", "123"), ("b", "789")] |
|
""" |
|
return {k: v[0] for k, v in self._dict.items()}.items() |
|
|
|
def multi_items(self) -> list[tuple[str, str]]: |
|
""" |
|
Return all items in the query params. Allow duplicate keys to occur. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123&a=456&b=789") |
|
assert list(q.multi_items()) == [("a", "123"), ("a", "456"), ("b", "789")] |
|
""" |
|
multi_items: list[tuple[str, str]] = [] |
|
for k, v in self._dict.items(): |
|
multi_items.extend([(k, i) for i in v]) |
|
return multi_items |
|
|
|
def get(self, key: typing.Any, default: typing.Any = None) -> typing.Any: |
|
""" |
|
Get a value from the query param for a given key. If the key occurs |
|
more than once, then only the first value is returned. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123&a=456&b=789") |
|
assert q.get("a") == "123" |
|
""" |
|
if key in self._dict: |
|
return self._dict[str(key)][0] |
|
return default |
|
|
|
def get_list(self, key: str) -> list[str]: |
|
""" |
|
Get all values from the query param for a given key. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123&a=456&b=789") |
|
assert q.get_list("a") == ["123", "456"] |
|
""" |
|
return list(self._dict.get(str(key), [])) |
|
|
|
def set(self, key: str, value: typing.Any = None) -> QueryParams: |
|
""" |
|
Return a new QueryParams instance, setting the value of a key. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123") |
|
q = q.set("a", "456") |
|
assert q == httpx.QueryParams("a=456") |
|
""" |
|
q = QueryParams() |
|
q._dict = dict(self._dict) |
|
q._dict[str(key)] = [primitive_value_to_str(value)] |
|
return q |
|
|
|
def add(self, key: str, value: typing.Any = None) -> QueryParams: |
|
""" |
|
Return a new QueryParams instance, setting or appending the value of a key. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123") |
|
q = q.add("a", "456") |
|
assert q == httpx.QueryParams("a=123&a=456") |
|
""" |
|
q = QueryParams() |
|
q._dict = dict(self._dict) |
|
q._dict[str(key)] = q.get_list(key) + [primitive_value_to_str(value)] |
|
return q |
|
|
|
def remove(self, key: str) -> QueryParams: |
|
""" |
|
Return a new QueryParams instance, removing the value of a key. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123") |
|
q = q.remove("a") |
|
assert q == httpx.QueryParams("") |
|
""" |
|
q = QueryParams() |
|
q._dict = dict(self._dict) |
|
q._dict.pop(str(key), None) |
|
return q |
|
|
|
def merge(self, params: QueryParamTypes | None = None) -> QueryParams: |
|
""" |
|
Return a new QueryParams instance, updated with. |
|
|
|
Usage: |
|
|
|
q = httpx.QueryParams("a=123") |
|
q = q.merge({"b": "456"}) |
|
assert q == httpx.QueryParams("a=123&b=456") |
|
|
|
q = httpx.QueryParams("a=123") |
|
q = q.merge({"a": "456", "b": "789"}) |
|
assert q == httpx.QueryParams("a=456&b=789") |
|
""" |
|
q = QueryParams(params) |
|
q._dict = {**self._dict, **q._dict} |
|
return q |
|
|
|
def __getitem__(self, key: typing.Any) -> str: |
|
return self._dict[key][0] |
|
|
|
def __contains__(self, key: typing.Any) -> bool: |
|
return key in self._dict |
|
|
|
def __iter__(self) -> typing.Iterator[typing.Any]: |
|
return iter(self.keys()) |
|
|
|
def __len__(self) -> int: |
|
return len(self._dict) |
|
|
|
def __bool__(self) -> bool: |
|
return bool(self._dict) |
|
|
|
def __hash__(self) -> int: |
|
return hash(str(self)) |
|
|
|
def __eq__(self, other: typing.Any) -> bool: |
|
if not isinstance(other, self.__class__): |
|
return False |
|
return sorted(self.multi_items()) == sorted(other.multi_items()) |
|
|
|
def __str__(self) -> str: |
|
""" |
|
Note that we use '%20' encoding for spaces, and treat '/' as a safe |
|
character. |
|
|
|
See https://github.com/encode/httpx/issues/2536 and |
|
https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode |
|
""" |
|
return urlencode(self.multi_items()) |
|
|
|
def __repr__(self) -> str: |
|
class_name = self.__class__.__name__ |
|
query_string = str(self) |
|
return f"{class_name}({query_string!r})" |
|
|
|
def update(self, params: QueryParamTypes | None = None) -> None: |
|
raise RuntimeError( |
|
"QueryParams are immutable since 0.18.0. " |
|
"Use `q = q.merge(...)` to create an updated copy." |
|
) |
|
|
|
def __setitem__(self, key: str, value: str) -> None: |
|
raise RuntimeError( |
|
"QueryParams are immutable since 0.18.0. " |
|
"Use `q = q.set(key, value)` to create an updated copy." |
|
) |
|
|