cmrit
/
cmrithackathon-master
/.venv
/lib
/python3.11
/site-packages
/pandas
/compat
/numpy
/function.py
""" | |
For compatibility with numpy libraries, pandas functions or methods have to | |
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that | |
are not actually used or respected in the pandas implementation. | |
To ensure that users do not abuse these parameters, validation is performed in | |
'validators.py' to make sure that any extra parameters passed correspond ONLY | |
to those in the numpy signature. Part of that validation includes whether or | |
not the user attempted to pass in non-default values for these extraneous | |
parameters. As we want to discourage users from relying on these parameters | |
when calling the pandas implementation, we want them only to pass in the | |
default values for these parameters. | |
This module provides a set of commonly used default arguments for functions and | |
methods that are spread throughout the codebase. This module will make it | |
easier to adjust to future upstream changes in the analogous numpy signatures. | |
""" | |
from __future__ import annotations | |
from typing import ( | |
TYPE_CHECKING, | |
Any, | |
TypeVar, | |
cast, | |
overload, | |
) | |
import numpy as np | |
from numpy import ndarray | |
from pandas._libs.lib import ( | |
is_bool, | |
is_integer, | |
) | |
from pandas.errors import UnsupportedFunctionCall | |
from pandas.util._validators import ( | |
validate_args, | |
validate_args_and_kwargs, | |
validate_kwargs, | |
) | |
if TYPE_CHECKING: | |
from pandas._typing import ( | |
Axis, | |
AxisInt, | |
) | |
AxisNoneT = TypeVar("AxisNoneT", Axis, None) | |
class CompatValidator:
    """
    Callable that validates numpy-style extra arguments against a table of
    default values.

    The constructor records the defaults table plus optional settings (the
    function name used in messages, the validation method and the positional
    argument count). Each of those settings may be overridden per call.
    """

    def __init__(
        self,
        defaults,
        fname=None,
        method: str | None = None,
        max_fname_arg_count=None,
    ) -> None:
        self.defaults = defaults
        self.fname = fname
        self.method = method
        self.max_fname_arg_count = max_fname_arg_count

    def __call__(
        self,
        args,
        kwargs,
        fname=None,
        max_fname_arg_count=None,
        method: str | None = None,
    ) -> None:
        """
        Validate 'args' and/or 'kwargs' against the stored defaults.

        Any of 'fname', 'max_fname_arg_count' and 'method' left as None falls
        back to the value given at construction time.

        Raises
        ------
        ValueError
            If the resolved 'method' is not one of "args", "kwargs" or "both".
            The underlying validators may raise their own errors as well.
        """
        # Nothing extra was supplied, so there is nothing to check (this
        # short-circuits before the validation method is even looked at).
        if not (args or kwargs):
            return None

        if fname is None:
            fname = self.fname
        if max_fname_arg_count is None:
            max_fname_arg_count = self.max_fname_arg_count
        if method is None:
            method = self.method

        if method == "both":
            validate_args_and_kwargs(
                fname, args, kwargs, max_fname_arg_count, self.defaults
            )
            return None
        if method == "kwargs":
            validate_kwargs(fname, kwargs, self.defaults)
            return None
        if method == "args":
            validate_args(fname, args, max_fname_arg_count, self.defaults)
            return None

        raise ValueError(f"invalid validation method '{method}'")
# Extra numpy-signature parameter tolerated by argmin/argmax, with its default.
ARGMINMAX_DEFAULTS = {"out": None}

validate_argmin = CompatValidator(
    ARGMINMAX_DEFAULTS,
    fname="argmin",
    max_fname_arg_count=1,
    method="both",
)
validate_argmax = CompatValidator(
    ARGMINMAX_DEFAULTS,
    fname="argmax",
    max_fname_arg_count=1,
    method="both",
)
def process_skipna(skipna: bool | ndarray | None, args) -> tuple[bool, Any]:
    """
    Separate a genuine 'skipna' value from a misplaced positional argument.

    When 'skipna' is an ndarray or None it is pushed onto the front of 'args'
    and 'skipna' is reported as True; otherwise both values are returned
    unchanged.
    """
    if skipna is not None and not isinstance(skipna, ndarray):
        return skipna, args
    return True, (skipna,) + args
def validate_argmin_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    Validate the extra arguments of an 'argmin' call and return 'skipna'.

    If 'Series.argmin' is called via the 'numpy' library, the third parameter
    in its signature is 'out', which takes either an ndarray or 'None'. A
    'skipna' that is an ndarray or None is therefore treated as that
    positional argument, since 'skipna' itself should be a boolean.
    """
    resolved_skipna, numpy_args = process_skipna(skipna, args)
    validate_argmin(numpy_args, kwargs)
    return resolved_skipna
def validate_argmax_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    Validate the extra arguments of an 'argmax' call and return 'skipna'.

    If 'Series.argmax' is called via the 'numpy' library, the third parameter
    in its signature is 'out', which takes either an ndarray or 'None'. A
    'skipna' that is an ndarray or None is therefore treated as that
    positional argument, since 'skipna' itself should be a boolean.
    """
    resolved_skipna, numpy_args = process_skipna(skipna, args)
    validate_argmax(numpy_args, kwargs)
    return resolved_skipna
# Defaults for the extra parameters accepted by argsort. Key order matters
# because positional arguments are matched against it.
# "kind" was previously assigned "quicksort" and then immediately overwritten
# with None; only the effective value is kept here.
ARGSORT_DEFAULTS: dict[str, int | str | None] = {
    "axis": -1,
    "kind": None,
    "order": None,
    "stable": None,
}
validate_argsort = CompatValidator(
    ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
)
# argsort has two different signatures; this second table is for callers
# where the `kind` param is supported, so it carries no "kind" entry.
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {
    "axis": -1,
    "order": None,
    "stable": None,
}
validate_argsort_kind = CompatValidator(
    ARGSORT_DEFAULTS_KIND,
    fname="argsort",
    method="both",
    max_fname_arg_count=0,
)
def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) -> bool:
    """
    Validate the extra arguments of an 'argsort' call and return 'ascending'.

    If 'Categorical.argsort' is called via the 'numpy' library, the first
    parameter in its signature is 'axis', which takes either an integer or
    'None'. An 'ascending' that is an integer or None is therefore treated as
    that positional argument, since 'ascending' itself should be a boolean.
    """
    looks_like_axis = ascending is None or is_integer(ascending)
    if looks_like_axis:
        # Hand the value to the validator as a positional argument and fall
        # back to the default sort direction.
        args = (ascending,) + args
        ascending = True

    validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
    return cast(bool, ascending)
# Extra numpy-signature parameter tolerated by clip, with its default.
CLIP_DEFAULTS: dict[str, Any] = {"out": None}
validate_clip = CompatValidator(
    CLIP_DEFAULTS,
    fname="clip",
    max_fname_arg_count=3,
    method="both",
)
# The two stubs below only describe the typing contract; they are marked with
# @overload so they are not treated as shadowed redefinitions.
@overload
def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None:
    ...


@overload
def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT:
    ...


def validate_clip_with_axis(
    axis: ndarray | AxisNoneT, args, kwargs
) -> AxisNoneT | None:
    """
    Validate the extra arguments of a 'clip' call and return 'axis'.

    If 'NDFrame.clip' is called via the numpy library, the third parameter in
    its signature is 'out', which can take an ndarray, so check if the 'axis'
    parameter is an instance of ndarray, since 'axis' itself should either be
    an integer or None.

    Returns
    -------
    The original 'axis', or None when 'axis' was an ndarray and was moved to
    the front of 'args' instead.
    """
    if isinstance(axis, ndarray):
        args = (axis,) + args
        # error: Incompatible types in assignment (expression has type "None",
        # variable has type "Union[ndarray[Any, Any], str, int]")
        axis = None  # type: ignore[assignment]

    validate_clip(args, kwargs)
    # error: Incompatible return value type (got "Union[ndarray[Any, Any],
    # str, int]", expected "Union[str, int, None]")
    return axis  # type: ignore[return-value]
# Extra numpy-signature parameters tolerated by the cumulative functions.
CUM_FUNC_DEFAULTS: dict[str, Any] = {"dtype": None, "out": None}

# Generic validator: no function name is fixed here, so callers pass 'fname'.
validate_cum_func = CompatValidator(
    CUM_FUNC_DEFAULTS, max_fname_arg_count=1, method="both"
)
validate_cumsum = CompatValidator(
    CUM_FUNC_DEFAULTS, fname="cumsum", max_fname_arg_count=1, method="both"
)
def validate_cum_func_with_skipna(skipna: bool, args, kwargs, name) -> bool:
    """
    Validate the extra arguments of a cumulative function and return 'skipna'.

    If this function is called via the 'numpy' library, the third parameter in
    its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so
    a non-boolean 'skipna' is treated as that positional argument. 'name' is
    forwarded to the validator as the function name.
    """
    if is_bool(skipna):
        # Normalise numpy booleans to the builtin type.
        if isinstance(skipna, np.bool_):
            skipna = bool(skipna)
    else:
        args = (skipna,) + args
        skipna = True

    validate_cum_func(args, kwargs, fname=name)
    return skipna
# Extra numpy-signature parameters tolerated by all/any (insertion order kept).
ALLANY_DEFAULTS: dict[str, bool | None] = {
    "dtype": None,
    "out": None,
    "keepdims": False,
    "axis": None,
}
validate_all = CompatValidator(
    ALLANY_DEFAULTS, fname="all", max_fname_arg_count=1, method="both"
)
validate_any = CompatValidator(
    ALLANY_DEFAULTS, fname="any", max_fname_arg_count=1, method="both"
)
# Keyword-only validation for the logical reductions.
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False}
validate_logical_func = CompatValidator(
    LOGICAL_FUNC_DEFAULTS,
    method="kwargs",
)

# Extra numpy-signature parameters tolerated by min/max.
MINMAX_DEFAULTS = {
    "axis": None,
    "dtype": None,
    "out": None,
    "keepdims": False,
}
validate_min = CompatValidator(
    MINMAX_DEFAULTS, fname="min", max_fname_arg_count=1, method="both"
)
validate_max = CompatValidator(
    MINMAX_DEFAULTS, fname="max", max_fname_arg_count=1, method="both"
)
# reshape: only the 'order' parameter is tolerated, at its default.
RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"}
validate_reshape = CompatValidator(
    RESHAPE_DEFAULTS, fname="reshape", max_fname_arg_count=1, method="both"
)

# repeat: only the 'axis' parameter is tolerated, at its default.
REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
validate_repeat = CompatValidator(
    REPEAT_DEFAULTS, fname="repeat", max_fname_arg_count=1, method="both"
)

# round: only the 'out' parameter is tolerated, at its default.
ROUND_DEFAULTS: dict[str, Any] = {"out": None}
validate_round = CompatValidator(
    ROUND_DEFAULTS, fname="round", max_fname_arg_count=1, method="both"
)
# Extra parameters tolerated by sort; validated as keywords only.
SORT_DEFAULTS: dict[str, int | str | None] = {
    "axis": -1,
    "kind": "quicksort",
    "order": None,
}
validate_sort = CompatValidator(SORT_DEFAULTS, method="kwargs", fname="sort")
# Base table shared by the statistical reductions. "keepdims" is added to it
# only AFTER the per-function tables below have been derived, so those tables
# control where (and whether) "keepdims" appears in their own key order.
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {"dtype": None, "out": None}

SUM_DEFAULTS = {
    **STAT_FUNC_DEFAULTS,
    "axis": None,
    "keepdims": False,
    "initial": None,
}
PROD_DEFAULTS = dict(SUM_DEFAULTS)
MEAN_DEFAULTS = dict(SUM_DEFAULTS)

MEDIAN_DEFAULTS = {
    **STAT_FUNC_DEFAULTS,
    "overwrite_input": False,
    "keepdims": False,
}

STAT_FUNC_DEFAULTS["keepdims"] = False

# Generic keyword-only validator; callers supply 'fname' per call.
validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
validate_sum = CompatValidator(
    SUM_DEFAULTS, fname="sum", max_fname_arg_count=1, method="both"
)
validate_prod = CompatValidator(
    PROD_DEFAULTS, fname="prod", max_fname_arg_count=1, method="both"
)
validate_mean = CompatValidator(
    MEAN_DEFAULTS, fname="mean", max_fname_arg_count=1, method="both"
)
validate_median = CompatValidator(
    MEDIAN_DEFAULTS, fname="median", max_fname_arg_count=1, method="both"
)
# Extra parameters tolerated by the ddof-based statistics; keywords only.
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {
    "dtype": None,
    "out": None,
    "keepdims": False,
}
validate_stat_ddof_func = CompatValidator(
    STAT_DDOF_FUNC_DEFAULTS,
    method="kwargs",
)
# Extra parameters tolerated by take, with their defaults.
TAKE_DEFAULTS: dict[str, str | None] = {"out": None, "mode": "raise"}
validate_take = CompatValidator(TAKE_DEFAULTS, method="kwargs", fname="take")
def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) -> bool:
    """
    Validate the extra arguments of a 'take' call and return 'convert'.

    If this function is called via the 'numpy' library, the third parameter in
    its signature is 'axis', which takes either an ndarray or 'None'. A
    'convert' that is an ndarray or None is therefore moved to the front of
    'args' and 'convert' falls back to True.
    """
    misplaced = convert is None or isinstance(convert, ndarray)
    if misplaced:
        args = (convert,) + args
        convert = True

    # Override the validator's keyword-only default so positionals are checked.
    validate_take(args, kwargs, method="both", max_fname_arg_count=3)
    return convert
# transpose: only the 'axes' parameter is tolerated, at its default.
TRANSPOSE_DEFAULTS = {"axes": None}
validate_transpose = CompatValidator(
    TRANSPOSE_DEFAULTS,
    fname="transpose",
    max_fname_arg_count=0,
    method="both",
)
def validate_groupby_func(name: str, args, kwargs, allowed=None) -> None:
    """
    Reject numpy-style extra arguments passed to a groupby function.

    'args' and 'kwargs' should be empty, except for keyword names listed in
    'allowed', because all of the necessary parameters are explicitly listed
    in the function signature.

    Raises
    ------
    UnsupportedFunctionCall
        If any positional argument, or any keyword not in 'allowed', is given.
    """
    permitted = [] if allowed is None else allowed
    unexpected = set(kwargs) - set(permitted)

    if len(args) + len(unexpected) == 0:
        return

    raise UnsupportedFunctionCall(
        "numpy operations are not valid with groupby. "
        f"Use .groupby(...).{name}() instead"
    )
# Method names that get the dedicated "numpy operations" error message.
RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")


def validate_resampler_func(method: str, args, kwargs) -> None:
    """
    Reject any extra arguments passed to a resampler function.

    'args' and 'kwargs' should be empty because all of the necessary
    parameters are explicitly listed in the function signature.

    Raises
    ------
    UnsupportedFunctionCall
        If extras were passed and 'method' is in RESAMPLER_NUMPY_OPS.
    TypeError
        If extras were passed for any other 'method'.
    """
    if len(args) + len(kwargs) == 0:
        return

    if method not in RESAMPLER_NUMPY_OPS:
        raise TypeError("too many arguments passed in")

    raise UnsupportedFunctionCall(
        "numpy operations are not valid with resample. "
        f"Use .resample(...).{method}() instead"
    )
def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None:
    """
    Ensure that the axis argument passed to min, max, argmin, or argmax is
    within the bounds implied by 'ndim' (or is None), as otherwise it would be
    incorrectly ignored.

    Parameters
    ----------
    axis : int or None
    ndim : int, default 1

    Raises
    ------
    ValueError
        If 'axis' is not None and is >= ndim, or is negative and below -ndim.
    """
    if axis is not None:
        past_upper_bound = axis >= ndim
        if past_upper_bound or (axis < 0 and ndim + axis < 0):
            raise ValueError(
                f"`axis` must be fewer than the number of dimensions ({ndim})"
            )
# Function names that have a dedicated validator; anything else falls back to
# the generic stat-function validator.
_validation_funcs = {
    "median": validate_median,
    "mean": validate_mean,
    "min": validate_min,
    "max": validate_max,
    "sum": validate_sum,
    "prod": validate_prod,
}


def validate_func(fname, args, kwargs) -> None:
    """
    Validate 'args' and 'kwargs' with the validator registered for 'fname'.

    Names without a dedicated validator are checked by 'validate_stat_func',
    with 'fname' passed along as the function name.
    """
    dedicated = _validation_funcs.get(fname)
    if dedicated is None:
        return validate_stat_func(args, kwargs, fname=fname)
    return dedicated(args, kwargs)