From 4786b99dd38ddc89ba673152ddf6b7940ace921b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 18 Sep 2021 18:15:07 -0700 Subject: [PATCH] REF: ExtensionIndex.searchsorted -> IndexOpsMixin.searchsorted --- pandas/core/algorithms.py | 7 +++-- pandas/core/base.py | 41 +++++++++++++++++++++++++++-- pandas/core/indexes/extension.py | 45 -------------------------------- pandas/core/series.py | 7 ++--- 4 files changed, 48 insertions(+), 52 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7d2eb3acf2ed2..641574c552bf1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -95,6 +95,7 @@ ) from pandas.core.arrays import ( DatetimeArray, + ExtensionArray, TimedeltaArray, ) @@ -1530,7 +1531,7 @@ def take( def searchsorted( arr: ArrayLike, - value: NumpyValueArrayLike, + value: NumpyValueArrayLike | ExtensionArray, side: Literal["left", "right"] = "left", sorter: NumpySorter = None, ) -> npt.NDArray[np.intp] | np.intp: @@ -1611,7 +1612,9 @@ def searchsorted( # and `value` is a pd.Timestamp, we may need to convert value arr = ensure_wrapped_if_datetimelike(arr) - return arr.searchsorted(value, side=side, sorter=sorter) + # Argument 1 to "searchsorted" of "ndarray" has incompatible type + # "Union[NumpyValueArrayLike, ExtensionArray]"; expected "NumpyValueArrayLike" + return arr.searchsorted(value, side=side, sorter=sorter) # type: ignore[arg-type] # ---- # diff --git a/pandas/core/base.py b/pandas/core/base.py index b0993bbc619dc..3185099ccf410 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -14,6 +14,7 @@ TypeVar, cast, final, + overload, ) import numpy as np @@ -1226,14 +1227,50 @@ def factorize(self, sort: bool = False, na_sentinel: int | None = -1): 0 # wrong result, correct would be 1 """ + # This overload is needed so that the call to searchsorted in + # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result + + @overload + # The following ignore is also present in numpy/__init__.pyi + # Possibly a mypy bug?? + # error: Overloaded function signatures 1 and 2 overlap with incompatible + # return types [misc] + def searchsorted( # type: ignore[misc] + self, + value: npt._ScalarLike_co, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> np.intp: + ... + + @overload + def searchsorted( + self, + value: npt.ArrayLike | ExtensionArray, + side: Literal["left", "right"] = "left", + sorter: NumpySorter = None, + ) -> npt.NDArray[np.intp]: + ... + @doc(_shared_docs["searchsorted"], klass="Index") def searchsorted( self, - value: NumpyValueArrayLike, + value: NumpyValueArrayLike | ExtensionArray, side: Literal["left", "right"] = "left", sorter: NumpySorter = None, ) -> npt.NDArray[np.intp] | np.intp: - return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) + + values = self._values + if not isinstance(values, np.ndarray): + # Going through EA.searchsorted directly improves performance GH#38083 + return values.searchsorted(value, side=side, sorter=sorter) + + return algorithms.searchsorted( + values, + value, + side=side, + sorter=sorter, + ) def drop_duplicates(self, keep="first"): duplicated = self._duplicated(keep=keep) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 44267895a989e..fd4bfe3791978 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -4,11 +4,8 @@ from __future__ import annotations from typing import ( - TYPE_CHECKING, Hashable, - Literal, TypeVar, - overload, ) import numpy as np @@ -38,17 +35,9 @@ TimedeltaArray, ) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray -from pandas.core.arrays.base import ExtensionArray from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.base import Index -if TYPE_CHECKING: - - from pandas._typing import ( - NumpySorter, - NumpyValueArrayLike, - ) - _T = TypeVar("_T", bound="NDArrayBackedExtensionIndex") @@ -207,40 +196,6 @@ def __getitem__(self, key): deprecate_ndim_indexing(result) return result - # This overload is needed so that the call to searchsorted in - # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result - - @overload - # The following ignore is also present in numpy/__init__.pyi - # Possibly a mypy bug?? - # error: Overloaded function signatures 1 and 2 overlap with incompatible - # return types [misc] - def searchsorted( # type: ignore[misc] - self, - value: npt._ScalarLike_co, - side: Literal["left", "right"] = "left", - sorter: NumpySorter = None, - ) -> np.intp: - ... - - @overload - def searchsorted( - self, - value: npt.ArrayLike | ExtensionArray, - side: Literal["left", "right"] = "left", - sorter: NumpySorter = None, - ) -> npt.NDArray[np.intp]: - ... - - def searchsorted( - self, - value: NumpyValueArrayLike | ExtensionArray, - side: Literal["left", "right"] = "left", - sorter: NumpySorter = None, - ) -> npt.NDArray[np.intp] | np.intp: - # overriding IndexOpsMixin improves performance GH#38083 - return self._data.searchsorted(value, side=side, sorter=sorter) - # --------------------------------------------------------------------- def _get_engine_target(self) -> np.ndarray: diff --git a/pandas/core/series.py b/pandas/core/series.py index 8079cfbbcfc40..7f612df095c4b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2790,13 +2790,14 @@ def __rmatmul__(self, other): return self.dot(np.transpose(other)) @doc(base.IndexOpsMixin.searchsorted, klass="Series") - def searchsorted( + # Signature of "searchsorted" incompatible with supertype "IndexOpsMixin" + def searchsorted( # type: ignore[override] self, - value: NumpyValueArrayLike, + value: NumpyValueArrayLike | ExtensionArray, side: Literal["left", "right"] = "left", sorter: NumpySorter = None, ) -> npt.NDArray[np.intp] | np.intp: - return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) + return base.IndexOpsMixin.searchsorted(self, value, side=side, sorter=sorter) # ------------------------------------------------------------------- # Combination