diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 40533cdd554b3..50d016ce211d7 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -907,7 +907,7 @@ def value_counts_arraylike(values, dropna: bool): return keys, counts -def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray: +def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarray: """ Return boolean ndarray denoting duplicate values. diff --git a/pandas/core/base.py b/pandas/core/base.py index fd40e0467720d..9b2efeff76926 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1316,5 +1316,5 @@ def drop_duplicates(self, keep="first"): # error: Value of type "IndexOpsMixin" is not indexable return self[~duplicated] # type: ignore[index] - def duplicated(self, keep="first"): + def duplicated(self, keep: Union[str, bool] = "first") -> np.ndarray: return duplicated(self._values, keep=keep) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 71095b8f4113a..64b41c8614049 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -17,6 +17,7 @@ Sequence, Set, Tuple, + Type, TypeVar, Union, cast, @@ -47,6 +48,7 @@ Dtype, DtypeObj, Shape, + T, final, ) from pandas.compat.numpy import function as nv @@ -161,6 +163,7 @@ if TYPE_CHECKING: from pandas import ( CategoricalIndex, + DataFrame, IntervalIndex, MultiIndex, RangeIndex, @@ -278,16 +281,22 @@ class Index(IndexOpsMixin, PandasObject): # for why we need to wrap these instead of making them class attributes # Moreover, cython will choose the appropriate-dtyped sub-function # given the dtypes of the passed arguments - def _left_indexer_unique(self, left, right): + def _left_indexer_unique(self, left: np.ndarray, right: np.ndarray) -> np.ndarray: return libjoin.left_join_indexer_unique(left, right) - def _left_indexer(self, left, right): + def _left_indexer( + self, left: np.ndarray, right: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.left_join_indexer(left, right) - def _inner_indexer(self, left, right): + def _inner_indexer( + self, left: np.ndarray, right: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.inner_join_indexer(left, right) - def _outer_indexer(self, left, right): + def _outer_indexer( + self, left: np.ndarray, right: np.ndarray + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: return libjoin.outer_join_indexer(left, right) _typ = "index" @@ -548,7 +557,7 @@ def asi8(self): return None @classmethod - def _simple_new(cls, values, name: Hashable = None): + def _simple_new(cls: Type[_IndexT], values, name: Hashable = None) -> _IndexT: """ We require that we have a dtype compat for the values. If we are passed a non-dtype compat, then coerce using the constructor. @@ -571,11 +580,11 @@ def _simple_new(cls, values, name: Hashable = None): return result @cache_readonly - def _constructor(self): + def _constructor(self: _IndexT) -> Type[_IndexT]: return type(self) @final - def _maybe_check_unique(self): + def _maybe_check_unique(self) -> None: """ Check that an Index has no duplicates. @@ -626,13 +635,13 @@ def _format_duplicate_message(self): # Index Internals Methods @final - def _get_attributes_dict(self): + def _get_attributes_dict(self) -> Dict[str_t, Any]: """ Return an attributes dict for my class. """ return {k: getattr(self, k, None) for k in self._attributes} - def _shallow_copy(self, values, name: Hashable = no_default): + def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT: """ Create a new Index with the same class as the caller, don't copy the data, use the same object attributes with passed in attributes taking @@ -706,11 +715,11 @@ def _reset_identity(self) -> None: self._id = _Identity(object()) @final - def _cleanup(self): + def _cleanup(self) -> None: self._engine.clear_mapping() @cache_readonly - def _engine(self): + def _engine(self) -> libindex.ObjectEngine: # property, for now, slow to look up # to avoid a reference cycle, bind `target_values` to a local variable, so @@ -1243,7 +1252,7 @@ def to_flat_index(self): """ return self - def to_series(self, index=None, name=None): + def to_series(self, index=None, name: Hashable = None) -> Series: """ Create a Series with both index and values equal to the index keys. @@ -1306,7 +1315,7 @@ def to_series(self, index=None, name=None): return Series(self._values.copy(), index=index, name=name) - def to_frame(self, index: bool = True, name=None): + def to_frame(self, index: bool = True, name=None) -> DataFrame: """ Create a DataFrame with a column containing the Index. @@ -1421,10 +1430,10 @@ def _validate_names( return new_names - def _get_names(self): + def _get_names(self) -> FrozenList: return FrozenList((self.name,)) - def _set_names(self, values, level=None): + def _set_names(self, values, level=None) -> None: """ Set new names on index. Each name has to be a hashable type. @@ -1625,14 +1634,14 @@ def nlevels(self) -> int: """ return 1 - def _sort_levels_monotonic(self): + def _sort_levels_monotonic(self: _IndexT) -> _IndexT: """ Compat with MultiIndex. """ return self @final - def _validate_index_level(self, level): + def _validate_index_level(self, level) -> None: """ Validate index level. @@ -2369,7 +2378,7 @@ def hasnans(self) -> bool: return False @final - def isna(self): + def isna(self) -> np.ndarray: """ Detect missing values. @@ -2427,7 +2436,7 @@ def isna(self): isnull = isna @final - def notna(self): + def notna(self) -> np.ndarray: """ Detect existing (non-missing) values. @@ -2505,7 +2514,7 @@ def fillna(self, value=None, downcast=None): return Index(result, name=self.name) return self._view() - def dropna(self, how="any"): + def dropna(self: _IndexT, how: str_t = "any") -> _IndexT: """ Return Index without NA/NaN values. @@ -2530,7 +2539,7 @@ def dropna(self, how="any"): # -------------------------------------------------------------------- # Uniqueness Methods - def unique(self, level=None): + def unique(self: _IndexT, level: Optional[Hashable] = None) -> _IndexT: """ Return unique values in the index. @@ -2538,12 +2547,13 @@ def unique(self, level=None): Parameters ---------- - level : int or str, optional, default None + level : int or hashable, optional Only return values from specified level (for MultiIndex). + If int, gets the level by integer position, else by level name. Returns ------- - Index without duplicates + Index See Also -------- @@ -2560,7 +2570,7 @@ def unique(self, level=None): return self._shallow_copy(result) @final - def drop_duplicates(self, keep="first"): + def drop_duplicates(self: _IndexT, keep: Union[str_t, bool] = "first") -> _IndexT: """ Return Index with duplicate values removed. @@ -2611,7 +2621,7 @@ def drop_duplicates(self, keep="first"): return super().drop_duplicates(keep=keep) - def duplicated(self, keep="first"): + def duplicated(self, keep: Union[str_t, bool] = "first") -> np.ndarray: """ Indicate duplicate index values. @@ -3197,12 +3207,12 @@ def symmetric_difference(self, other, result_name=None, sort=None): return Index(the_diff, name=result_name) @final - def _assert_can_do_setop(self, other): + def _assert_can_do_setop(self, other) -> bool: if not is_list_like(other): raise TypeError("Input must be Index or array-like") return True - def _convert_can_do_setop(self, other): + def _convert_can_do_setop(self, other) -> Tuple[Index, Hashable]: if not isinstance(other, Index): other = Index(other, name=self.name) result_name = self.name @@ -3385,7 +3395,7 @@ def _get_indexer( return ensure_platform_int(indexer) @final - def _check_indexing_method(self, method): + def _check_indexing_method(self, method: Optional[str_t]) -> None: """ Raise if we have a get_indexer `method` that is not supported or valid. """ @@ -3403,7 +3413,9 @@ def _check_indexing_method(self, method): raise ValueError("Invalid fill method") - def _convert_tolerance(self, tolerance, target): + def _convert_tolerance( + self, tolerance, target: Union[np.ndarray, Index] + ) -> np.ndarray: # override this method on subclasses tolerance = np.asarray(tolerance) if target.size != tolerance.size and tolerance.size > 1: @@ -3506,7 +3518,7 @@ def _filter_indexer_tolerance( # -------------------------------------------------------------------- # Indexer Conversion Methods - def _get_partial_string_timestamp_match_key(self, key): + def _get_partial_string_timestamp_match_key(self, key: T) -> T: """ Translate any partial string timestamp matches in key, returning the new key. @@ -3517,7 +3529,7 @@ def _get_partial_string_timestamp_match_key(self, key): return key @final - def _validate_positional_slice(self, key: slice): + def _validate_positional_slice(self, key: slice) -> None: """ For positional indexing, a slice must have either int or None for each of start, stop, and step. @@ -3618,7 +3630,7 @@ def _convert_listlike_indexer(self, keyarr): indexer = self._convert_list_indexer(keyarr) return indexer, keyarr - def _convert_arr_indexer(self, keyarr): + def _convert_arr_indexer(self, keyarr) -> np.ndarray: """ Convert an array-like indexer to the appropriate dtype. @@ -3663,13 +3675,13 @@ def _invalid_indexer(self, form: str_t, key) -> TypeError: # Reindex Methods @final - def _can_reindex(self, indexer): + def _validate_can_reindex(self, indexer: np.ndarray) -> None: """ Check if we are allowing reindexing with this particular indexer. Parameters ---------- - indexer : an integer indexer + indexer : an integer ndarray Raises ------ @@ -6192,7 +6204,7 @@ def trim_front(strings: List[str]) -> List[str]: return strings -def _validate_join_method(method: str): +def _validate_join_method(method: str) -> None: if method not in ["left", "right", "inner", "outer"]: raise ValueError(f"do not recognize join method {method}") @@ -6404,7 +6416,7 @@ def get_unanimous_names(*indexes: Index) -> Tuple[Hashable, ...]: return names -def unpack_nested_dtype(other: Index) -> Index: +def unpack_nested_dtype(other: _IndexT) -> _IndexT: """ When checking if our dtype is comparable with another, we need to unpack CategoricalDtype to look at its categories.dtype. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7ef81b0947a22..39a15a8b54a92 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1109,7 +1109,7 @@ def _engine(self): return MultiIndexUIntEngine(self.levels, self.codes, offsets) @property - def _constructor(self): + def _constructor(self) -> Callable[..., MultiIndex]: return type(self).from_tuples @doc(Index._shallow_copy) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index bd9a92a657991..a0f546a6bd748 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -10,6 +10,7 @@ List, Optional, Tuple, + Type, ) import warnings @@ -171,7 +172,7 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: # -------------------------------------------------------------------- @cache_readonly - def _constructor(self): + def _constructor(self) -> Type[Int64Index]: """ return the class to use for construction """ return Int64Index diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 0e52ebf69137c..e09a434170780 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -802,7 +802,7 @@ def _reindex_indexer( # some axes don't allow reindexing with dups if not allow_dups: - self._axes[axis]._can_reindex(indexer) + self._axes[axis]._validate_can_reindex(indexer) # if axis >= self.ndim: # raise IndexError("Requested axis not found in manager") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b3f0466f236b6..f72b288adf348 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1245,7 +1245,7 @@ def reindex_indexer( # some axes don't allow reindexing with dups if not allow_dups: - self.axes[axis]._can_reindex(indexer) + self.axes[axis]._validate_can_reindex(indexer) if axis >= self.ndim: raise IndexError("Requested axis not found in manager")