Skip to content

TYP: indexes/base.py #39897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,7 +907,7 @@ def value_counts_arraylike(values, dropna: bool):
return keys, counts


def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray:
def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarray:
"""
Return boolean ndarray denoting duplicate values.

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,5 +1316,5 @@ def drop_duplicates(self, keep="first"):
# error: Value of type "IndexOpsMixin" is not indexable
return self[~duplicated] # type: ignore[index]

def duplicated(self, keep="first"):
def duplicated(self, keep: Union[str, bool] = "first") -> np.ndarray:
return duplicated(self._values, keep=keep)
84 changes: 48 additions & 36 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
Sequence,
Set,
Tuple,
Type,
TypeVar,
Union,
cast,
Expand Down Expand Up @@ -47,6 +48,7 @@
Dtype,
DtypeObj,
Shape,
T,
final,
)
from pandas.compat.numpy import function as nv
Expand Down Expand Up @@ -161,6 +163,7 @@
if TYPE_CHECKING:
from pandas import (
CategoricalIndex,
DataFrame,
IntervalIndex,
MultiIndex,
RangeIndex,
Expand Down Expand Up @@ -278,16 +281,22 @@ class Index(IndexOpsMixin, PandasObject):
# for why we need to wrap these instead of making them class attributes
# Moreover, cython will choose the appropriate-dtyped sub-function
# given the dtypes of the passed arguments
def _left_indexer_unique(self, left, right):
def _left_indexer_unique(self, left: np.ndarray, right: np.ndarray) -> np.ndarray:
return libjoin.left_join_indexer_unique(left, right)

def _left_indexer(self, left, right):
def _left_indexer(
self, left: np.ndarray, right: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
return libjoin.left_join_indexer(left, right)

def _inner_indexer(self, left, right):
def _inner_indexer(
self, left: np.ndarray, right: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
return libjoin.inner_join_indexer(left, right)

def _outer_indexer(self, left, right):
def _outer_indexer(
self, left: np.ndarray, right: np.ndarray
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
return libjoin.outer_join_indexer(left, right)

_typ = "index"
Expand Down Expand Up @@ -548,7 +557,7 @@ def asi8(self):
return None

@classmethod
def _simple_new(cls, values, name: Hashable = None):
def _simple_new(cls: Type[_IndexT], values, name: Hashable = None) -> _IndexT:
"""
We require that we have a dtype compat for the values. If we are passed
a non-dtype compat, then coerce using the constructor.
Expand All @@ -571,11 +580,11 @@ def _simple_new(cls, values, name: Hashable = None):
return result

@cache_readonly
def _constructor(self):
def _constructor(self: _IndexT) -> Type[_IndexT]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it matter that a couple of subclasses behave differently?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've typed the subclass methods; I don't think this is a problem.

return type(self)

@final
def _maybe_check_unique(self):
def _maybe_check_unique(self) -> None:
"""
Check that an Index has no duplicates.

Expand Down Expand Up @@ -626,13 +635,13 @@ def _format_duplicate_message(self):
# Index Internals Methods

@final
def _get_attributes_dict(self):
def _get_attributes_dict(self) -> Dict[str_t, Any]:
"""
Return an attributes dict for my class.
"""
return {k: getattr(self, k, None) for k in self._attributes}

def _shallow_copy(self, values, name: Hashable = no_default):
def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
"""
Create a new Index with the same class as the caller, don't copy the
data, use the same object attributes with passed in attributes taking
Expand Down Expand Up @@ -706,11 +715,11 @@ def _reset_identity(self) -> None:
self._id = _Identity(object())

@final
def _cleanup(self):
def _cleanup(self) -> None:
self._engine.clear_mapping()

@cache_readonly
def _engine(self):
def _engine(self) -> libindex.ObjectEngine:
# property, for now, slow to look up

# to avoid a reference cycle, bind `target_values` to a local variable, so
Expand Down Expand Up @@ -1243,7 +1252,7 @@ def to_flat_index(self):
"""
return self

def to_series(self, index=None, name=None):
def to_series(self, index=None, name: Hashable = None) -> Series:
"""
Create a Series with both index and values equal to the index keys.

Expand Down Expand Up @@ -1306,7 +1315,7 @@ def to_series(self, index=None, name=None):

return Series(self._values.copy(), index=index, name=name)

def to_frame(self, index: bool = True, name=None):
def to_frame(self, index: bool = True, name=None) -> DataFrame:
"""
Create a DataFrame with a column containing the Index.

Expand Down Expand Up @@ -1421,10 +1430,10 @@ def _validate_names(

return new_names

def _get_names(self):
def _get_names(self) -> FrozenList:
return FrozenList((self.name,))

def _set_names(self, values, level=None):
def _set_names(self, values, level=None) -> None:
"""
Set new names on index. Each name has to be a hashable type.

Expand Down Expand Up @@ -1625,14 +1634,14 @@ def nlevels(self) -> int:
"""
return 1

def _sort_levels_monotonic(self):
def _sort_levels_monotonic(self: _IndexT) -> _IndexT:
"""
Compat with MultiIndex.
"""
return self

@final
def _validate_index_level(self, level):
def _validate_index_level(self, level) -> None:
"""
Validate index level.

Expand Down Expand Up @@ -2369,7 +2378,7 @@ def hasnans(self) -> bool:
return False

@final
def isna(self):
def isna(self) -> np.ndarray:
"""
Detect missing values.

Expand Down Expand Up @@ -2427,7 +2436,7 @@ def isna(self):
isnull = isna

@final
def notna(self):
def notna(self) -> np.ndarray:
"""
Detect existing (non-missing) values.

Expand Down Expand Up @@ -2505,7 +2514,7 @@ def fillna(self, value=None, downcast=None):
return Index(result, name=self.name)
return self._view()

def dropna(self, how="any"):
def dropna(self: _IndexT, how: str_t = "any") -> _IndexT:
"""
Return Index without NA/NaN values.

Expand All @@ -2530,20 +2539,21 @@ def dropna(self, how="any"):
# --------------------------------------------------------------------
# Uniqueness Methods

def unique(self, level=None):
def unique(self: _IndexT, level: Optional[Hashable] = None) -> _IndexT:
"""
Return unique values in the index.

Unique values are returned in order of appearance, this does NOT sort.

Parameters
----------
level : int or str, optional, default None
level : int or hashable, optional
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"hashable" seems weird here. Maybe parenthetically "level name"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've tried adding some text below the types.

Only return values from specified level (for MultiIndex).
If int, gets the level by integer position, else by level name.

Returns
-------
Index without duplicates
Index

See Also
--------
Expand All @@ -2560,7 +2570,7 @@ def unique(self, level=None):
return self._shallow_copy(result)

@final
def drop_duplicates(self, keep="first"):
def drop_duplicates(self: _IndexT, keep: Union[str_t, bool] = "first") -> _IndexT:
"""
Return Index with duplicate values removed.

Expand Down Expand Up @@ -2611,7 +2621,7 @@ def drop_duplicates(self, keep="first"):

return super().drop_duplicates(keep=keep)

def duplicated(self, keep="first"):
def duplicated(self, keep: Union[str_t, bool] = "first") -> np.ndarray:
"""
Indicate duplicate index values.

Expand Down Expand Up @@ -3197,12 +3207,12 @@ def symmetric_difference(self, other, result_name=None, sort=None):
return Index(the_diff, name=result_name)

@final
def _assert_can_do_setop(self, other):
def _assert_can_do_setop(self, other) -> bool:
if not is_list_like(other):
raise TypeError("Input must be Index or array-like")
return True

def _convert_can_do_setop(self, other):
def _convert_can_do_setop(self, other) -> Tuple[Index, Hashable]:
if not isinstance(other, Index):
other = Index(other, name=self.name)
result_name = self.name
Expand Down Expand Up @@ -3385,7 +3395,7 @@ def _get_indexer(
return ensure_platform_int(indexer)

@final
def _check_indexing_method(self, method):
def _check_indexing_method(self, method: Optional[str_t]) -> None:
"""
Raise if we have a get_indexer `method` that is not supported or valid.
"""
Expand All @@ -3403,7 +3413,9 @@ def _check_indexing_method(self, method):

raise ValueError("Invalid fill method")

def _convert_tolerance(self, tolerance, target):
def _convert_tolerance(
self, tolerance, target: Union[np.ndarray, Index]
) -> np.ndarray:
# override this method on subclasses
tolerance = np.asarray(tolerance)
if target.size != tolerance.size and tolerance.size > 1:
Expand Down Expand Up @@ -3506,7 +3518,7 @@ def _filter_indexer_tolerance(
# --------------------------------------------------------------------
# Indexer Conversion Methods

def _get_partial_string_timestamp_match_key(self, key):
def _get_partial_string_timestamp_match_key(self, key: T) -> T:
"""
Translate any partial string timestamp matches in key, returning the
new key.
Expand All @@ -3517,7 +3529,7 @@ def _get_partial_string_timestamp_match_key(self, key):
return key

@final
def _validate_positional_slice(self, key: slice):
def _validate_positional_slice(self, key: slice) -> None:
"""
For positional indexing, a slice must have either int or None
for each of start, stop, and step.
Expand Down Expand Up @@ -3618,7 +3630,7 @@ def _convert_listlike_indexer(self, keyarr):
indexer = self._convert_list_indexer(keyarr)
return indexer, keyarr

def _convert_arr_indexer(self, keyarr):
def _convert_arr_indexer(self, keyarr) -> np.ndarray:
"""
Convert an array-like indexer to the appropriate dtype.

Expand Down Expand Up @@ -3663,13 +3675,13 @@ def _invalid_indexer(self, form: str_t, key) -> TypeError:
# Reindex Methods

@final
def _can_reindex(self, indexer):
def _validate_can_reindex(self, indexer: np.ndarray) -> None:
"""
Check if we are allowing reindexing with this particular indexer.

Parameters
----------
indexer : an integer indexer
indexer : an integer ndarray

Raises
------
Expand Down Expand Up @@ -6192,7 +6204,7 @@ def trim_front(strings: List[str]) -> List[str]:
return strings


def _validate_join_method(method: str):
def _validate_join_method(method: str) -> None:
if method not in ["left", "right", "inner", "outer"]:
raise ValueError(f"do not recognize join method {method}")

Expand Down Expand Up @@ -6404,7 +6416,7 @@ def get_unanimous_names(*indexes: Index) -> Tuple[Hashable, ...]:
return names


def unpack_nested_dtype(other: Index) -> Index:
def unpack_nested_dtype(other: _IndexT) -> _IndexT:
"""
When checking if our dtype is comparable with another, we need
to unpack CategoricalDtype to look at its categories.dtype.
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1109,7 +1109,7 @@ def _engine(self):
return MultiIndexUIntEngine(self.levels, self.codes, offsets)

@property
def _constructor(self):
def _constructor(self) -> Callable[..., MultiIndex]:
return type(self).from_tuples

@doc(Index._shallow_copy)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
List,
Optional,
Tuple,
Type,
)
import warnings

Expand Down Expand Up @@ -171,7 +172,7 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
# --------------------------------------------------------------------

@cache_readonly
def _constructor(self):
def _constructor(self) -> Type[Int64Index]:
""" return the class to use for construction """
return Int64Index

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -802,7 +802,7 @@ def _reindex_indexer(

# some axes don't allow reindexing with dups
if not allow_dups:
self._axes[axis]._can_reindex(indexer)
self._axes[axis]._validate_can_reindex(indexer)

# if axis >= self.ndim:
# raise IndexError("Requested axis not found in manager")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1245,7 +1245,7 @@ def reindex_indexer(

# some axes don't allow reindexing with dups
if not allow_dups:
self.axes[axis]._can_reindex(indexer)
self.axes[axis]._validate_can_reindex(indexer)

if axis >= self.ndim:
raise IndexError("Requested axis not found in manager")
Expand Down