From 608318332c9883cfe18ade0a185b37f1c7c93236 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Mar 2021 14:03:05 -0700 Subject: [PATCH 1/3] PERF: cache_readonly for Block properties --- pandas/core/internals/blocks.py | 51 ++++++++++++++++++++----------- pandas/core/internals/managers.py | 2 +- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7d8dcb34ed582..1ce1d8cd025aa 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -36,6 +36,7 @@ Shape, final, ) +from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -165,11 +166,11 @@ class Block(libinternals.Block, PandasObject): _validate_ndim = True @final - @property + @cache_readonly def _consolidate_key(self): return self._can_consolidate, self.dtype.name - @property + @cache_readonly def is_view(self) -> bool: """ return a boolean if I am possibly a view """ values = self.values @@ -188,7 +189,7 @@ def _can_hold_na(self) -> bool: return values._can_hold_na @final - @property + @cache_readonly def is_categorical(self) -> bool: warnings.warn( "Block.is_categorical is deprecated and will be removed in a " @@ -217,6 +218,7 @@ def internal_values(self): """ return self.values + @property def array_values(self) -> ExtensionArray: """ The array that Series.array returns. Always an ExtensionArray. @@ -245,7 +247,7 @@ def get_block_values_for_json(self) -> np.ndarray: return np.asarray(self.values).reshape(self.shape) @final - @property + @cache_readonly def fill_value(self): # Used in reindex_indexer return na_value_for_dtype(self.dtype, compat=False) @@ -353,7 +355,7 @@ def shape(self) -> Shape: return self.values.shape @final - @property + @cache_readonly def dtype(self) -> DtypeObj: return self.values.dtype @@ -378,6 +380,11 @@ def delete(self, loc) -> None: """ self.values = np.delete(self.values, loc, 0) self.mgr_locs = self._mgr_locs.delete(loc) + try: + self._cache.clear() + except AttributeError: + # _cache not yet initialized + pass @final def apply(self, func, **kwargs) -> List[Block]: @@ -580,7 +587,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): """ values = self.values if values.dtype.kind in ["m", "M"]: - values = self.array_values() + values = self.array_values new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) @@ -909,7 +916,7 @@ def setitem(self, indexer, value): return self.coerce_to_target_dtype(value).setitem(indexer, value) if self.dtype.kind in ["m", "M"]: - arr = self.array_values().T + arr = self.array_values.T arr[indexer] = value return self @@ -1422,7 +1429,7 @@ class ExtensionBlock(Block): values: ExtensionArray - @property + @cache_readonly def shape(self) -> Shape: # TODO(EA2D): override unnecessary with 2D EAs if self.ndim == 1: @@ -1453,6 +1460,12 @@ def set_inplace(self, locs, values): # see GH#33457 assert locs.tolist() == [0] self.values = values + try: + # TODO(GH33457) this can be removed + self._cache.clear() + except AttributeError: + # _cache not yet initialized + pass def putmask(self, mask, new) -> List[Block]: """ @@ -1477,7 +1490,7 @@ def is_view(self) -> bool: """Extension arrays are never treated as views.""" return False - @property + @cache_readonly def is_numeric(self): return self.values.dtype._is_numeric @@ -1526,6 +1539,7 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(self.values).reshape(self.shape) + @cache_readonly def array_values(self) -> ExtensionArray: return self.values @@ -1716,7 +1730,7 @@ class HybridMixin: array_values: Callable def _can_hold_element(self, element: Any) -> bool: - values = self.array_values() + values = self.array_values try: values._validate_setitem_value(element) @@ -1757,13 +1771,13 @@ class NDArrayBackedExtensionBlock(HybridMixin, Block): def internal_values(self): # Override to return DatetimeArray and TimedeltaArray - return self.array_values() + return self.array_values def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: """ return object dtype as boxed values, such as Timestamps/Timedelta """ - values = self.array_values() + values = self.array_values if is_object_dtype(dtype): # DTA/TDA constructor and astype can handle 2D values = values.astype(object) @@ -1773,7 +1787,7 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta # TODO(EA2D): this can be removed if we ever have 2D EA - return self.array_values().reshape(self.shape)[key] + return self.array_values.reshape(self.shape)[key] def putmask(self, mask, new) -> List[Block]: mask = extract_bool_array(mask) @@ -1782,14 +1796,14 @@ def putmask(self, mask, new) -> List[Block]: return self.astype(object).putmask(mask, new) # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values().reshape(self.shape) + arr = self.array_values.reshape(self.shape) arr = cast("NDArrayBackedExtensionArray", arr) arr.T.putmask(mask, new) return [self] def where(self, other, cond, errors="raise") -> List[Block]: # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values().reshape(self.shape) + arr = self.array_values.reshape(self.shape) cond = extract_bool_array(cond) @@ -1825,7 +1839,7 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: by apply. """ # TODO(EA2D): reshape not necessary with 2D EAs - values = self.array_values().reshape(self.shape) + values = self.array_values.reshape(self.shape) new_values = values - values.shift(n, axis=axis) new_values = maybe_coerce_values(new_values) @@ -1833,7 +1847,7 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs - values = self.array_values().reshape(self.shape) + values = self.array_values.reshape(self.shape) new_values = values.shift(periods, fill_value=fill_value, axis=axis) new_values = maybe_coerce_values(new_values) return [self.make_block_same_class(new_values)] @@ -1848,7 +1862,7 @@ def fillna( # TODO: don't special-case td64 return self.astype(object).fillna(value, limit, inplace, downcast) - values = self.array_values() + values = self.array_values values = values if inplace else values.copy() new_values = values.fillna(value=value, limit=limit) new_values = maybe_coerce_values(new_values) @@ -1860,6 +1874,7 @@ class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock): is_numeric = False + @cache_readonly def array_values(self): return ensure_wrapped_if_datetimelike(self.values) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e4cce731b7b56..6cdbbe67cb6cc 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1680,7 +1680,7 @@ def internal_values(self): def array_values(self): """The array that Series.array returns""" - return self._block.array_values() + return self._block.array_values @property def _can_hold_na(self) -> bool: From 12a4d1485f3f69fbb6d4cd86f0a872ed0445c3a6 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Mar 2021 14:06:45 -0700 Subject: [PATCH 2/3] revert cache of is_view --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1ce1d8cd025aa..5f8d265e7303a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -170,7 +170,7 @@ class Block(libinternals.Block, PandasObject): def _consolidate_key(self): return self._can_consolidate, self.dtype.name - @cache_readonly + @property def is_view(self) -> bool: """ return a boolean if I am possibly a view """ values = self.values From 4726091fd6c6d382881e2eff2dd91f00d594c681 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 28 Mar 2021 09:40:56 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/internals/blocks.py | 40 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 8b54b41f86158..09e214237b736 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1689,10 +1689,7 @@ def where(self, other, cond, errors="raise") -> List[Block]: # The default `other` for Series / Frame is np.nan # we want to replace that with the correct NA value # for the type - - # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]" has no - # attribute "na_value" - other = self.dtype.na_value # type: ignore[union-attr] + other = self.dtype.na_value if is_sparse(self.values): # TODO(SparseArray.__setitem__): remove this if condition @@ -1756,7 +1753,8 @@ def _can_hold_element(self, element: Any) -> bool: values = self.array_values try: - values._validate_setitem_value(element) + # error: "Callable[..., Any]" has no attribute "_validate_setitem_value" + values._validate_setitem_value(element) # type: ignore[attr-defined] return True except (ValueError, TypeError): return False @@ -1782,9 +1780,7 @@ def _can_hold_element(self, element: Any) -> bool: if isinstance(element, (IntegerArray, FloatingArray)): if element._mask.any(): return False - # error: Argument 1 to "can_hold_element" has incompatible type - # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]" - return can_hold_element(self.dtype, element) # type: ignore[arg-type] + return can_hold_element(self.dtype, element) class NDArrayBackedExtensionBlock(HybridMixin, Block): @@ -1803,14 +1799,16 @@ def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: values = self.array_values if is_object_dtype(dtype): # DTA/TDA constructor and astype can handle 2D - values = values.astype(object) + # error: "Callable[..., Any]" has no attribute "astype" + values = values.astype(object) # type: ignore[attr-defined] # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(values).reshape(self.shape) def iget(self, key): # GH#31649 we need to wrap scalars in Timestamp/Timedelta # TODO(EA2D): this can be removed if we ever have 2D EA - return self.array_values.reshape(self.shape)[key] + # error: "Callable[..., Any]" has no attribute "reshape" + return self.array_values.reshape(self.shape)[key] # type: ignore[attr-defined] def putmask(self, mask, new) -> List[Block]: mask = extract_bool_array(mask) @@ -1819,14 +1817,16 @@ def putmask(self, mask, new) -> List[Block]: return self.astype(object).putmask(mask, new) # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values.reshape(self.shape) + # error: "Callable[..., Any]" has no attribute "reshape" + arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined] arr = cast("NDArrayBackedExtensionArray", arr) arr.T.putmask(mask, new) return [self] def where(self, other, cond, errors="raise") -> List[Block]: # TODO(EA2D): reshape unnecessary with 2D EAs - arr = self.array_values.reshape(self.shape) + # error: "Callable[..., Any]" has no attribute "reshape" + arr = self.array_values.reshape(self.shape) # type: ignore[attr-defined] cond = extract_bool_array(cond) @@ -1862,15 +1862,17 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: by apply. """ # TODO(EA2D): reshape not necessary with 2D EAs - values = self.array_values.reshape(self.shape) + # error: "Callable[..., Any]" has no attribute "reshape" + values = self.array_values.reshape(self.shape) # type: ignore[attr-defined] new_values = values - values.shift(n, axis=axis) new_values = maybe_coerce_values(new_values) return [self.make_block(new_values)] def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> List[Block]: - # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs - values = self.array_values.reshape(self.shape) + # TODO(EA2D) this is unnecessary if these blocks are backed by 2D EA + # error: "Callable[..., Any]" has no attribute "reshape" + values = self.array_values.reshape(self.shape) # type: ignore[attr-defined] new_values = values.shift(periods, fill_value=fill_value, axis=axis) new_values = maybe_coerce_values(new_values) return [self.make_block_same_class(new_values)] @@ -1886,8 +1888,12 @@ def fillna( return self.astype(object).fillna(value, limit, inplace, downcast) values = self.array_values - values = values if inplace else values.copy() - new_values = values.fillna(value=value, limit=limit) + # error: "Callable[..., Any]" has no attribute "copy" + values = values if inplace else values.copy() # type: ignore[attr-defined] + # error: "Callable[..., Any]" has no attribute "fillna" + new_values = values.fillna( # type: ignore[attr-defined] + value=value, limit=limit + ) new_values = maybe_coerce_values(new_values) return [self.make_block_same_class(values=new_values)]