diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d77ad59a4bb82..78517e88a5296 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -441,7 +441,6 @@ class Grouping: _codes: npt.NDArray[np.signedinteger] | None = None _group_index: Index | None = None - _passed_categorical: bool _all_grouper: Categorical | None _orig_cats: Index | None _index: Index @@ -460,7 +459,7 @@ def __init__( ) -> None: self.level = level self._orig_grouper = grouper - self.grouping_vector = _convert_grouper(index, grouper) + grouping_vector = _convert_grouper(index, grouper) self._all_grouper = None self._orig_cats = None self._index = index @@ -471,8 +470,6 @@ def __init__( self._dropna = dropna self._uniques = uniques - self._passed_categorical = False - # we have a single grouper which may be a myriad of things, # some of which are dependent on the passing in level @@ -486,78 +483,83 @@ def __init__( else: index_level = index - if self.grouping_vector is None: - self.grouping_vector = index_level + if grouping_vector is None: + grouping_vector = index_level else: - mapper = self.grouping_vector - self.grouping_vector = index_level.map(mapper) + mapper = grouping_vector + grouping_vector = index_level.map(mapper) # a passed Grouper like, directly get the grouper in the same way # as single grouper groupby, use the group_info to get codes - elif isinstance(self.grouping_vector, Grouper): + elif isinstance(grouping_vector, Grouper): # get the new grouper; we already have disambiguated # what key/level refer to exactly, don't need to # check again as we have by this point converted these # to an actual value (rather than a pd.Grouper) assert self.obj is not None # for mypy - newgrouper, newobj = self.grouping_vector._get_grouper( - self.obj, validate=False - ) + newgrouper, newobj = grouping_vector._get_grouper(self.obj, validate=False) self.obj = newobj - ng = newgrouper._get_grouper() if isinstance(newgrouper, ops.BinGrouper): - # in this case we have `ng is newgrouper` - self.grouping_vector = ng + # TODO: can we unwrap this and get a tighter typing + # for self.grouping_vector? + grouping_vector = newgrouper else: # ops.BaseGrouper + # TODO: 2023-02-03 no test cases with len(newgrouper.groupings) > 1. + # If that were to occur, would we be throwing out information? + # error: Cannot determine type of "grouping_vector" [has-type] + ng = newgrouper.groupings[0].grouping_vector # type: ignore[has-type] # use Index instead of ndarray so we can recover the name - self.grouping_vector = Index(ng, name=newgrouper.result_index.name) + grouping_vector = Index(ng, name=newgrouper.result_index.name) elif not isinstance( - self.grouping_vector, (Series, Index, ExtensionArray, np.ndarray) + grouping_vector, (Series, Index, ExtensionArray, np.ndarray) ): # no level passed - if getattr(self.grouping_vector, "ndim", 1) != 1: - t = self.name or str(type(self.grouping_vector)) + if getattr(grouping_vector, "ndim", 1) != 1: + t = str(type(grouping_vector)) raise ValueError(f"Grouper for '{t}' not 1-dimensional") - self.grouping_vector = index.map(self.grouping_vector) + grouping_vector = index.map(grouping_vector) if not ( - hasattr(self.grouping_vector, "__len__") - and len(self.grouping_vector) == len(index) + hasattr(grouping_vector, "__len__") + and len(grouping_vector) == len(index) ): - grper = pprint_thing(self.grouping_vector) + grper = pprint_thing(grouping_vector) errmsg = ( "Grouper result violates len(labels) == " f"len(data)\nresult: {grper}" ) - self.grouping_vector = None # Try for sanity raise AssertionError(errmsg) - if isinstance(self.grouping_vector, np.ndarray): - if self.grouping_vector.dtype.kind in ["m", "M"]: + if isinstance(grouping_vector, np.ndarray): + if grouping_vector.dtype.kind in ["m", "M"]: # if we have a date/time-like grouper, make sure that we have # Timestamps like # TODO 2022-10-08 we only have one test that gets here and # values are already in nanoseconds in that case. - self.grouping_vector = Series(self.grouping_vector).to_numpy() - elif is_categorical_dtype(self.grouping_vector): + grouping_vector = Series(grouping_vector).to_numpy() + elif is_categorical_dtype(grouping_vector): # a passed Categorical - self._passed_categorical = True - - self._orig_cats = self.grouping_vector.categories - self.grouping_vector, self._all_grouper = recode_for_groupby( - self.grouping_vector, sort, observed + self._orig_cats = grouping_vector.categories + grouping_vector, self._all_grouper = recode_for_groupby( + grouping_vector, sort, observed ) + self.grouping_vector = grouping_vector + def __repr__(self) -> str: return f"Grouping({self.name})" def __iter__(self) -> Iterator: return iter(self.indices) + @cache_readonly + def _passed_categorical(self) -> bool: + return is_categorical_dtype(self.grouping_vector) + @cache_readonly def name(self) -> Hashable: ilevel = self._ilevel diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index bff61ec135d74..08d657a41e332 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -745,15 +745,6 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: ids, _, ngroups = self.group_info return get_splitter(data, ids, ngroups, axis=axis) - def _get_grouper(self): - """ - We are a grouper as part of another's groupings. - - We have a specific method of grouping, so cannot - convert to a Index for our grouper. - """ - return self.groupings[0].grouping_vector - @final @cache_readonly def group_keys_seq(self): @@ -1112,15 +1103,6 @@ def nkeys(self) -> int: # still matches len(self.groupings), but we can hard-code return 1 - def _get_grouper(self): - """ - We are a grouper as part of another's groupings. - - We have a specific method of grouping, so cannot - convert to a Index for our grouper. - """ - return self - def get_iterator(self, data: NDFrame, axis: AxisInt = 0): """ Groupby iterator