diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 3a749708fb526..4fd2fd815d1f5 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -243,6 +243,7 @@ Groupby/resample/rolling grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex` or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument, the function operated on the whole index rather than each element of the index. (:issue:`51979`) +- Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced an incorrect number of group keys (:issue:`51158`) - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64 or :class:`PeriodDtype` values (:issue:`52128`) - diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b72a21f1aa0c6..a814b10e02e49 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1186,6 +1186,9 @@ def groups(self): } return result + def __iter__(self) -> Iterator[Hashable]: + return iter(self.groupings[0].grouping_vector) + @property def nkeys(self) -> int: # still matches len(self.groupings), but we can hard-code diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 8e84a48eb7374..db2ce709d764d 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -447,6 +447,29 @@ def test_groupby_grouper_f_sanity_checked(self): expected.index.freq = None tm.assert_series_equal(result, expected) + def test_groupby_with_datetime_key(self): + # GH 51158 + df = DataFrame( + { + "id": ["a", "b"] * 3, + "b": date_range("2000-01-01", "2000-01-03", freq="9H"), + } + ) + grouper = Grouper(key="b", freq="D") + gb = df.groupby([grouper, "id"]) + + # test number of groups + expected = { + (Timestamp("2000-01-01"), "a"): [0, 2], + (Timestamp("2000-01-01"), "b"): [1], + (Timestamp("2000-01-02"), "a"): [4], + 
(Timestamp("2000-01-02"), "b"): [3, 5], + } + tm.assert_dict_equal(gb.groups, expected) + + # test number of group keys + assert len(gb.groups.keys()) == 4 + def test_grouping_error_on_multidim_input(self, df): msg = "Grouper for '' not 1-dimensional" with pytest.raises(ValueError, match=msg):