diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst
index ac3d645684fda..426822c19fd6f 100644
--- a/doc/source/whatsnew/v0.25.1.rst
+++ b/doc/source/whatsnew/v0.25.1.rst
@@ -122,6 +122,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.transform` where applying a timezone conversion lambda function would drop timezone information (:issue:`27496`)
+- Bug in :meth:`pandas.core.groupby.GroupBy.nth` where ``observed=False`` was being ignored for Categorical groupers (:issue:`26385`)
 - Bug in windowing over read-only arrays (:issue:`27766`)
 -
 -
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 9aba9723e0546..b852513e454a2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1771,7 +1771,11 @@ def nth(self, n: Union[int, List[int]], dropna: Optional[str] = None) -> DataFra
         if not self.as_index:
             return out
 
-        out.index = self.grouper.result_index[ids[mask]]
+        result_index = self.grouper.result_index
+        out.index = result_index[ids[mask]]
+
+        if not self.observed and isinstance(result_index, CategoricalIndex):
+            out = out.reindex(result_index)
 
         return out.sort_index() if self.sort else out
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 99cc4cf0ffbd1..9750a36d9350b 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -434,6 +434,21 @@ def test_observed_groups_with_nan(observed):
     tm.assert_dict_equal(result, expected)
 
 
+def test_observed_nth():
+    # GH 26385
+    cat = pd.Categorical(["a", np.nan, np.nan], categories=["a", "b", "c"])
+    ser = pd.Series([1, 2, 3])
+    df = pd.DataFrame({"cat": cat, "ser": ser})
+
+    result = df.groupby("cat", observed=False)["ser"].nth(0)
+
+    index = pd.Categorical(["a", "b", "c"], categories=["a", "b", "c"])
+    expected = pd.Series([1, np.nan, np.nan], index=index, name="ser")
+    expected.index.name = "cat"
+
+    tm.assert_series_equal(result, expected)
+
+
 def test_dataframe_categorical_with_nan(observed):
     # GH 21151
     s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])