pandas-dev · mroeschke · Apr 12, 2023 · Mar 21, 2023 · Apr 8, 2023 · Apr 8, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -149,6 +149,7 @@ Other API changes
 
 Deprecations
 ~~~~~~~~~~~~
+- Deprecated :meth:`.DataFrameGroupBy.apply` operating on the grouping column(s) (:issue:`7155`)
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated :meth:`DataFrame._data` and :meth:`Series._data`, use public APIs instead (:issue:`33333`)
 - Deprecated :meth:`.Groupby.all` and :meth:`.GroupBy.any` with datetime64 or :class:`PeriodDtype` values, matching the :class:`Series` and :class:`DataFrame` deprecations (:issue:`34479`)

@@ -1487,6 +1487,23 @@ def f(g):
         with option_context("mode.chained_assignment", None):
             try:
                 result = self._python_apply_general(f, self._selected_obj)
+                if (
+                    not isinstance(self.obj, Series)
+                    and self._selection is None
+                    and self._selected_obj.shape != self._obj_with_exclusions.shape
+                ):
+                    msg = (
+                        f"{type(self).__name__}.apply operated on the grouping "
+                        f"columns. This behavior is deprecated, and in a future "
+                        f"version of pandas the grouping columns will be excluded "
+                        f"from the operation. Subset the data to exclude the "
+                        f"groupings and silence this warning."
+                    )
+                    warnings.warn(
+                        message=msg,
+                        category=FutureWarning,
+                        stacklevel=find_stack_level(),
+                    )
             except TypeError:
                 # gh-20949
                 # try again, with .apply acting as a filtering

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -33,7 +33,10 @@
     Substitution,
     doc,
 )
-from pandas.util._exceptions import find_stack_level
+from pandas.util._exceptions import (
+    find_stack_level,
+    rewrite_warning,
+)
 
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
@@ -420,6 +423,14 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
             obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
         )
 
+        target_message = "DataFrameGroupBy.apply operated on the grouping columns"
+        new_message = (
+            "DataFrame.resample operated on the grouping columns. "
+            "This behavior is deprecated, and in a future version of "
+            "pandas the grouping columns will be excluded from the operation. "
+            "Subset the data to exclude the groupings and silence this warning."
+        )
+
         try:
             if callable(how):
                 # TODO: test_resample_apply_with_additional_args fails if we go
@@ -436,7 +447,12 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
             #  a DataFrame column, but aggregate_item_by_item operates column-wise
             #  on Series, raising AttributeError or KeyError
             #  (depending on whether the column lookup uses getattr/__getitem__)
-            result = grouped.apply(how, *args, **kwargs)
+            with rewrite_warning(
+                target_message=target_message,
+                target_category=FutureWarning,
+                new_message=new_message,
+            ):
+                result = grouped.apply(how, *args, **kwargs)
 
         except ValueError as err:
             if "Must produce aggregated value" in str(err):
@@ -448,7 +464,12 @@ def _groupby_and_aggregate(self, how, *args, **kwargs):
 
             # we have a non-reducing function
             # try to evaluate
-            result = grouped.apply(how, *args, **kwargs)
+            with rewrite_warning(
+                target_message=target_message,
+                target_category=FutureWarning,
+                new_message=new_message,
+            ):
+                result = grouped.apply(how, *args, **kwargs)
 
         return self._wrap_result(result)
 
@@ -1344,7 +1365,18 @@ def func(x):
 
             return x.apply(f, *args, **kwargs)
 
-        result = self._groupby.apply(func)
+        msg = (
+            "DataFrameGroupBy.resample operated on the grouping columns. "
+            "This behavior is deprecated, and in a future version of "
+            "pandas the grouping columns will be excluded from the operation. "
+            "Subset the data to exclude the groupings and silence this warning."
+        )
+        with rewrite_warning(
+            target_message="DataFrameGroupBy.apply operated on the grouping columns",
+            target_category=FutureWarning,
+            new_message=msg,
+        ):
+            result = self._groupby.apply(func)
         return self._wrap_result(result)
 
     _upsample = _apply

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -15,6 +15,7 @@
     Appender,
     Substitution,
 )
+from pandas.util._exceptions import rewrite_warning
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
@@ -457,7 +458,19 @@ def _all_key():
             return (margins_name,) + ("",) * (len(cols) - 1)
 
         if len(rows) > 0:
-            margin = data[rows].groupby(rows, observed=observed).apply(aggfunc)
+            target_message = "DataFrameGroupBy.apply operated on the grouping columns"
+            new_message = (
+                "DataFrame.pivot_table operated on the grouping columns. "
+                "This behavior is deprecated, and in a future version of "
+                "pandas the grouping columns will be excluded from the operation. "
+                "Can the user do something here?"
+            )
+            with rewrite_warning(
+                target_message=target_message,
+                target_category=FutureWarning,
+                new_message=new_message,
+            ):
+                margin = data[rows].groupby(rows, observed=observed).apply(aggfunc)
             all_key = _all_key()
             table[all_key] = margin
             result = table

diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
@@ -99,9 +99,13 @@ def test_groupby_extension_transform(self, data_for_grouping):
 
     def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
-        df.groupby("B", group_keys=False).apply(groupby_apply_op)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.groupby("B", group_keys=False).apply(groupby_apply_op)
         df.groupby("B", group_keys=False).A.apply(groupby_apply_op)
-        df.groupby("A", group_keys=False).apply(groupby_apply_op)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.groupby("A", group_keys=False).apply(groupby_apply_op)
         df.groupby("A", group_keys=False).B.apply(groupby_apply_op)
 
     def test_groupby_apply_identity(self, data_for_grouping):

diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
@@ -298,9 +298,13 @@ def test_groupby_extension_transform(self, data_for_grouping):
 
     def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
         df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
-        df.groupby("B", group_keys=False).apply(groupby_apply_op)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.groupby("B", group_keys=False).apply(groupby_apply_op)
         df.groupby("B", group_keys=False).A.apply(groupby_apply_op)
-        df.groupby("A", group_keys=False).apply(groupby_apply_op)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            df.groupby("A", group_keys=False).apply(groupby_apply_op)
         df.groupby("A", group_keys=False).B.apply(groupby_apply_op)
 
     def test_groupby_apply_identity(self, data_for_grouping):

diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -1577,7 +1577,9 @@ def test_unstack_bug(self):
             }
         )
 
-        result = df.groupby(["state", "exp", "barcode", "v"]).apply(len)
+        msg = "DataFrameGroupBy.apply operated on the grouping columns"
+        with tm.assert_produces_warning(FutureWarning, match=msg):
+            result = df.groupby(["state", "exp", "barcode", "v"]).apply(len)
 
         unstacked = result.unstack()
         restacked = unstacked.stack()

diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -496,13 +496,17 @@ def test_agg_timezone_round_trip():
     assert ts == grouped.first()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert ts == grouped.apply(lambda x: x.iloc[0]).iloc[0, 1]
 
     ts = df["B"].iloc[2]
     assert ts == grouped.last()["B"].iloc[0]
 
     # GH#27110 applying iloc should return a DataFrame
-    assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
+    msg = "DataFrameGroupBy.apply operated on the grouping columns"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        assert ts == grouped.apply(lambda x: x.iloc[-1]).iloc[0, 1]
 
 
 def test_sum_uint64_overflow():