-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
DEPR: DataFrameGroupBy.apply operating on the group keys #52477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
90c0ecf
8d93efc
0337e61
dd5f0f8
c6fa2dc
fe66a65
d3b2a65
7475648
574993d
1ddd766
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1487,6 +1487,23 @@ def f(g): | |
with option_context("mode.chained_assignment", None): | ||
try: | ||
result = self._python_apply_general(f, self._selected_obj) | ||
if ( | ||
not isinstance(self.obj, Series) | ||
and self._selection is None | ||
and self._selected_obj.shape != self._obj_with_exclusions.shape | ||
): | ||
msg = ( | ||
f"{type(self).__name__}.apply operated on the grouping " | ||
f"columns. This behavior is deprecated, and in a future " | ||
f"version of pandas the grouping columns will be excluded " | ||
f"from the operation. Subset the data to exclude the " | ||
f"groupings and silence this warning." | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "Subset the data to exclude the groupings and silence this warning." -> "Subset the data to silence this warning."? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the benefit of removing the phrase "to exclude the groupings"? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My idea was that by keeping the grouping columns in the subsetting, the users are guaranteed to get the same rseult as before, but without the warning: >>> df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]})
>>> g = df.groupby('A')
>>> g.apply(lambda x: x.sum())
A B
A
1 2 3
2 4 7
>>> g[df.columns].apply(lambda x: x.sum())
A B
A
1 2 3
2 4 7 Or they may actually not want to include, then remove it from the subsetting, >>> g[df.columns.drop("A")].apply(lambda x: x.sum())
B
A
1 3
2 7 the idea is just that they may/may not want to remove the groupings in the subset and "exclude the groupings" may not be what they want in all cases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see - this makes sense, but I find it confusing to call this "subsetting". I'll see what I can do for the warning message here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated. What do you think? |
||
) | ||
warnings.warn( | ||
message=msg, | ||
category=FutureWarning, | ||
stacklevel=find_stack_level(), | ||
) | ||
except TypeError: | ||
# gh-20949 | ||
# try again, with .apply acting as a filtering | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,7 +33,10 @@ | |
Substitution, | ||
doc, | ||
) | ||
from pandas.util._exceptions import find_stack_level | ||
from pandas.util._exceptions import ( | ||
find_stack_level, | ||
rewrite_warning, | ||
) | ||
|
||
from pandas.core.dtypes.generic import ( | ||
ABCDataFrame, | ||
|
@@ -420,6 +423,14 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): | |
obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys | ||
) | ||
|
||
target_message = "DataFrameGroupBy.apply operated on the grouping columns" | ||
new_message = ( | ||
"DataFrame.resample operated on the grouping columns. " | ||
"This behavior is deprecated, and in a future version of " | ||
"pandas the grouping columns will be excluded from the operation. " | ||
"Subset the data to exclude the groupings and silence this warning." | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "Subset the data to exclude the groupings and silence this warning." -> Subset the data to silence this warning." |
||
) | ||
|
||
try: | ||
if callable(how): | ||
# TODO: test_resample_apply_with_additional_args fails if we go | ||
|
@@ -436,7 +447,12 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): | |
# a DataFrame column, but aggregate_item_by_item operates column-wise | ||
# on Series, raising AttributeError or KeyError | ||
# (depending on whether the column lookup uses getattr/__getitem__) | ||
result = grouped.apply(how, *args, **kwargs) | ||
with rewrite_warning( | ||
target_message=target_message, | ||
target_category=FutureWarning, | ||
new_message=new_message, | ||
): | ||
result = grouped.apply(how, *args, **kwargs) | ||
|
||
except ValueError as err: | ||
if "Must produce aggregated value" in str(err): | ||
|
@@ -448,7 +464,12 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): | |
|
||
# we have a non-reducing function | ||
# try to evaluate | ||
result = grouped.apply(how, *args, **kwargs) | ||
with rewrite_warning( | ||
target_message=target_message, | ||
target_category=FutureWarning, | ||
new_message=new_message, | ||
): | ||
result = grouped.apply(how, *args, **kwargs) | ||
|
||
return self._wrap_result(result) | ||
|
||
|
@@ -1344,7 +1365,18 @@ def func(x): | |
|
||
return x.apply(f, *args, **kwargs) | ||
|
||
result = self._groupby.apply(func) | ||
msg = ( | ||
"DataFrameGroupBy.resample operated on the grouping columns. " | ||
"This behavior is deprecated, and in a future version of " | ||
"pandas the grouping columns will be excluded from the operation. " | ||
"Subset the data to exclude the groupings and silence this warning." | ||
topper-123 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
with rewrite_warning( | ||
target_message="DataFrameGroupBy.apply operated on the grouping columns", | ||
target_category=FutureWarning, | ||
new_message=msg, | ||
): | ||
result = self._groupby.apply(func) | ||
return self._wrap_result(result) | ||
|
||
_upsample = _apply | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A small explanation about what to do to avoid the warning?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - done.