Skip to content

BUG: propagate dropna in pd.Grouper #36604

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 26, 2020
11 changes: 10 additions & 1 deletion pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ class Grouper:

.. versionadded:: 1.1.0

dropna : bool, default True
If True, and if group keys contain NA values, the NA values together with
their row/column will be dropped. If False, NA values will also be treated
as a key in groups.

.. versionadded:: 1.2.0

Returns
-------
A specification for a groupby instruction
Expand Down Expand Up @@ -820,7 +827,9 @@ def is_in_obj(gpr) -> bool:
groupings.append(Grouping(Index([], dtype="int"), np.array([], dtype=np.intp)))

# create the internals grouper
grouper = ops.BaseGrouper(group_axis, groupings, sort=sort, mutated=mutated)
grouper = ops.BaseGrouper(
group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna
)
return grouper, exclusions, obj


Expand Down
2 changes: 2 additions & 0 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def __init__(
group_keys: bool = True,
mutated: bool = False,
indexer: Optional[np.ndarray] = None,
dropna: bool = True,
):
assert isinstance(axis, Index), axis

Expand All @@ -97,6 +98,7 @@ def __init__(
self.group_keys = group_keys
self.mutated = mutated
self.indexer = indexer
self.dropna = dropna
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arw2019 this attribute isn't used anywhere except in the test added in this PR. Is it still needed? Is it a precursor to something on the horizon?


@property
def groupings(self) -> List["grouper.Grouping"]:
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/groupby/test_groupby_dropna.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ def test_groupby_dropna_series_by(dropna, expected):
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("dropna", (False, True))
def test_grouper_dropna_propagation(dropna):
    """Check that ``dropna`` given to ``groupby`` is exposed on the grouper.

    A frame whose key column "A" contains a missing value is grouped with
    each ``dropna`` setting, and the ``dropna`` attribute on the resulting
    grouper is compared against the value that was passed in.
    """
    # GH 36604
    frame = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]})
    grouped = frame.groupby("A", dropna=dropna)
    assert grouped.grouper.dropna == dropna


@pytest.mark.parametrize(
"dropna,df_expected,s_expected",
[
Expand Down