diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aeca7782e3ae5..aebf15eb55374 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5980,7 +5980,7 @@ def pivot_table( margins=False, dropna=True, margins_name="All", - observed=False, + observed=True, ): from pandas.core.reshape.pivot import pivot_table diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d653dd87308cf..dbfefc1fa7f4a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -29,7 +29,7 @@ def pivot_table( margins=False, dropna=True, margins_name="All", - observed=False, + observed=True, ): index = _convert_by(index) columns = _convert_by(columns) @@ -92,11 +92,12 @@ def pivot_table( pass values = list(values) - grouped = data.groupby(keys, observed=observed) + if dropna: + grouped = data.groupby(keys, observed=observed) + else: + grouped = data.groupby(keys, observed=False) agged = grouped.agg(aggfunc) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): - agged = agged.dropna(how="all") - # gh-21133 # we want to down cast if # the original values are ints @@ -172,10 +173,6 @@ def pivot_table( if len(index) == 0 and len(columns) > 0: table = table.T - # GH 15193 Make sure empty columns are removed if dropna=True - if isinstance(table, ABCDataFrame) and dropna: - table = table.dropna(how="all", axis=1) - return table diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 582084e3bfb5a..ba48bd70bd807 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -185,6 +185,59 @@ def test_pivot_table_dropna(self): tm.assert_index_equal(pv_col.columns, m) tm.assert_index_equal(pv_ind.index, m) + def test_pivot_table_keep_nancols(self): # GH18030 + df = pd.DataFrame( + { + "metric_value": [10, 11, 0, 3, np.nan, np.nan, 100, 20], + "metric_name": ["m", "n", "m", "x", "n", "x", "m", "n"], + "product": ["A", "A", "B", "B", "C", "C", "D", "D"], + "measurer": ["Tom", "Tom", "Bill", "Tom", "Bill", "Tom", "Bill", "Tom"], + } + ) + pv_col = df.pivot_table( + "metric_value", + "metric_name", + ["measurer", "product"], + dropna=True, + ) + pv_ind = df.pivot_table( + "metric_value", + ["measurer", "product"], + "metric_name", + dropna=True, + ) + + m = MultiIndex.from_tuples( + [ + ("Bill", "B"), + ("Bill", "C"), + ("Bill", "D"), + ("Tom", "A"), + ("Tom", "B"), + ("Tom", "C"), + ("Tom", "D"), + ], + names=["measurer", "product"], + ) + tm.assert_index_equal(pv_col.columns, m) + tm.assert_index_equal(pv_ind.index, m) + + expected_pv_col = pd.DataFrame( + { + ("Bill", "B"): {"m": 0.0, "n": np.nan, "x": np.nan}, + ("Bill", "C"): {"m": np.nan, "n": np.nan, "x": np.nan}, + ("Bill", "D"): {"m": 100.0, "n": np.nan, "x": np.nan}, + ("Tom", "A"): {"m": 10.0, "n": 11.0, "x": np.nan}, + ("Tom", "B"): {"m": np.nan, "n": np.nan, "x": 3.0}, + ("Tom", "C"): {"m": np.nan, "n": np.nan, "x": np.nan}, + ("Tom", "D"): {"m": np.nan, "n": 20.0, "x": np.nan}, + } + ) + expected_pv_col.index.name = "metric_name" + expected_pv_col.columns.names = ["measurer", "product"] + + tm.assert_frame_equal(pv_col, expected_pv_col) + def test_pivot_table_categorical(self): cat1 = Categorical(