From 446ec4e0df1a64fa2676ce3db750ef9b44a75840 Mon Sep 17 00:00:00 2001 From: Richard Date: Wed, 24 Jun 2020 15:39:50 -0400 Subject: [PATCH 1/2] BUG: quantile should drop non-numeric columns instead of raising --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/groupby/groupby.py | 17 ++++++++++++++--- pandas/tests/groupby/test_quantile.py | 8 ++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 9bd4ddbb624d9..1caf6979659fe 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1080,6 +1080,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrame.groupby` lost index, when one of the ``agg`` keys referenced an empty list (:issue:`32580`) - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`) - Bug in :meth:`DataFrame.ewm.cov` was throwing ``AssertionError`` for :class:`MultiIndex` inputs (:issue:`34440`) +- Bug in :meth:`core.groupby.DataFrameGroupBy.quantile` where ``TypeError`` was raised for non-numeric columns rather than dropping them (:issue:`27892`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index d039b715b3c08..1c2f88198a6e9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2400,7 +2400,7 @@ def _get_cythonized_result( signature needs_2d : bool, default False Whether the values and result of the Cython call signature - are at least 2-dimensional. + are 2-dimensional. min_count : int, default None When not None, min_count for the Cython call needs_mask : bool, default False @@ -2416,7 +2416,9 @@ def _get_cythonized_result( Function should return a tuple where the first element is the values to be passed to Cython and the second element is an optional type which the values should be converted to after being returned - by the Cython operation. Raises if `needs_values` is False. 
+ by the Cython operation. This function is also responsible for + raising a TypeError if the values have an invalid type. Raises + if `needs_values` is False. post_processing : function, default None Function to be applied to result of Cython function. Should accept an array of values as the first argument and type inferences as its @@ -2448,6 +2450,7 @@ def _get_cythonized_result( output: Dict[base.OutputKey, np.ndarray] = {} base_func = getattr(libgroupby, how) + error_msg = "" for idx, obj in enumerate(self._iterate_slices()): name = obj.name values = obj._values @@ -2474,7 +2477,11 @@ def _get_cythonized_result( if needs_values: vals = values if pre_processing: - vals, inferences = pre_processing(vals) + try: + vals, inferences = pre_processing(vals) + except TypeError as e: + error_msg = str(e) + continue if needs_2d: vals = vals.reshape((-1, 1)) vals = vals.astype(cython_dtype, copy=False) @@ -2506,6 +2513,10 @@ def _get_cythonized_result( key = base.OutputKey(label=name, position=idx) output[key] = result + # error_msg is "" on a frame/series with no rows or columns + if len(output) == 0 and error_msg != "": + raise TypeError(error_msg) + if aggregate: return self._wrap_aggregated_output(output) else: diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 8cfd8035502c3..903a0ab90e0ce 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -232,3 +232,11 @@ def test_groupby_quantile_nullable_array(values, q): expected = pd.Series(true_quantiles * 2, index=idx, name="b") tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) +def test_groupby_quantile_skips_invalid_dtype(q): + df = pd.DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) + result = df.groupby("a").quantile(0.5) + expected = df.set_index("a")[["b"]] + tm.assert_frame_equal(result, expected) From 441d2510497a38be7beb824f68ee9cff4c977425 Mon Sep 17 00:00:00 2001 From: Richard 
Date: Wed, 24 Jun 2020 16:40:39 -0400 Subject: [PATCH 2/2] Fixed test to use quantile parameter --- pandas/tests/groupby/test_quantile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 903a0ab90e0ce..9338742195bfe 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -237,6 +237,6 @@ def test_groupby_quantile_nullable_array(values, q): @pytest.mark.parametrize("q", [0.5, [0.0, 0.5, 1.0]]) def test_groupby_quantile_skips_invalid_dtype(q): df = pd.DataFrame({"a": [1], "b": [2.0], "c": ["x"]}) - result = df.groupby("a").quantile(0.5) - expected = df.set_index("a")[["b"]] + result = df.groupby("a").quantile(q) + expected = df.groupby("a")[["b"]].quantile(q) tm.assert_frame_equal(result, expected)