-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
ENH: add groupby & reduce support to EA #22762
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
90fb20e
63899aa
05f27a1
9dbbe51
d4c0a3e
bb3f37c
e7fbe0f
fddf938
49efedf
371ab54
d94b85e
6774791
6699080
29c3cf7
0fdaf68
08f5830
3e763c4
b543386
b808778
2b3d96f
3037ccd
b872607
aeaf5f3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
from pandas.compat import u, range, string_types | ||
from pandas.compat import set_function_name | ||
|
||
from pandas.core import nanops | ||
from pandas.core.dtypes.cast import astype_nansafe | ||
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass | ||
from pandas.core.dtypes.common import ( | ||
|
@@ -529,6 +530,31 @@ def cmp_method(self, other): | |
name = '__{name}__'.format(name=op.__name__) | ||
return set_function_name(cmp_method, name, cls) | ||
|
||
def _reduce(self, name, skipna=True, **kwargs): | ||
data = self._data | ||
mask = self._mask | ||
|
||
# coerce to a nan-aware float if needed | ||
if mask.any(): | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
data = self._data.astype('float64') | ||
data[mask] = self._na_value | ||
|
||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
op = getattr(nanops, 'nan' + name) | ||
result = op(data, axis=0, skipna=skipna, mask=mask) | ||
|
||
# if we have a boolean op, don't coerce | ||
if name in ['any', 'all']: | ||
pass | ||
|
||
# if we have a preservable numeric op, | ||
# provide coercion back to an integer type if possible | ||
elif name in ['sum', 'min', 'max'] and notna(result): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should maybe add a test for this specific aspect? (int vs foat return depending on the op) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we already have them There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
But not tests that assert the correct type of the result? Because you changed the behaviour here for certain methods, but didn't need to update any tests, so that seems to indicate this aspect is not covered? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these were already there, just expanded There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are pointing to a groupby test, that didn't change when you did the above update, and that doesn't check that the type for the operations that are impacted (eg mean) ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added a test that I tried to ask for. Can you have a look at it? There is actually some more "complexity" here in the return value. If you access a single value from an IntegerArray, that gives you a np.int64 scalar (or other dtype). Now, the reductions here return a Python int. |
||
int_result = int(result) | ||
if int_result == result: | ||
result = int_result | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return result | ||
|
||
def _maybe_mask_result(self, result, mask, other, op_name): | ||
""" | ||
Parameters | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
import warnings | ||
import pytest | ||
import pandas.util.testing as tm | ||
import pandas as pd | ||
from .base import BaseExtensionTests | ||
|
||
|
||
class BaseReduceTests(BaseExtensionTests): | ||
""" | ||
Reduction specific tests. Generally these only | ||
make sense for numeric/boolean operations. | ||
""" | ||
def check_reduce(self, s, op_name, skipna): | ||
result = getattr(s, op_name)(skipna=skipna) | ||
expected = getattr(s.astype('float64'), op_name)(skipna=skipna) | ||
tm.assert_almost_equal(result, expected) | ||
|
||
|
||
class BaseNoReduceTests(BaseReduceTests): | ||
""" we don't define any reductions """ | ||
|
||
@pytest.mark.parametrize('skipna', [True, False]) | ||
def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): | ||
op_name = all_numeric_reductions | ||
s = pd.Series(data) | ||
|
||
with pytest.raises(TypeError): | ||
getattr(s, op_name)(skipna=skipna) | ||
|
||
@pytest.mark.parametrize('skipna', [True, False]) | ||
def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): | ||
op_name = all_boolean_reductions | ||
s = pd.Series(data) | ||
|
||
with pytest.raises(TypeError): | ||
getattr(s, op_name)(skipna=skipna) | ||
|
||
|
||
class BaseNumericReduceTests(BaseReduceTests): | ||
|
||
@pytest.mark.parametrize('skipna', [True, False]) | ||
def test_reduce_series(self, data, all_numeric_reductions, skipna): | ||
op_name = all_numeric_reductions | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
s = pd.Series(data) | ||
|
||
# min/max with empty produce numpy warnings | ||
with warnings.catch_warnings(): | ||
warnings.simplefilter("ignore", RuntimeWarning) | ||
self.check_reduce(s, op_name, skipna) | ||
|
||
|
||
class BaseBooleanReduceTests(BaseReduceTests): | ||
|
||
@pytest.mark.parametrize('skipna', [True, False]) | ||
def test_reduce_series(self, data, all_boolean_reductions, skipna): | ||
op_name = all_boolean_reductions | ||
s = pd.Series(data) | ||
self.check_reduce(s, op_name, skipna) |
Uh oh!
There was an error while loading. Please reload this page.