diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index 024ee68ced303..ecf9f9aca4f89 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -129,6 +129,10 @@ API changes strings must contain 244 or fewer characters. Attempting to write Stata dta files with strings longer than 244 characters raises a ``ValueError``. (:issue:`7858`) +- The product of an empty set of values is now 1 rather than ``nan``. For + example, when resampling with ``prod``, empty bins (Periods, Timestamps, + etc.) now have the value 1 instead of ``nan`` (:issue:`7889`). + .. _whatsnew_0150.cat: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index aa6140383a27a..b0fcd4c9542c2 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,4 +1,3 @@ -import sys import itertools import functools @@ -10,7 +9,6 @@ except ImportError: # pragma: no cover _USE_BOTTLENECK = False -import pandas.core.common as com import pandas.hashtable as _hash from pandas import compat, lib, algos, tslib from pandas.compat import builtins @@ -49,53 +47,48 @@ def _f(*args, **kwargs): return _f -class bottleneck_switch(object): +def bottleneck_switch(alt=None, zero_value=None, **kwargs): + if alt is None: + return functools.partial(bottleneck_switch, zero_value=zero_value, + **kwargs) - def __init__(self, zero_value=None, **kwargs): - self.zero_value = zero_value - self.kwargs = kwargs - - def __call__(self, alt): - bn_name = alt.__name__ + bn_name = alt.__name__ + try: + bn_func = getattr(bn, bn_name) + except (AttributeError, NameError): # pragma: no cover + bn_func = None + + @functools.wraps(alt) + def f(values, axis=None, skipna=True, **kwds): + for k, v in compat.iteritems(kwargs): + kwds.setdefault(k, v) try: - bn_func = getattr(bn, bn_name) - except (AttributeError, NameError): # pragma: no cover - bn_func = None - - @functools.wraps(alt) - def f(values, axis=None, skipna=True, **kwds): - if len(self.kwargs) > 0: - for k, v in compat.iteritems(self.kwargs): - if k not in 
kwds: - kwds[k] = v - try: - if self.zero_value is not None and values.size == 0: - if values.ndim == 1: - return 0 - else: - result_shape = (values.shape[:axis] + - values.shape[axis + 1:]) - result = np.empty(result_shape) - result.fill(0) - return result - - if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, - bn_name): - result = bn_func(values, axis=axis, **kwds) - - # prefer to treat inf/-inf as NA, but must compute the func - # twice :( - if _has_infs(result): - result = alt(values, axis=axis, skipna=skipna, **kwds) + if zero_value is not None and values.size == 0: + if values.ndim == 1: + return zero_value else: + result_shape = (values.shape[:axis] + + values.shape[axis + 1:]) + result = np.empty(result_shape) + result.fill(zero_value) + return result + + if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype, + bn_name): + result = bn_func(values, axis=axis, **kwds) + + # prefer to treat inf/-inf as NA, but must compute the func + # twice :( + if _has_infs(result): result = alt(values, axis=axis, skipna=skipna, **kwds) - except Exception: + else: result = alt(values, axis=axis, skipna=skipna, **kwds) + except (ValueError, TypeError, ZeroDivisionError): + result = alt(values, axis=axis, skipna=skipna, **kwds) - return result - - return f + return result + return f def _bn_ok_dtype(dt, name): @@ -121,7 +114,7 @@ def _has_infs(result): return lib.has_infs_f4(result.ravel()) try: return np.isinf(result).any() - except (TypeError, NotImplementedError) as e: + except (TypeError, NotImplementedError): # if it doesn't support infs, then it can't have infs return False @@ -260,7 +253,7 @@ def nansum(values, axis=None, skipna=True): @disallow('M8') -@bottleneck_switch() +@bottleneck_switch def nanmean(values, axis=None, skipna=True): values, mask, dtype, dtype_max = _get_values(values, skipna, 0) the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_max)) @@ -278,7 +271,7 @@ def nanmean(values, axis=None, skipna=True): @disallow('M8') 
-@bottleneck_switch() +@bottleneck_switch def nanmedian(values, axis=None, skipna=True): values, mask, dtype, dtype_max = _get_values(values, skipna) @@ -365,7 +358,7 @@ def nansem(values, axis=None, skipna=True, ddof=1): return np.sqrt(var)/np.sqrt(count) -@bottleneck_switch() +@bottleneck_switch def nanmin(values, axis=None, skipna=True): values, mask, dtype, dtype_max = _get_values(values, skipna, fill_value_typ='+inf') @@ -395,7 +388,7 @@ def nanmin(values, axis=None, skipna=True): return _maybe_null_out(result, axis, mask) -@bottleneck_switch() +@bottleneck_switch def nanmax(values, axis=None, skipna=True): values, mask, dtype, dtype_max = _get_values(values, skipna, fill_value_typ='-inf') @@ -517,6 +510,7 @@ def nankurt(values, axis=None, skipna=True): @disallow('M8') +@bottleneck_switch(zero_value=1) def nanprod(values, axis=None, skipna=True): mask = isnull(values) if skipna and not _is_any_int_dtype(values): diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index fcd4b89377176..a9b1ebbba618c 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -3018,6 +3018,8 @@ def test_isnull_for_inf(self): tm.assert_series_equal(r, e) tm.assert_series_equal(dr, de) + def test_empty_product(self): + tm.assert_equal(Series().prod(), 1) # TimeSeries-specific diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index ff8b6945a23be..c604bf809a171 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1363,22 +1363,36 @@ def test_aggregate_with_nat(self): normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) - for func in ['min', 'max', 'prod']: + for func in ['min', 'max']: normal_result = getattr(normal_grouped, func)() dt_result = getattr(dt_grouped, func)() pad = DataFrame([[np.nan, np.nan, np.nan, np.nan]], index=[3], columns=['A', 'B', 'C', 'D']) expected = normal_result.append(pad) expected = 
expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') + expected.index = date_range(start='2013-01-01', freq='D', periods=5, + name='key') + assert_frame_equal(expected, dt_result) + + for func in ['prod']: + normal_result = getattr(normal_grouped, func)() + dt_result = getattr(dt_grouped, func)() + pad = DataFrame([[1] * 4], + index=[3], columns=['A', 'B', 'C', 'D']) + expected = normal_result.append(pad) + expected = expected.sort_index() + expected.index = date_range(start='2013-01-01', freq='D', periods=5, + name='key') assert_frame_equal(expected, dt_result) for func in ['count', 'sum']: normal_result = getattr(normal_grouped, func)() - pad = DataFrame([[0, 0, 0, 0]], index=[3], columns=['A', 'B', 'C', 'D']) + pad = DataFrame([[0, 0, 0, 0]], index=[3], columns=['A', 'B', 'C', + 'D']) expected = normal_result.append(pad) expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') + expected.index = date_range(start='2013-01-01', freq='D', periods=5, + name='key') dt_result = getattr(dt_grouped, func)() assert_frame_equal(expected, dt_result) @@ -1387,7 +1401,8 @@ def test_aggregate_with_nat(self): pad = Series([0], index=[3]) expected = normal_result.append(pad) expected = expected.sort_index() - expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') + expected.index = date_range(start='2013-01-01', freq='D', periods=5, + name='key') dt_result = getattr(dt_grouped, func)() assert_series_equal(expected, dt_result)