Skip to content

BUG: define empty product on Series and DataFrame to be 1 #7928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,10 @@ API changes
strings must contain 244 or fewer characters. Attempting to write Stata
dta files with strings longer than 244 characters raises a ``ValueError``. (:issue:`7858`)

- The product of an empty ``Series`` or ``DataFrame`` is now 1 instead of
``nan``. For example, when resampling, the ``prod`` of empty bins (empty
Periods, Timestamps, etc.) is now 1 rather than ``nan`` (:issue:`7889`).


.. _whatsnew_0150.cat:

Expand Down
88 changes: 41 additions & 47 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import sys
import itertools
import functools

Expand All @@ -10,7 +9,6 @@
except ImportError: # pragma: no cover
_USE_BOTTLENECK = False

import pandas.core.common as com
import pandas.hashtable as _hash
from pandas import compat, lib, algos, tslib
from pandas.compat import builtins
Expand Down Expand Up @@ -49,53 +47,48 @@ def _f(*args, **kwargs):
return _f


class bottleneck_switch(object):
def bottleneck_switch(alt=None, zero_value=None, **kwargs):
if alt is None:
return functools.partial(bottleneck_switch, zero_value=zero_value,
**kwargs)

def __init__(self, zero_value=None, **kwargs):
self.zero_value = zero_value
self.kwargs = kwargs

def __call__(self, alt):
bn_name = alt.__name__
bn_name = alt.__name__

try:
bn_func = getattr(bn, bn_name)
except (AttributeError, NameError): # pragma: no cover
bn_func = None

@functools.wraps(alt)
def f(values, axis=None, skipna=True, **kwds):
for k, v in compat.iteritems(kwargs):
kwds.setdefault(k, v)
try:
bn_func = getattr(bn, bn_name)
except (AttributeError, NameError): # pragma: no cover
bn_func = None

@functools.wraps(alt)
def f(values, axis=None, skipna=True, **kwds):
if len(self.kwargs) > 0:
for k, v in compat.iteritems(self.kwargs):
if k not in kwds:
kwds[k] = v
try:
if self.zero_value is not None and values.size == 0:
if values.ndim == 1:
return 0
else:
result_shape = (values.shape[:axis] +
values.shape[axis + 1:])
result = np.empty(result_shape)
result.fill(0)
return result

if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
bn_name):
result = bn_func(values, axis=axis, **kwds)

# prefer to treat inf/-inf as NA, but must compute the func
# twice :(
if _has_infs(result):
result = alt(values, axis=axis, skipna=skipna, **kwds)
if zero_value is not None and values.size == 0:
if values.ndim == 1:
return zero_value
else:
result_shape = (values.shape[:axis] +
values.shape[axis + 1:])
result = np.empty(result_shape)
result.fill(zero_value)
return result

if _USE_BOTTLENECK and skipna and _bn_ok_dtype(values.dtype,
bn_name):
result = bn_func(values, axis=axis, **kwds)

# prefer to treat inf/-inf as NA, but must compute the func
# twice :(
if _has_infs(result):
result = alt(values, axis=axis, skipna=skipna, **kwds)
except Exception:
else:
result = alt(values, axis=axis, skipna=skipna, **kwds)
except (ValueError, TypeError, ZeroDivisionError):
result = alt(values, axis=axis, skipna=skipna, **kwds)

return result

return f
return result
return f


def _bn_ok_dtype(dt, name):
Expand All @@ -121,7 +114,7 @@ def _has_infs(result):
return lib.has_infs_f4(result.ravel())
try:
return np.isinf(result).any()
except (TypeError, NotImplementedError) as e:
except (TypeError, NotImplementedError):
# if it doesn't support infs, then it can't have infs
return False

Expand Down Expand Up @@ -260,7 +253,7 @@ def nansum(values, axis=None, skipna=True):


@disallow('M8')
@bottleneck_switch()
@bottleneck_switch
def nanmean(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
the_sum = _ensure_numeric(values.sum(axis, dtype=dtype_max))
Expand All @@ -278,7 +271,7 @@ def nanmean(values, axis=None, skipna=True):


@disallow('M8')
@bottleneck_switch()
@bottleneck_switch
def nanmedian(values, axis=None, skipna=True):

values, mask, dtype, dtype_max = _get_values(values, skipna)
Expand Down Expand Up @@ -365,7 +358,7 @@ def nansem(values, axis=None, skipna=True, ddof=1):
return np.sqrt(var)/np.sqrt(count)


@bottleneck_switch()
@bottleneck_switch
def nanmin(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna,
fill_value_typ='+inf')
Expand Down Expand Up @@ -395,7 +388,7 @@ def nanmin(values, axis=None, skipna=True):
return _maybe_null_out(result, axis, mask)


@bottleneck_switch()
@bottleneck_switch
def nanmax(values, axis=None, skipna=True):
values, mask, dtype, dtype_max = _get_values(values, skipna,
fill_value_typ='-inf')
Expand Down Expand Up @@ -517,6 +510,7 @@ def nankurt(values, axis=None, skipna=True):


@disallow('M8')
@bottleneck_switch(zero_value=1)
def nanprod(values, axis=None, skipna=True):
mask = isnull(values)
if skipna and not _is_any_int_dtype(values):
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3018,6 +3018,8 @@ def test_isnull_for_inf(self):
tm.assert_series_equal(r, e)
tm.assert_series_equal(dr, de)

def test_empty_product(self):
tm.assert_equal(Series().prod(), 1)

# TimeSeries-specific

Expand Down
25 changes: 20 additions & 5 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1363,22 +1363,36 @@ def test_aggregate_with_nat(self):
normal_grouped = normal_df.groupby('key')
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))

for func in ['min', 'max', 'prod']:
for func in ['min', 'max']:
normal_result = getattr(normal_grouped, func)()
dt_result = getattr(dt_grouped, func)()
pad = DataFrame([[np.nan, np.nan, np.nan, np.nan]],
index=[3], columns=['A', 'B', 'C', 'D'])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key')
expected.index = date_range(start='2013-01-01', freq='D', periods=5,
name='key')
assert_frame_equal(expected, dt_result)

for func in ['prod']:
normal_result = getattr(normal_grouped, func)()
dt_result = getattr(dt_grouped, func)()
pad = DataFrame([[1] * 4],
index=[3], columns=['A', 'B', 'C', 'D'])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start='2013-01-01', freq='D', periods=5,
name='key')
assert_frame_equal(expected, dt_result)

for func in ['count', 'sum']:
normal_result = getattr(normal_grouped, func)()
pad = DataFrame([[0, 0, 0, 0]], index=[3], columns=['A', 'B', 'C', 'D'])
pad = DataFrame([[0, 0, 0, 0]], index=[3], columns=['A', 'B', 'C',
'D'])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key')
expected.index = date_range(start='2013-01-01', freq='D', periods=5,
name='key')
dt_result = getattr(dt_grouped, func)()
assert_frame_equal(expected, dt_result)

Expand All @@ -1387,7 +1401,8 @@ def test_aggregate_with_nat(self):
pad = Series([0], index=[3])
expected = normal_result.append(pad)
expected = expected.sort_index()
expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key')
expected.index = date_range(start='2013-01-01', freq='D', periods=5,
name='key')
dt_result = getattr(dt_grouped, func)()
assert_series_equal(expected, dt_result)

Expand Down