Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Scale var/std #616

Merged
merged 1 commit into from
Feb 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1497,7 +1497,7 @@ def hpat_pandas_series_var_impl(self, axis=None, skipna=None, level=None, ddof=1
if valuable_length <= ddof:
return numpy.nan

return numpy.nanvar(self._data) * valuable_length / (valuable_length - ddof)
return numpy_like.nanvar(self._data) * valuable_length / (valuable_length - ddof)

if len(self._data) <= ddof:
return numpy.nan
Expand Down
29 changes: 29 additions & 0 deletions sdc/functions/numpy_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,3 +497,32 @@ def nanmean_impl(a):
return np.divide(c, count)

return nanmean_impl


def nanvar(a):
    """Stub that serves as the overload target for the NaN-aware variance.

    The body is intentionally empty: called as plain Python it returns None.
    An implementation is attached to this name elsewhere in the module
    through the ``sdc_overload`` decorator.
    """


@sdc_overload(nanvar)
def np_nanvar(a):
    """Overload of ``nanvar`` for array-typed arguments.

    Returns nothing (i.e. no implementation) when ``a`` is not a
    ``types.Array``; otherwise returns ``nanvar_impl``, which divides the
    summed squared deviations from ``nanmean(a)`` by the number of elements
    for which the dtype-specific NaN check is false.
    """
    if not isinstance(a, types.Array):
        return None

    # NaN predicate chosen from the array's element type
    # (presumably a no-op check for non-float dtypes -- confirm in get_isnan).
    elem_is_nan = get_isnan(a.dtype)

    def nanvar_impl(a):
        # First pass: mean over the elements that are not NaN.
        mean = nanmean(a)

        # Second pass: accumulate squared deviations and count the
        # elements that contributed to them.
        sq_diff_sum = 0.0
        valid = 0
        for idx in prange(len(a)):
            elem = a[idx]
            if not elem_is_nan(elem):
                delta = (elem.item() - mean)
                # delta * conj(delta) is |delta|**2; np.real drops the
                # zero imaginary part so the accumulator stays real.
                sq_diff_sum += np.real(delta * np.conj(delta))
                valid += 1

        # np.divide() doesn't raise ZeroDivisionError when valid == 0.
        return np.divide(sq_diff_sum, valid)

    return nanvar_impl
17 changes: 15 additions & 2 deletions sdc/tests/test_sdc_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,10 @@ def sdc_impl():

class TestArrayReductions(TestCase):

def check_reduction_basic(self, pyfunc, alt_pyfunc, all_nans=True):
def check_reduction_basic(self, pyfunc, alt_pyfunc, all_nans=True, comparator=None):
if not comparator:
comparator = np.testing.assert_array_equal

alt_cfunc = self.jit(alt_pyfunc)

def cases():
Expand All @@ -262,7 +265,7 @@ def cases():

for case in cases():
with self.subTest(data=case):
np.testing.assert_array_equal(alt_cfunc(case), pyfunc(case))
comparator(alt_cfunc(case), pyfunc(case))

def test_nanmean(self):
def ref_impl(a):
Expand Down Expand Up @@ -309,6 +312,16 @@ def sdc_impl(a):

self.check_reduction_basic(ref_impl, sdc_impl)

def test_nanvar(self):
    """Compare ``numpy_like.nanvar`` against ``np.nanvar``.

    Results are compared with ``assert_array_almost_equal`` rather than the
    exact-equality comparator that ``check_reduction_basic`` uses by default.
    """
    def numpy_reference(a):
        return np.nanvar(a)

    def sdc_under_test(a):
        return numpy_like.nanvar(a)

    approx_equal = np.testing.assert_array_almost_equal
    self.check_reduction_basic(numpy_reference, sdc_under_test,
                               comparator=approx_equal)

def test_sum(self):
def ref_impl(a):
return np.sum(a)
Expand Down
5 changes: 5 additions & 0 deletions sdc/tests/tests_perf/test_perf_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ def _test_case(self, cases, name, total_data_length, data_num=1, input_data=test
CE(type_='Numba', code='np.nanprod(data)', jitted=True),
CE(type_='SDC', code='sdc.functions.numpy_like.nanprod(data)', jitted=True),
], usecase_params='data'),
TC(name='nanvar', size=[10 ** 7], call_expr=[
CE(type_='Python', code='np.nanvar(data)', jitted=False),
CE(type_='Numba', code='np.nanvar(data)', jitted=True),
CE(type_='SDC', code='sdc.functions.numpy_like.nanvar(data)', jitted=True),
], usecase_params='data'),
TC(name='sum', size=[10 ** 7], call_expr=[
CE(type_='Python', code='np.sum(data)', jitted=False),
CE(type_='Numba', code='np.sum(data)', jitted=True),
Expand Down
6 changes: 4 additions & 2 deletions sdc/tests/tests_perf/test_perf_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,16 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1, input_data=tes
TC(name='shift', size=[10 ** 8]),
TC(name='size', size=[10 ** 7], call_expr='data.size', usecase_params='data'),
TC(name='sort_values', size=[10 ** 5]),
TC(name='std', size=[10 ** 7]),
TC(name='std', size=[10 ** 7], params='skipna=True'),
TC(name='std', size=[10 ** 7], params='skipna=False'),
TC(name='sub', size=[10 ** 7], params='other', data_num=2),
TC(name='sum', size=[10 ** 8]),
TC(name='take', size=[10 ** 7], call_expr='data.take([0])', usecase_params='data'),
TC(name='truediv', size=[10 ** 7], params='other', data_num=2),
TC(name='values', size=[10 ** 7], call_expr='data.values', usecase_params='data'),
TC(name='value_counts', size=[10 ** 6]),
TC(name='var', size=[10 ** 8]),
TC(name='var', size=[10 ** 8], params='skipna=True'),
TC(name='var', size=[10 ** 8], params='skipna=False'),
TC(name='unique', size=[10 ** 5]),
]

Expand Down