diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
index b96464173..cf5c3f2a5 100644
--- a/sdc/datatypes/hpat_pandas_series_functions.py
+++ b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -2196,40 +2196,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
         ty_checker.raise_exc(min_periods, 'int64', 'min_periods')
 
     def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None):
-        if method not in ('pearson', ''):
-            raise ValueError("Method corr(). Unsupported parameter. Given method != 'pearson'")
-
-        if min_periods is None:
-            min_periods = 1
-
-        if len(self._data) == 0 or len(other._data) == 0:
-            return numpy.nan
-
-        self_arr = self._data[:min(len(self._data), len(other._data))]
-        other_arr = other._data[:min(len(self._data), len(other._data))]
-
-        invalid = numpy.isnan(self_arr) | numpy.isnan(other_arr)
-        if invalid.any():
-            self_arr = self_arr[~invalid]
-            other_arr = other_arr[~invalid]
-
-        if len(self_arr) < min_periods:
-            return numpy.nan
-
-        new_self = pandas.Series(self_arr)
-        new_other = pandas.Series(other_arr)
-
-        n = new_self.count()
-        ma = new_self.sum()
-        mb = new_other.sum()
-        a = n * (self_arr * other_arr).sum() - ma * mb
-        b1 = n * (self_arr * self_arr).sum() - ma * ma
-        b2 = n * (other_arr * other_arr).sum() - mb * mb
-
-        if b1 == 0 or b2 == 0:
-            return numpy.nan
-
-        return a / numpy.sqrt(b1 * b2)
+        return numpy_like.corr(self, other, method, min_periods)
 
     return hpat_pandas_series_corr_impl
 
diff --git a/sdc/functions/numpy_like.py b/sdc/functions/numpy_like.py
index 3ac537d4e..67269488f 100644
--- a/sdc/functions/numpy_like.py
+++ b/sdc/functions/numpy_like.py
@@ -727,6 +727,85 @@ def nanmean_impl(a):
     return nanmean_impl
 
 
+def corr(self, other, method='pearson', min_periods=None):
+    """Placeholder for Pearson correlation; the implementation is registered through sdc_overload."""
+    pass
+
+
+@sdc_overload(corr)
+def corr_overload(self, other, method='pearson', min_periods=None):
+    """Provide the implementation of corr(): single-pass Pearson correlation of two inputs.
+
+    Only the first min(len(self._data), len(other._data)) positions are compared, and any
+    position where either value is NaN is ignored.
+
+    Parameters
+    ----------
+    self, other
+        Objects exposing a ``_data`` array (presumably SDC Series; confirm against callers).
+    method
+        Only 'pearson' (or '') is accepted; anything else raises ValueError.
+    min_periods
+        Minimum number of valid pairs required; None or a value below 1 is treated as 1.
+
+    Returns
+    -------
+    The correlation coefficient, or NaN when there is no data, fewer than min_periods
+    valid pairs, or one of the inputs has no variance over the valid pairs.
+    """
+    def corr_impl(self, other, method='pearson', min_periods=None):
+        if method not in ('pearson', ''):
+            raise ValueError("Method corr(). Unsupported parameter. Given method != 'pearson'")
+
+        if min_periods is None or min_periods < 1:
+            min_periods = 1
+
+        min_len = min(len(self._data), len(other._data))
+
+        if min_len == 0:
+            return numpy.nan
+
+        # Running sums over the valid (non-NaN) pairs, accumulated in the prange loop below.
+        sum_y = 0.
+        sum_x = 0.
+        sum_xy = 0.
+        sum_xx = 0.
+        sum_yy = 0.
+        total_count = 0
+        for i in prange(min_len):
+            x = self._data[i]
+            y = other._data[i]
+            if not (numpy.isnan(x) or numpy.isnan(y)):
+                sum_x += x
+                sum_y += y
+                sum_xy += x * y
+                sum_xx += x * x
+                sum_yy += y * y
+                total_count += 1
+
+        # min_periods >= 1 here, so this also rejects the "every pair had a NaN" case
+        # and guarantees total_count > 0 for the divisions below.
+        if total_count < min_periods:
+            return numpy.nan
+
+        cov_xy = (sum_xy - sum_x * sum_y / total_count)
+        var_x = (sum_xx - sum_x * sum_x / total_count)
+        var_y = (sum_yy - sum_y * sum_y / total_count)
+
+        # A constant input has zero variance, which would make the denominator zero
+        # (rounding can even push it slightly negative); the correlation is undefined
+        # there, so return NaN, matching the guard in the implementation this replaces.
+        if var_x <= 0. or var_y <= 0.:
+            return numpy.nan
+
+        # Separate square roots so the product var_x * var_y cannot underflow to zero.
+        corr_xy = cov_xy / (numpy.sqrt(var_x) * numpy.sqrt(var_y))
+
+        return corr_xy
+
+    return corr_impl
+
+
 def nanvar(a):
     pass
 