Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Scale Corr #623

Merged
merged 7 commits into from
Feb 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 1 addition & 34 deletions sdc/datatypes/hpat_pandas_series_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2196,40 +2196,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
ty_checker.raise_exc(min_periods, 'int64', 'min_periods')

def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None):
if method not in ('pearson', ''):
raise ValueError("Method corr(). Unsupported parameter. Given method != 'pearson'")

if min_periods is None:
min_periods = 1

if len(self._data) == 0 or len(other._data) == 0:
return numpy.nan

self_arr = self._data[:min(len(self._data), len(other._data))]
other_arr = other._data[:min(len(self._data), len(other._data))]

invalid = numpy.isnan(self_arr) | numpy.isnan(other_arr)
if invalid.any():
self_arr = self_arr[~invalid]
other_arr = other_arr[~invalid]

if len(self_arr) < min_periods:
return numpy.nan

new_self = pandas.Series(self_arr)
new_other = pandas.Series(other_arr)

n = new_self.count()
ma = new_self.sum()
mb = new_other.sum()
a = n * (self_arr * other_arr).sum() - ma * mb
b1 = n * (self_arr * self_arr).sum() - ma * ma
b2 = n * (other_arr * other_arr).sum() - mb * mb

if b1 == 0 or b2 == 0:
return numpy.nan

return a / numpy.sqrt(b1 * b2)
return numpy_like.corr(self, other, method, min_periods)

return hpat_pandas_series_corr_impl

Expand Down
48 changes: 48 additions & 0 deletions sdc/functions/numpy_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,54 @@ def nanmean_impl(a):
return nanmean_impl


def corr(self, other, method='pearson', min_periods=None):
    """
    Placeholder for the correlation routine; does nothing and returns None.

    The function only serves as the target that ``@sdc_overload(corr)``
    attaches the real implementation (``corr_overload``) to, so its own
    body carries no logic.
    """
    return None


@sdc_overload(corr)
def corr_overload(self, other, method='pearson', min_periods=None):
    """
    Overload of ``corr`` that returns the Pearson correlation implementation.

    The returned ``corr_impl`` works on the ``_data`` arrays of ``self`` and
    ``other``, truncated to the shorter of the two lengths. Positions where
    either value is NaN are ignored.

    Parameters
    ----------
    self, other
        Objects exposing a ``_data`` array (presumably SDC Series - confirm
        against callers).
    method
        Only ``'pearson'`` (or the empty string) is accepted.
    min_periods
        Minimum number of valid (non-NaN) pairs required for a result.
        ``None`` and values below 1 are treated as 1.

    Returns
    -------
    The correlation coefficient, or NaN when there is no data, fewer than
    ``min_periods`` valid pairs, or when the correlation is undefined because
    one of the inputs has zero variance over the valid pairs.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'pearson'`` nor ``''``.
    """
    def corr_impl(self, other, method='pearson', min_periods=None):
        if method not in ('pearson', ''):
            raise ValueError("Method corr(). Unsupported parameter. Given method != 'pearson'")

        if min_periods is None or min_periods < 1:
            min_periods = 1

        min_len = min(len(self._data), len(other._data))

        if min_len == 0:
            return numpy.nan

        # Single pass accumulating the raw sums needed for covariance and
        # variances; only pairs where both values are non-NaN contribute.
        sum_y = 0.
        sum_x = 0.
        sum_xy = 0.
        sum_xx = 0.
        sum_yy = 0.
        total_count = 0
        for i in prange(min_len):
            x = self._data[i]
            y = other._data[i]
            if not (numpy.isnan(x) or numpy.isnan(y)):
                sum_x += x
                sum_y += y
                sum_xy += x * y
                sum_xx += x * x
                sum_yy += y * y
                total_count += 1

        # min_periods >= 1 here, so this also covers total_count == 0 and
        # protects the divisions by total_count below.
        if total_count < min_periods:
            return numpy.nan

        cov_xy = (sum_xy - sum_x * sum_y / total_count)
        var_x = (sum_xx - sum_x * sum_x / total_count)
        var_y = (sum_yy - sum_y * sum_y / total_count)

        # A constant input gives zero variance and therefore a zero
        # denominator; the correlation is undefined, so report NaN instead
        # of dividing by zero.
        denominator = numpy.sqrt(var_x * var_y)
        if denominator == 0:
            return numpy.nan

        return cov_xy / denominator

    return corr_impl


def nanvar(a):
    """
    Placeholder that does nothing and returns None.

    NOTE(review): presumably the real implementation is registered as an
    overload elsewhere in this module, the same way ``corr`` is handled -
    confirm against the rest of the file.
    """
    return None

Expand Down