Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 2a26041

Browse files
authored
Scale Corr (#623)
Scale Series.corr
1 parent b07bb9a commit 2a26041

File tree

2 files changed

+49
-34
lines changed

2 files changed

+49
-34
lines changed

sdc/datatypes/hpat_pandas_series_functions.py

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2168,40 +2168,7 @@ def hpat_pandas_series_corr(self, other, method='pearson', min_periods=None):
21682168
ty_checker.raise_exc(min_periods, 'int64', 'min_periods')
21692169

21702170
def hpat_pandas_series_corr_impl(self, other, method='pearson', min_periods=None):
2171-
if method not in ('pearson', ''):
2172-
raise ValueError("Method corr(). Unsupported parameter. Given method != 'pearson'")
2173-
2174-
if min_periods is None:
2175-
min_periods = 1
2176-
2177-
if len(self._data) == 0 or len(other._data) == 0:
2178-
return numpy.nan
2179-
2180-
self_arr = self._data[:min(len(self._data), len(other._data))]
2181-
other_arr = other._data[:min(len(self._data), len(other._data))]
2182-
2183-
invalid = numpy.isnan(self_arr) | numpy.isnan(other_arr)
2184-
if invalid.any():
2185-
self_arr = self_arr[~invalid]
2186-
other_arr = other_arr[~invalid]
2187-
2188-
if len(self_arr) < min_periods:
2189-
return numpy.nan
2190-
2191-
new_self = pandas.Series(self_arr)
2192-
new_other = pandas.Series(other_arr)
2193-
2194-
n = new_self.count()
2195-
ma = new_self.sum()
2196-
mb = new_other.sum()
2197-
a = n * (self_arr * other_arr).sum() - ma * mb
2198-
b1 = n * (self_arr * self_arr).sum() - ma * ma
2199-
b2 = n * (other_arr * other_arr).sum() - mb * mb
2200-
2201-
if b1 == 0 or b2 == 0:
2202-
return numpy.nan
2203-
2204-
return a / numpy.sqrt(b1 * b2)
2171+
return numpy_like.corr(self, other, method, min_periods)
22052172

22062173
return hpat_pandas_series_corr_impl
22072174

sdc/functions/numpy_like.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,54 @@ def nanmean_impl(a):
727727
return nanmean_impl
728728

729729

730+
def corr(self, other, method='pearson', min_periods=None):
    """Placeholder for the correlation routine.

    This pure-Python body does nothing and returns ``None``; the working
    implementation is attached to this name by ``corr_overload`` through
    ``sdc_overload(corr)``.
    """
    return None
732+
733+
734+
@sdc_overload(corr)
def corr_overload(self, other, method='pearson', min_periods=None):
    """Provide the implementation registered for ``corr``.

    The returned ``corr_impl`` computes the Pearson correlation coefficient
    between ``self._data`` and ``other._data`` in a single pass.

    Parameters (of ``corr_impl``)
    -----------------------------
    self, other
        Objects exposing a ``_data`` array. Only the first
        ``min(len(self._data), len(other._data))`` positions are compared.
    method
        Must be ``'pearson'`` or ``''``; anything else raises ``ValueError``.
    min_periods
        Minimum number of valid (non-NaN in both inputs) pairs required.
        ``None`` or values below 1 are treated as 1.

    Returns (from ``corr_impl``)
    ----------------------------
    float
        The correlation coefficient, or ``numpy.nan`` when either input is
        empty, when fewer than ``min_periods`` valid pairs exist, or when
        either input has no variance over the valid pairs.
    """
    def corr_impl(self, other, method='pearson', min_periods=None):
        if method not in ('pearson', ''):
            raise ValueError("Method corr(). Unsupported parameter. Given method != 'pearson'")

        if min_periods is None or min_periods < 1:
            min_periods = 1

        min_len = min(len(self._data), len(other._data))

        if min_len == 0:
            return numpy.nan

        # Running sums for the one-pass formula. They are only ever updated
        # with `+=` inside the loop below.
        # NOTE(review): presumably `prange` is numba's parallel range and these
        # are treated as reductions - confirm against the file's imports.
        sum_y = 0.
        sum_x = 0.
        sum_xy = 0.
        sum_xx = 0.
        sum_yy = 0.
        total_count = 0
        for i in prange(min_len):
            x = self._data[i]
            y = other._data[i]
            # A pair contributes only when both values are present.
            if not (numpy.isnan(x) or numpy.isnan(y)):
                sum_x += x
                sum_y += y
                sum_xy += x * y
                sum_xx += x * x
                sum_yy += y * y
                total_count += 1

        # min_periods is at least 1 here, so this also covers total_count == 0
        # and protects the divisions by total_count below.
        if total_count < min_periods:
            return numpy.nan

        cov_xy = (sum_xy - sum_x * sum_y / total_count)
        var_x = (sum_xx - sum_x * sum_x / total_count)
        var_y = (sum_yy - sum_y * sum_y / total_count)

        # A constant input has zero variance, which would make the denominator
        # zero. Rounding in the one-pass formula can also leave a tiny negative
        # value, so test `<= 0` rather than `== 0`. The correlation is
        # undefined in both cases.
        if var_x <= 0. or var_y <= 0.:
            return numpy.nan

        return cov_xy / numpy.sqrt(var_x * var_y)

    return corr_impl
776+
777+
730778
def nanvar(a):
    """Placeholder for the NaN-aware variance routine.

    This pure-Python body does nothing and returns ``None``.
    NOTE(review): the real implementation is presumably registered as an
    overload further down in this module - it is not visible here.
    """
    return None
732780

0 commit comments

Comments
 (0)