From 6b722baa63259edeec05ef4738471349c71d0df3 Mon Sep 17 00:00:00 2001 From: thanasis Date: Mon, 7 Mar 2016 21:12:31 +0200 Subject: [PATCH 1/6] Fix for issue #12553 --- pandas/core/groupby.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 442f2132847ee..2f7cd16f77a82 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2790,6 +2790,10 @@ def true_and_notnull(x, *args, **kwargs): def nunique(self, dropna=True): """ Returns number of unique elements in the group """ ids, _, _ = self.grouper.group_info + + if len(ids) == 0: # bufix for 12553 + return Series([]) + val = self.obj.get_values() try: @@ -2804,6 +2808,7 @@ def nunique(self, dropna=True): isnull = com.isnull ids, val = ids[sorter], val[sorter] + # group boundaries are where group ids change # unique observations are where sorted values change @@ -2820,7 +2825,7 @@ def nunique(self, dropna=True): inc[idx] = 1 out = np.add.reduceat(inc, idx).astype('int64', copy=False) - res = out if ids[0] != -1 else out[1:] + res = out if ids[0] != -1 else out[1:]) ri = self.grouper.result_index # we might have duplications among the bins From cf9e2d92793de1430d4ddd6fa6a5399cc661b65a Mon Sep 17 00:00:00 2001 From: Thanasis Katsios Date: Mon, 7 Mar 2016 22:30:53 +0200 Subject: [PATCH 2/6] BUG: Fix for issue #12553 --- pandas/core/groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 2f7cd16f77a82..d863be0c044c5 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2791,8 +2791,8 @@ def nunique(self, dropna=True): """ Returns number of unique elements in the group """ ids, _, _ = self.grouper.group_info - if len(ids) == 0: # bufix for 12553 - return Series([]) + if len(ids) == 0: # bugfix for 12553 + return self._constructor([]) val = self.obj.get_values() @@ -2825,7 +2825,7 @@ def nunique(self, dropna=True): inc[idx] = 1 out = np.add.reduceat(inc, idx).astype('int64', copy=False) - res = out if ids[0] != -1 else out[1:]) + res = out if ids[0] != -1 else out[1:] ri = self.grouper.result_index # we might have duplications among the bins From 748cb45451386ed49e0f519680f89414f9ca2198 Mon Sep 17 00:00:00 2001 From: Thanasis Katsios Date: Mon, 7 Mar 2016 22:32:33 +0200 Subject: [PATCH 3/6] BUG: Fix for issue #12553 --- pandas/core/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index d863be0c044c5..b21557a67235a 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2809,7 +2809,6 @@ def nunique(self, dropna=True): ids, val = ids[sorter], val[sorter] - # group boundaries are where group ids change # unique observations are where sorted values change idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] From 802b3e1193611c9fcdd7c2e067bec052d8cc1150 Mon Sep 17 00:00:00 2001 From: Thanasis Katsios Date: Mon, 7 Mar 2016 22:33:58 +0200 Subject: [PATCH 4/6] BUG: Fix for issue #12553 --- pandas/core/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index b21557a67235a..928799fb39829 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2813,7 +2813,7 @@ def nunique(self, dropna=True): # unique observations are where sorted values change idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] inc = np.r_[1, val[1:] != val[:-1]] - + # 1st item of each group is a new unique observation mask = isnull(val) if dropna: From b024f28839ada30ecf2ee49d5819914a5a153c9f Mon Sep 17 00:00:00 2001 From: Thanasis Katsios Date: Mon, 7 Mar 2016 22:35:27 +0200 Subject: [PATCH 5/6] BUG: Fix for issue #12553 --- pandas/core/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 928799fb39829..cb8211703cda9 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2808,12 +2808,12 @@ def nunique(self, dropna=True): isnull = com.isnull ids, val = ids[sorter], val[sorter] - + # group boundaries are where group ids change # unique observations are where sorted values change idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]] inc = np.r_[1, val[1:] != val[:-1]] - + # 1st item of each group is a new unique observation mask = isnull(val) if dropna: From 6a034e8286ca1d8c0520fdede90fcaf1577421d0 Mon Sep 17 00:00:00 2001 From: thanasis Date: Mon, 7 Mar 2016 22:45:58 +0200 Subject: [PATCH 6/6] BUG: Fix for issue #12553 --- pandas/core/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index cb8211703cda9..377f758926899 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2792,7 +2792,7 @@ def nunique(self, dropna=True): ids, _, _ = self.grouper.group_info if len(ids) == 0: # bugfix for 12553 - return self._constructor([]) + return Series([]) val = self.obj.get_values()