File tree Expand file tree Collapse file tree 3 files changed +28
-2
lines changed Expand file tree Collapse file tree 3 files changed +28
-2
lines changed Original file line number Diff line number Diff line change @@ -1014,7 +1014,7 @@ Performance Improvements
1014
1014
- Development support for benchmarking with the `Air Speed Velocity library <https://github.com/spacetelescope/asv/>`_ (:issue:`8316`)
1015
1015
- Added vbench benchmarks for alternative ExcelWriter engines and reading Excel files (:issue:`7171`)
1016
1016
- Performance improvements in ``Categorical.value_counts`` (:issue:`10804`)
1017
- - Performance improvements in ``SeriesGroupBy.nunique`` and ``SeriesGroupBy.value_counts`` (:issue:`10820`)
1017
+ - Performance improvements in ``SeriesGroupBy.nunique`` and ``SeriesGroupBy.value_counts`` (:issue:`10820`, :issue:`11077`)
1018
1018
- Performance improvements in ``DataFrame.drop_duplicates`` with integer dtypes (:issue:`10917`)
1019
1019
- 4x improvement in ``timedelta`` string parsing (:issue:`6755`, :issue:`10426`)
1020
1020
- 8x improvement in ``timedelta64`` and ``datetime64`` ops (:issue:`6755`)
Original file line number Diff line number Diff line change @@ -2565,7 +2565,17 @@ def nunique(self, dropna=True):
2565
2565
ids , _ , _ = self .grouper .group_info
2566
2566
val = self .obj .get_values ()
2567
2567
2568
- sorter = np .lexsort ((val , ids ))
2568
+ try :
2569
+ sorter = np .lexsort ((val , ids ))
2570
+ except TypeError : # catches object dtypes
2571
+ assert val .dtype == object , \
2572
+ 'val.dtype must be object, got %s' % val .dtype
2573
+ val , _ = algos .factorize (val , sort = False )
2574
+ sorter = np .lexsort ((val , ids ))
2575
+ isnull = lambda a : a == - 1
2576
+ else :
2577
+ isnull = com .isnull
2578
+
2569
2579
ids , val = ids [sorter ], val [sorter ]
2570
2580
2571
2581
# group boundaries are where group ids change
Original file line number Diff line number Diff line change @@ -5511,6 +5511,22 @@ def test_sort(x):
5511
5511
5512
5512
g .apply (test_sort )
5513
5513
5514
+ def test_nunique_with_object (self ):
5515
+ # GH 11077
5516
+ data = pd .DataFrame (
5517
+ [[100 , 1 , 'Alice' ],
5518
+ [200 , 2 , 'Bob' ],
5519
+ [300 , 3 , 'Charlie' ],
5520
+ [- 400 , 4 , 'Dan' ],
5521
+ [500 , 5 , 'Edith' ]],
5522
+ columns = ['amount' , 'id' , 'name' ]
5523
+ )
5524
+
5525
+ result = data .groupby (['id' , 'amount' ])['name' ].nunique ()
5526
+ index = MultiIndex .from_arrays ([data .id , data .amount ])
5527
+ expected = pd .Series ([1 ] * 5 , name = 'name' , index = index )
5528
+ tm .assert_series_equal (result , expected )
5529
+
5514
5530
5515
5531
def assert_fp_equal (a , b ):
5516
5532
assert (np .abs (a - b ) < 1e-12 ).all ()
You can’t perform that action at this time.
0 commit comments