@@ -651,30 +651,19 @@ def factorize_(
651
651
652
652
found_groups .append (np .array (expect ))
653
653
else :
654
- idx , groups = pd .factorize (flat , sort = sort ) # type: ignore[arg-type]
655
654
if expect is not None and reindex :
656
- assert sort
657
- # https://stackoverflow.com/questions/5036816/numpy-lookup-map-or-point/5036900#5036900
658
- # sorter = np.argsort(expect)
659
- # groups = expect[(sorter,)] if sort else expect
660
- #ii = np.argsort(groups)
661
- #C = np.digitize(idx, groups[ii]) - 1
662
- #idx = ii[C]
663
- # key=np.argsort(groups)
664
- # idx=key[groups[key].searchsorted(idx)]
665
- inds = np .searchsorted (expect , groups )
666
- # print(groups, inds)
667
- mask = ~ np .isin (groups , expect ) | (inds == len (expect ))
668
- codes_to_nan_out = np .arange (len (groups ))[mask ]
669
- print (codes_to_nan_out , groupvar .shape , len (groups ))
670
- # codes_to_nan_out, groups, groups[codes_to_nan_out]
671
- # key=np.argsort(expect)
672
- # key = np.arange(len(expect))
673
- # idx=key[groups[key].searchsorted(idx)]
674
- idx = idx [ ]
675
- idx [np .isin (idx , codes_to_nan_out )] = - 1
676
- print (np .unique (idx ))
677
-
655
+ sorter = np .argsort (expect )
656
+ groups = expect [(sorter ,)] if sort else expect
657
+ idx = np .searchsorted (expect , flat , sorter = sorter )
658
+ mask = ~ np .isin (flat , expect ) | isnull (flat ) | (idx == len (expect ))
659
+ if not sort :
660
+ # idx is the index into the sorted array.
661
+ # if we didn't want sorting, unsort it back
662
+ idx [(idx == len (expect ),)] = - 1
663
+ idx = sorter [(idx ,)]
664
+ idx [mask ] = - 1
665
+ else :
666
+ idx , groups = pd .factorize (flat , sort = sort ) # type: ignore[arg-type]
678
667
679
668
found_groups .append (np .array (groups ))
680
669
factorized .append (idx .reshape (groupvar .shape ))
0 commit comments