Skip to content

Commit 25ff715

Browse files
authored
CLN: get_flattened_iterator (#35515)
1 parent 7cf2d0f commit 25ff715

File tree

2 files changed: 23 additions and 34 deletions

pandas/core/groupby/ops.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
from pandas.core.sorting import (
5151
compress_group_index,
5252
decons_obs_group_ids,
53-
get_flattened_iterator,
53+
get_flattened_list,
5454
get_group_index,
5555
get_group_index_sorter,
5656
get_indexer_dict,
@@ -153,7 +153,7 @@ def _get_group_keys(self):
153153
comp_ids, _, ngroups = self.group_info
154154

155155
# provide "flattened" iterator for multi-group setting
156-
return get_flattened_iterator(comp_ids, ngroups, self.levels, self.codes)
156+
return get_flattened_list(comp_ids, ngroups, self.levels, self.codes)
157157

158158
def apply(self, f: F, data: FrameOrSeries, axis: int = 0):
159159
mutated = self.mutated

pandas/core/sorting.py

Lines changed: 21 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
""" miscellaneous sorting / groupby utilities """
2-
from typing import Callable, Optional
2+
from collections import defaultdict
3+
from typing import TYPE_CHECKING, Callable, DefaultDict, Iterable, List, Optional, Tuple
34

45
import numpy as np
56

@@ -18,6 +19,9 @@
1819
import pandas.core.algorithms as algorithms
1920
from pandas.core.construction import extract_array
2021

22+
if TYPE_CHECKING:
23+
from pandas.core.indexes.base import Index # noqa:F401
24+
2125
_INT64_MAX = np.iinfo(np.int64).max
2226

2327

@@ -409,7 +413,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
409413
levels : Optional[List], if values is a MultiIndex, list of levels to
410414
apply the key to.
411415
"""
412-
from pandas.core.indexes.api import Index
416+
from pandas.core.indexes.api import Index # noqa:F811
413417

414418
if not key:
415419
return values
@@ -440,36 +444,21 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None):
440444
return result
441445

442446

443-
class _KeyMapper:
444-
"""
445-
Map compressed group id -> key tuple.
446-
"""
447-
448-
def __init__(self, comp_ids, ngroups: int, levels, labels):
449-
self.levels = levels
450-
self.labels = labels
451-
self.comp_ids = comp_ids.astype(np.int64)
452-
453-
self.k = len(labels)
454-
self.tables = [hashtable.Int64HashTable(ngroups) for _ in range(self.k)]
455-
456-
self._populate_tables()
457-
458-
def _populate_tables(self):
459-
for labs, table in zip(self.labels, self.tables):
460-
table.map(self.comp_ids, labs.astype(np.int64))
461-
462-
def get_key(self, comp_id):
463-
return tuple(
464-
level[table.get_item(comp_id)]
465-
for table, level in zip(self.tables, self.levels)
466-
)
467-
468-
469-
def get_flattened_iterator(comp_ids, ngroups, levels, labels):
470-
# provide "flattened" iterator for multi-group setting
471-
mapper = _KeyMapper(comp_ids, ngroups, levels, labels)
472-
return [mapper.get_key(i) for i in range(ngroups)]
447+
def get_flattened_list(
448+
comp_ids: np.ndarray,
449+
ngroups: int,
450+
levels: Iterable["Index"],
451+
labels: Iterable[np.ndarray],
452+
) -> List[Tuple]:
453+
"""Map compressed group id -> key tuple."""
454+
comp_ids = comp_ids.astype(np.int64, copy=False)
455+
arrays: DefaultDict[int, List[int]] = defaultdict(list)
456+
for labs, level in zip(labels, levels):
457+
table = hashtable.Int64HashTable(ngroups)
458+
table.map(comp_ids, labs.astype(np.int64, copy=False))
459+
for i in range(ngroups):
460+
arrays[i].append(level[table.get_item(i)])
461+
return [tuple(array) for array in arrays.values()]
473462

474463

475464
def get_indexer_dict(label_list, keys):

0 commit comments

Comments (0)