Skip to content

Commit ffc90d6

Browse files
committed
BUG: non-unique indexers with a list-like now return in the same order as the passed values
1 parent 6d2c57f commit ffc90d6

File tree

7 files changed

+81
-18
lines changed

7 files changed

+81
-18
lines changed

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ pandas 0.11.1
148148
.. _GH3552: https://github.com/pydata/pandas/issues/3552
149149
.. _GH3562: https://github.com/pydata/pandas/issues/3562
150150
.. _GH3586: https://github.com/pydata/pandas/issues/3586
151+
.. _GH3561: https://github.com/pydata/pandas/issues/3561
151152
.. _GH3493: https://github.com/pydata/pandas/issues/3493
152153
.. _GH3579: https://github.com/pydata/pandas/issues/3579
153154
.. _GH3593: https://github.com/pydata/pandas/issues/3593

doc/source/indexing.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,6 +1368,9 @@ incompatible the new object internals are with the ``Index`` functions):
13681368
- ``slice_locs``: returns the "range" to slice between two labels
13691369
- ``get_indexer``: Computes the indexing vector for reindexing / data
13701370
alignment purposes. See the source / docstrings for more on this
1371+
- ``get_indexer_non_unique``: Computes the indexing vector for reindexing / data
1372+
alignment purposes when the index is non-unique. See the source / docstrings
1373+
for more on this
13711374
- ``reindex``: Does any pre-conversion of the input index then calls
13721375
``get_indexer``
13731376
- ``union``, ``intersection``: computes the union or intersection of two

pandas/core/index.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -859,6 +859,19 @@ def get_indexer(self, target, method=None, limit=None):
859859

860860
return com._ensure_platform_int(indexer)
861861

862+
def get_indexer_non_unique(self, target, **kwargs):
863+
""" return an indexer suitable for taking from a non unique index
864+
return the labels in the same order as the target,
865+
target must be an iterable """
866+
target = _ensure_index(target)
867+
pself, ptarget = self._possibly_promote(target)
868+
if pself is not self or ptarget is not target:
869+
return pself.get_indexer_non_unique(ptarget)
870+
871+
if self.is_all_dates:
872+
return Index(Index(self.asi8)._engine.get_indexer_non_unique(target.asi8))
873+
return Index(self._engine.get_indexer_non_unique(target.values))
874+
862875
def _possibly_promote(self, other):
863876
# A hack, but it works
864877
from pandas.tseries.index import DatetimeIndex

pandas/core/indexing.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,8 @@ def _reindex(keys, level=None):
458458
if labels.is_unique:
459459
return _reindex(keyarr, level=level)
460460
else:
461-
mask = labels.isin(keyarr)
462-
return self.obj.take(mask.nonzero()[0], axis=axis, convert=False)
461+
indexer = labels.get_indexer_non_unique(keyarr)
462+
return self.obj.take(indexer, axis=axis, convert=False)
463463

464464
def _convert_to_indexer(self, obj, axis=0):
465465
"""
@@ -569,20 +569,7 @@ def _convert_to_indexer(self, obj, axis=0):
569569

570570
# non-unique (dups)
571571
else:
572-
indexer = []
573-
check = np.arange(len(labels))
574-
lvalues = labels.values
575-
for x in objarr:
576-
# ugh
577-
to_or = lib.map_infer(lvalues, x.__eq__)
578-
if not to_or.any():
579-
raise KeyError('%s not in index' % str(x))
580-
581-
# add the indicies (as we want to take)
582-
indexer.extend(check[to_or])
583-
584-
indexer = Index(indexer)
585-
572+
indexer = check = labels.get_indexer_non_unique(objarr)
586573

587574
mask = check == -1
588575
if mask.any():

pandas/index.pyx

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,37 @@ cdef class IndexEngine:
267267
self._ensure_mapping_populated()
268268
return self.mapping.lookup(values)
269269

270+
def get_indexer_non_unique(self, targets):
271+
""" return an indexer suitable for taking from a non unique index
272+
return the labels in the same order as the target """
270273

274+
cdef:
275+
ndarray values
276+
ndarray[int64_t] result
277+
object v, val
278+
int count = 0
279+
Py_ssize_t i, j, n
280+
281+
self._ensure_mapping_populated()
282+
values = self._get_index_values()
283+
n = len(values)
284+
n_t = len(targets)
285+
result = np.empty(n, dtype=np.int64)
286+
287+
for i in range(n_t):
288+
val = util.get_value_at(targets, i)
289+
290+
for j in range(n):
291+
v = util.get_value_at(values, j)
292+
293+
if v == val:
294+
result[count] = j
295+
count += 1
296+
297+
if count == 0:
298+
raise KeyError
271299

300+
return result[0:count]
272301

273302
cdef class Int64Engine(IndexEngine):
274303

pandas/tests/test_frame.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4668,8 +4668,28 @@ def _check_df(df,cols=None):
46684668
with ensure_clean() as path:
46694669
df.to_csv(path,cols = cols,chunksize=chunksize)
46704670
rs_c = pd.read_csv(path,index_col=0)
4671-
rs_c.columns = df.columns
4672-
assert_frame_equal(df,rs_c,check_names=False)
4671+
4672+
# we wrote them in a different order
4673+
# so compare them in that order
4674+
if cols is not None:
4675+
4676+
if df.columns.is_unique:
4677+
rs_c.columns = cols
4678+
else:
4679+
rs_c.columns = df.columns.take(df.columns.get_indexer_non_unique(cols))
4680+
4681+
for c in cols:
4682+
obj_df = df[c]
4683+
obj_rs = rs_c[c]
4684+
if isinstance(obj_df,Series):
4685+
assert_series_equal(obj_df,obj_rs)
4686+
else:
4687+
assert_frame_equal(obj_df,obj_rs,check_names=False)
4688+
4689+
# wrote in the same order
4690+
else:
4691+
rs_c.columns = df.columns
4692+
assert_frame_equal(df,rs_c,check_names=False)
46734693

46744694
chunksize=5
46754695
N = int(chunksize*2.5)

pandas/tests/test_indexing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,16 @@ def test_dups_fancy_indexing(self):
784784

785785
assert_frame_equal(df,result)
786786

787+
# GH 3561, dups not in selected order
788+
ind = ['A', 'A', 'B', 'C']
789+
df = DataFrame({'test':range(len(ind))}, index=ind)
790+
rows = ['C', 'B']
791+
res = df.ix[rows]
792+
self.assert_(rows == list(res.index))
793+
794+
res = df.ix[Index(rows)]
795+
self.assert_(Index(rows).equals(res.index))
796+
787797
def test_indexing_mixed_frame_bug(self):
788798

789799
# GH3492

0 commit comments

Comments
 (0)