Skip to content

Commit 7b0349f

Browse files
committed
ENH: API change / refactoring in Series.__getitem__ and __setitem__ to implement #86, related tightening of integer index handling from #592
1 parent: f735a67 · commit: 7b0349f

File tree

10 files changed: 278 additions (+278) and 143 deletions (-143).

pandas/core/generic.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -576,8 +576,7 @@ def truncate(self, before=None, after=None, copy=True):
576576
if before is not None and after is not None:
577577
assert(before <= after)
578578

579-
left, right = self.index.slice_locs(before, after)
580-
result = self[left:right]
579+
result = self.ix[before:after]
581580

582581
if isinstance(self.index, MultiIndex):
583582
result.index = self.index.truncate(before, after)

pandas/core/groupby.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1278,18 +1278,21 @@ def generate_groups(data, label_list, shape, axis=0, factory=lambda x: x):
12781278
elif isinstance(data, DataFrame):
12791279
sorted_data = data.take(indexer, axis=axis)
12801280

1281-
if isinstance(data, DataFrame):
1282-
def slicer(data, slob):
1281+
if isinstance(sorted_data, DataFrame):
1282+
def _get_slice(slob):
12831283
if axis == 0:
1284-
return data[slob]
1284+
return sorted_data[slob]
12851285
else:
1286-
return data.ix[:, slob]
1287-
elif isinstance(data, BlockManager):
1288-
def slicer(data, slob):
1289-
return factory(data.get_slice(slob, axis=axis))
1290-
else:
1291-
def slicer(data, slob):
1292-
return data[slob]
1286+
return sorted_data.ix[:, slob]
1287+
elif isinstance(sorted_data, BlockManager):
1288+
def _get_slice(slob):
1289+
return factory(sorted_data.get_slice(slob, axis=axis))
1290+
elif isinstance(sorted_data, Series):
1291+
def _get_slice(slob):
1292+
return sorted_data._get_values(slob)
1293+
else: # pragma: no cover
1294+
def _get_slice(slob):
1295+
return sorted_data[slob]
12931296

12941297
starts, ends = lib.generate_slices(group_index.astype('i4'),
12951298
np.prod(shape))
@@ -1298,7 +1301,7 @@ def slicer(data, slob):
12981301
if start == end:
12991302
yield i, None
13001303
else:
1301-
yield i, slicer(sorted_data, slice(start, end))
1304+
yield i, _get_slice(slice(start, end))
13021305

13031306
def get_group_index(label_list, shape):
13041307
if len(label_list) == 1:

pandas/core/indexing.py

Lines changed: 41 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def __getitem__(self, key):
3131
else:
3232
return self._getitem_axis(key, axis=0)
3333

34-
def _get(self, label, axis=0):
34+
def _get_label(self, label, axis=0):
3535
try:
3636
return self.obj.xs(label, axis=axis, copy=False)
3737
except Exception:
@@ -96,7 +96,7 @@ def _getitem_tuple(self, tup):
9696
# a bit kludgy
9797
if isinstance(self.obj._get_axis(0), MultiIndex):
9898
try:
99-
return self._get(tup, axis=0)
99+
return self._get_label(tup, axis=0)
100100
except (KeyError, TypeError):
101101
pass
102102

@@ -156,40 +156,36 @@ def _getitem_axis(self, key, axis=0):
156156
if com.is_integer(key):
157157
if isinstance(labels, MultiIndex):
158158
try:
159-
return self._get(key, axis=0)
159+
return self._get_label(key, axis=0)
160160
except (KeyError, TypeError):
161161
if _is_integer_index(self.obj.index.levels[0]):
162162
raise
163163

164164
if not is_int_index:
165165
idx = labels[key]
166166

167-
return self._get(idx, axis=0)
167+
return self._get_label(idx, axis=0)
168168
else:
169169
labels = self.obj._get_axis(axis)
170170
lab = key
171171
if com.is_integer(key) and not _is_integer_index(labels):
172172
lab = labels[key]
173-
return self._get(lab, axis=axis)
173+
return self._get_label(lab, axis=axis)
174174

175175
def _getitem_iterable(self, key, axis=0):
176176
labels = self.obj._get_axis(axis)
177177
axis_name = self.obj._get_axis_name(axis)
178178

179-
# asarray can be unsafe, NumPy strings are weird
180-
if isinstance(key, Index):
181-
# want Index objects to pass through untouched
182-
keyarr = key
183-
else:
184-
keyarr = _asarray_tuplesafe(key)
185-
186-
if keyarr.dtype == np.bool_:
187-
if _is_series(key):
188-
if not key.index.equals(labels):
189-
raise IndexingError('Cannot use boolean index with '
190-
'misaligned or unequal labels')
179+
if com._is_bool_indexer(key):
180+
key = _check_bool_indexer(labels, key)
191181
return self.obj.reindex(**{axis_name : labels[np.asarray(key)]})
192182
else:
183+
if isinstance(key, Index):
184+
# want Index objects to pass through untouched
185+
keyarr = key
186+
else:
187+
# asarray can be unsafe, NumPy strings are weird
188+
keyarr = _asarray_tuplesafe(key)
193189
if _is_integer_dtype(keyarr) and not _is_integer_index(labels):
194190
keyarr = labels.take(keyarr)
195191

@@ -244,14 +240,12 @@ def _convert_to_indexer(self, obj, axis=0):
244240
return slicer
245241

246242
elif _is_list_like(obj):
247-
objarr = _asarray_tuplesafe(obj)
248-
249-
if objarr.dtype == np.bool_:
250-
if not obj.index.equals(labels):
251-
raise IndexingError('Cannot use boolean index with '
252-
'misaligned or unequal labels')
243+
if com._is_bool_indexer(obj):
244+
objarr = _check_bool_indexer(labels, obj)
253245
return objarr
254246
else:
247+
objarr = _asarray_tuplesafe(obj)
248+
255249
# If have integer labels, defer to label-based indexing
256250
if _is_integer_dtype(objarr) and not is_int_index:
257251
return objarr
@@ -330,14 +324,33 @@ class _SeriesIndexer(_NDFrameIndexer):
330324
>>> ts.ix[date1:date2] = 0
331325
"""
332326

333-
def _get(self, label, axis=0):
334-
return self.obj[label]
327+
def _get_integer(self, indexer, axis=0):
328+
return self.obj._get_values(indexer)
335329

336-
def _slice(self, obj, axis=0):
337-
return self.obj[obj]
330+
def _get_label(self, key, axis=0):
331+
return self.obj[key]
332+
333+
def _slice(self, indexer, axis=0):
334+
return self.obj._get_values(indexer)
338335

339336
def _setitem_with_indexer(self, indexer, value):
340-
self.obj[indexer] = value
337+
self.obj._set_values(indexer, value)
338+
339+
def _check_bool_indexer(ax, key):
340+
# boolean indexing, need to check that the data are aligned, otherwise
341+
# disallowed
342+
result = key
343+
if _is_series(key) and key.dtype == np.bool_:
344+
if not key.index.equals(ax):
345+
result = key.reindex(ax)
346+
347+
if isinstance(result, np.ndarray) and result.dtype == np.object_:
348+
mask = com.isnull(result)
349+
if mask.any():
350+
raise IndexingError('cannot index with vector containing '
351+
'NA / NaN values')
352+
353+
return result
341354

342355
def _is_series(obj):
343356
from pandas.core.series import Series
@@ -395,4 +408,3 @@ def _maybe_droplevels(index, key):
395408

396409
return index
397410

398-
_isboolarr = lambda x: np.asarray(x).dtype == np.bool_

0 commit comments

Comments
 (0)