Skip to content

Commit 564175e

Browse files
committed
Merge remote branch 'jreback/maskv3'
* jreback/maskv3: added back mask method that does condition inversion; added condition testing to where that raised ValueError on an invalid condition (e.g. not an ndarray-like object); added tests for same in core/frame.py; fixed file modes for core/frame.py, test/test_frame.py; relaxed __setitem__ restriction on boolean indexing a frame on an equal sized frame in core/frame.py
2 parents d56d0e6 + a414346 commit 564175e

File tree

2 files changed

+38
-22
lines changed

2 files changed

+38
-22
lines changed

pandas/core/frame.py

File mode changed: 100644 → 100755
Lines changed: 15 additions & 19 deletions
Original file line number | Original file line | Diff line number | Diff line change
@@ -1775,9 +1775,8 @@ def __getitem__(self, key):
1775
elif isinstance(self.columns, MultiIndex):
1775
elif isinstance(self.columns, MultiIndex):
1776
return self._getitem_multilevel(key)
1776
return self._getitem_multilevel(key)
1777
elif isinstance(key, DataFrame):
1777
elif isinstance(key, DataFrame):
1778-
values = key.values
1778+
if key.values.dtype == bool:
1779-
if values.dtype == bool:
1779+
return self.where(key)
1780-
return self.values[values]
1781
else:
1780
else:
1782
raise ValueError('Cannot index using non-boolean DataFrame')
1781
raise ValueError('Cannot index using non-boolean DataFrame')
1783
else:
1782
else:
@@ -1871,11 +1870,6 @@ def __setitem__(self, key, value):
1871
# support boolean setting with DataFrame input, e.g.
1870
# support boolean setting with DataFrame input, e.g.
1872
# df[df > df2] = 0
1871
# df[df > df2] = 0
1873
if isinstance(key, DataFrame):
1872
if isinstance(key, DataFrame):
1874-
if not (key.index.equals(self.index) and
1875-
key.columns.equals(self.columns)):
1876-
raise PandasError('Can only index with like-indexed '
1877-
'DataFrame objects')
1878-
1879
self._boolean_set(key, value)
1873
self._boolean_set(key, value)
1880
elif isinstance(key, (np.ndarray, list)):
1874
elif isinstance(key, (np.ndarray, list)):
1881
return self._set_item_multiple(key, value)
1875
return self._set_item_multiple(key, value)
@@ -1884,18 +1878,13 @@ def __setitem__(self, key, value):
1884
self._set_item(key, value)
1878
self._set_item(key, value)
1885

1879

1886
def _boolean_set(self, key, value):
1880
def _boolean_set(self, key, value):
1887-
mask = key.values
1881+
if key.values.dtype != np.bool_:
1888-
if mask.dtype != np.bool_:
1889
raise ValueError('Must pass DataFrame with boolean values only')
1882
raise ValueError('Must pass DataFrame with boolean values only')
1890

1883

1891
if self._is_mixed_type:
1884
if self._is_mixed_type:
1892
raise ValueError('Cannot do boolean setting on mixed-type frame')
1885
raise ValueError('Cannot do boolean setting on mixed-type frame')
1893

1886

1894-
if isinstance(value, DataFrame):
1887+
self.where(key, value, inplace=True)
1895-
assert(value._indexed_same(self))
1896-
np.putmask(self.values, mask, value.values)
1897-
else:
1898-
self.values[mask] = value
1899

1888

1900
def _set_item_multiple(self, keys, value):
1889
def _set_item_multiple(self, keys, value):
1901
if isinstance(value, DataFrame):
1890
if isinstance(value, DataFrame):
@@ -4878,7 +4867,7 @@ def combineMult(self, other):
4878
"""
4867
"""
4879
return self.mul(other, fill_value=1.)
4868
return self.mul(other, fill_value=1.)
4880

4869

4881-
def where(self, cond, other):
4870+
def where(self, cond, other=NA, inplace=False):
4882
"""
4871
"""
4883
Return a DataFrame with the same shape as self and whose corresponding
4872
Return a DataFrame with the same shape as self and whose corresponding
4884
entries are from self where cond is True and otherwise are from other.
4873
entries are from self where cond is True and otherwise are from other.
@@ -4893,6 +4882,9 @@ def where(self, cond, other):
4893
-------
4882
-------
4894
wh: DataFrame
4883
wh: DataFrame
4895
"""
4884
"""
4885+
if not hasattr(cond,'shape'):
4886+
raise ValueError('where requires an ndarray like object for its condition')
4887+
4896
if isinstance(cond, np.ndarray):
4888
if isinstance(cond, np.ndarray):
4897
if cond.shape != self.shape:
4889
if cond.shape != self.shape:
4898
raise ValueError('Array onditional must be same shape as self')
4890
raise ValueError('Array onditional must be same shape as self')
@@ -4905,13 +4897,17 @@ def where(self, cond, other):
4905
if isinstance(other, DataFrame):
4897
if isinstance(other, DataFrame):
4906
_, other = self.align(other, join='left', fill_value=NA)
4898
_, other = self.align(other, join='left', fill_value=NA)
4907

4899

4900+
if inplace:
4901+
np.putmask(self.values, cond, other)
4902+
return self
4903+
4908
rs = np.where(cond, self, other)
4904
rs = np.where(cond, self, other)
4909
return self._constructor(rs, self.index, self.columns)
4905
return self._constructor(rs, self.index, self.columns)
4910-
4906+
4911
def mask(self, cond):
4907
def mask(self, cond):
4912
"""
4908
"""
4913
Returns copy of self whose values are replaced with nan if the
4909
Returns copy of self whose values are replaced with nan if the
4914-
corresponding entry in cond is False
4910+
inverted condition is True
4915
4911
4916
Parameters
4912
Parameters
4917
----------
4913
----------
@@ -4921,7 +4917,7 @@ def mask(self, cond):
4921
-------
4917
-------
4922
wh: DataFrame
4918
wh: DataFrame
4923
"""
4919
"""
4924-
return self.where(cond, NA)
4920+
return self.where(~cond, NA)
4925

4921

4926
_EMPTY_SERIES = Series([])
4922
_EMPTY_SERIES = Series([])
4927

4923

pandas/tests/test_frame.py

File mode changed: 100644 → 100755
Lines changed: 23 additions & 3 deletions
Original file line number | Original file line | Diff line number | Diff line change
@@ -141,6 +141,12 @@ def test_getitem_boolean(self):
141

141

142
self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe)
142
self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe)
143

143

144+
# test df[df >0] works
145+
bif = self.tsframe[self.tsframe > 0]
146+
bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns)
147+
self.assert_(isinstance(bif,DataFrame))
148+
self.assert_(bif.shape == self.tsframe.shape)
149+
assert_frame_equal(bif,bifw)
144

150

145
def test_getitem_boolean_list(self):
151
def test_getitem_boolean_list(self):
146
df = DataFrame(np.arange(12).reshape(3,4))
152
df = DataFrame(np.arange(12).reshape(3,4))
@@ -278,7 +284,11 @@ def test_setitem_boolean(self):
278
values[values == 5] = 0
284
values[values == 5] = 0
279
assert_almost_equal(df.values, values)
285
assert_almost_equal(df.values, values)
280

286

281-
self.assertRaises(Exception, df.__setitem__, df[:-1] > 0, 2)
287+
# a df that needs alignment first
288+
df[df[:-1]<0] = 2
289+
np.putmask(values[:-1],values[:-1]<0,2)
290+
assert_almost_equal(df.values, values)
291+
282
self.assertRaises(Exception, df.__setitem__, df * 0, 2)
292
self.assertRaises(Exception, df.__setitem__, df * 0, 2)
283

293

284
# index with DataFrame
294
# index with DataFrame
@@ -5248,14 +5258,24 @@ def test_where(self):
5248
for k, v in rs.iteritems():
5258
for k, v in rs.iteritems():
5249
assert_series_equal(v, np.where(cond[k], df[k], other5))
5259
assert_series_equal(v, np.where(cond[k], df[k], other5))
5250

5260

5251-
assert_frame_equal(rs, df.mask(cond))
5252-
5253
err1 = (df + 1).values[0:2, :]
5261
err1 = (df + 1).values[0:2, :]
5254
self.assertRaises(ValueError, df.where, cond, err1)
5262
self.assertRaises(ValueError, df.where, cond, err1)
5255

5263

5256
err2 = cond.ix[:2, :].values
5264
err2 = cond.ix[:2, :].values
5257
self.assertRaises(ValueError, df.where, err2, other1)
5265
self.assertRaises(ValueError, df.where, err2, other1)
5258

5266

5267+
# invalid conditions
5268+
self.assertRaises(ValueError, df.mask, True)
5269+
self.assertRaises(ValueError, df.mask, 0)
5270+
5271+
def test_mask(self):
5272+
df = DataFrame(np.random.randn(5, 3))
5273+
cond = df > 0
5274+
5275+
rs = df.where(cond, np.nan)
5276+
assert_frame_equal(rs, df.mask(df <= 0))
5277+
assert_frame_equal(rs, df.mask(~cond))
5278+
5259

5279

5260
#----------------------------------------------------------------------
5280
#----------------------------------------------------------------------
5261
# Transposing
5281
# Transposing

0 commit comments

Comments
 (0)