Skip to content

Commit ca8a6ba

Browse files
committed
BUG: make inplace semantics of DataFrame.where consistent. #2230
1 parent 564175e commit ca8a6ba

File tree

3 files changed

+35
-16
lines changed

3 files changed

+35
-16
lines changed

RELEASE.rst

Lines changed: 2 additions & 0 deletions
Original file line number | Original file line | Diff line number | Diff line change
@@ -57,6 +57,7 @@ pandas 0.9.1
57
- Improve performance of Series/DataFrame.diff (re: #2087)
57
- Improve performance of Series/DataFrame.diff (re: #2087)
58
- Support unary ~ (__invert__) in DataFrame (#2110)
58
- Support unary ~ (__invert__) in DataFrame (#2110)
59
- Turn off pandas-style tick locators and formatters (#2205)
59
- Turn off pandas-style tick locators and formatters (#2205)
60+
- DataFrame[DataFrame] uses DataFrame.where to compute masked frame (#2230)
60

61

61
**Bug fixes**
62
**Bug fixes**
62

63

@@ -100,6 +101,7 @@ pandas 0.9.1
100
- Fix improper MultiIndex conversion issue when assigning
101
- Fix improper MultiIndex conversion issue when assigning
101
e.g. DataFrame.index (#2200)
102
e.g. DataFrame.index (#2200)
102
- Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236)
103
- Fix conversion of mixed-type DataFrame to ndarray with dup columns (#2236)
104+
- Fix duplicate columns issue (#2218, #2219)
103
105
104
pandas 0.9.0
106
pandas 0.9.0
105
============
107
============

pandas/core/frame.py

File mode changed: 100755 → 100644
Lines changed: 13 additions & 6 deletions
Original file line number | Original file line | Diff line number | Diff line change
@@ -1884,7 +1884,7 @@ def _boolean_set(self, key, value):
1884
if self._is_mixed_type:
1884
if self._is_mixed_type:
1885
raise ValueError('Cannot do boolean setting on mixed-type frame')
1885
raise ValueError('Cannot do boolean setting on mixed-type frame')
1886

1886

1887-
self.where(key, value, inplace=True)
1887+
self.where(-key, value, inplace=True)
1888

1888

1889
def _set_item_multiple(self, keys, value):
1889
def _set_item_multiple(self, keys, value):
1890
if isinstance(value, DataFrame):
1890
if isinstance(value, DataFrame):
@@ -4872,7 +4872,6 @@ def where(self, cond, other=NA, inplace=False):
4872
Return a DataFrame with the same shape as self and whose corresponding
4872
Return a DataFrame with the same shape as self and whose corresponding
4873
entries are from self where cond is True and otherwise are from other.
4873
entries are from self where cond is True and otherwise are from other.
4874
4874
4875-
4876
Parameters
4875
Parameters
4877
----------
4876
----------
4878
cond: boolean DataFrame or array
4877
cond: boolean DataFrame or array
@@ -4882,17 +4881,25 @@ def where(self, cond, other=NA, inplace=False):
4882
-------
4881
-------
4883
wh: DataFrame
4882
wh: DataFrame
4884
"""
4883
"""
4885-
if not hasattr(cond,'shape'):
4884+
if not hasattr(cond, 'shape'):
4886-
raise ValueError('where requires an ndarray like object for its condition')
4885+
raise ValueError('where requires an ndarray like object for its '
4886+
'condition')
4887

4887

4888
if isinstance(cond, np.ndarray):
4888
if isinstance(cond, np.ndarray):
4889
if cond.shape != self.shape:
4889
if cond.shape != self.shape:
4890
raise ValueError('Array onditional must be same shape as self')
4890
raise ValueError('Array onditional must be same shape as self')
4891
cond = self._constructor(cond, index=self.index,
4891
cond = self._constructor(cond, index=self.index,
4892
columns=self.columns)
4892
columns=self.columns)
4893+
4893
if cond.shape != self.shape:
4894
if cond.shape != self.shape:
4894
cond = cond.reindex(self.index, columns=self.columns)
4895
cond = cond.reindex(self.index, columns=self.columns)
4895-
cond = cond.fillna(False)
4896+
4897+
if inplace:
4898+
cond = -(cond.fillna(True).astype(bool))
4899+
else:
4900+
cond = cond.fillna(False).astype(bool)
4901+
elif inplace:
4902+
cond = -cond
4896

4903

4897
if isinstance(other, DataFrame):
4904
if isinstance(other, DataFrame):
4898
_, other = self.align(other, join='left', fill_value=NA)
4905
_, other = self.align(other, join='left', fill_value=NA)
@@ -4903,7 +4910,7 @@ def where(self, cond, other=NA, inplace=False):
4903

4910

4904
rs = np.where(cond, self, other)
4911
rs = np.where(cond, self, other)
4905
return self._constructor(rs, self.index, self.columns)
4912
return self._constructor(rs, self.index, self.columns)
4906-
4913+
4907
def mask(self, cond):
4914
def mask(self, cond):
4908
"""
4915
"""
4909
Returns copy of self whose values are replaced with nan if the
4916
Returns copy of self whose values are replaced with nan if the

pandas/tests/test_frame.py

File mode changed: 100755 → 100644
Lines changed: 20 additions & 10 deletions
Original file line number | Original file line | Diff line number | Diff line change
@@ -143,7 +143,8 @@ def test_getitem_boolean(self):
143

143

144
# test df[df >0] works
144
# test df[df >0] works
145
bif = self.tsframe[self.tsframe > 0]
145
bif = self.tsframe[self.tsframe > 0]
146-
bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns)
146+
bifw = DataFrame(np.where(self.tsframe > 0, self.tsframe, np.nan),
147+
index=self.tsframe.index,columns=self.tsframe.columns)
147
self.assert_(isinstance(bif,DataFrame))
148
self.assert_(isinstance(bif,DataFrame))
148
self.assert_(bif.shape == self.tsframe.shape)
149
self.assert_(bif.shape == self.tsframe.shape)
149
assert_frame_equal(bif,bifw)
150
assert_frame_equal(bif,bifw)
@@ -285,8 +286,8 @@ def test_setitem_boolean(self):
285
assert_almost_equal(df.values, values)
286
assert_almost_equal(df.values, values)
286

287

287
# a df that needs alignment first
288
# a df that needs alignment first
288-
df[df[:-1]<0] = 2
289+
df[df[:-1] < 0] = 2
289-
np.putmask(values[:-1],values[:-1]<0,2)
290+
np.putmask(values[:-1], values[:-1] < 0, 2)
290
assert_almost_equal(df.values, values)
291
assert_almost_equal(df.values, values)
291

292

292
self.assertRaises(Exception, df.__setitem__, df * 0, 2)
293
self.assertRaises(Exception, df.__setitem__, df * 0, 2)
@@ -5268,6 +5269,13 @@ def test_where(self):
5268
self.assertRaises(ValueError, df.mask, True)
5269
self.assertRaises(ValueError, df.mask, True)
5269
self.assertRaises(ValueError, df.mask, 0)
5270
self.assertRaises(ValueError, df.mask, 0)
5270

5271

5272+
# where inplace
5273+
df = DataFrame(np.random.randn(5, 3))
5274+
5275+
expected = df.mask(df < 0)
5276+
df.where(df >= 0, np.nan, inplace=True)
5277+
assert_frame_equal(df, expected)
5278+
5271
def test_mask(self):
5279
def test_mask(self):
5272
df = DataFrame(np.random.randn(5, 3))
5280
df = DataFrame(np.random.randn(5, 3))
5273
cond = df > 0
5281
cond = df > 0
@@ -7232,13 +7240,15 @@ def test_xs_view(self):
7232
def test_boolean_indexing(self):
7240
def test_boolean_indexing(self):
7233
idx = range(3)
7241
idx = range(3)
7234
cols = range(3)
7242
cols = range(3)
7235-
df1 = DataFrame(index=idx, columns=cols, \
7243+
df1 = DataFrame(index=idx, columns=cols,
7236-
data=np.array([[0.0, 0.5, 1.0],
7244+
data=np.array([[0.0, 0.5, 1.0],
7237-
[1.5, 2.0, 2.5],
7245+
[1.5, 2.0, 2.5],
7238-
[3.0, 3.5, 4.0]], dtype=float))
7246+
[3.0, 3.5, 4.0]],
7239-
df2 = DataFrame(index=idx, columns=cols, data=np.ones((len(idx), len(cols))))
7247+
dtype=float))
7240-
7248+
df2 = DataFrame(index=idx, columns=cols,
7241-
expected = DataFrame(index=idx, columns=cols, \
7249+
data=np.ones((len(idx), len(cols))))
7250+
7251+
expected = DataFrame(index=idx, columns=cols,
7242
data=np.array([[0.0, 0.5, 1.0],
7252
data=np.array([[0.0, 0.5, 1.0],
7243
[1.5, 2.0, -1],
7253
[1.5, 2.0, -1],
7244
[-1, -1, -1]], dtype=float))
7254
[-1, -1, -1]], dtype=float))

0 commit comments

Comments
 (0)