
API/CLN: setitem in Series now consistent with DataFrame #4624


Merged: 2 commits, Aug 21, 2013
2 changes: 2 additions & 0 deletions doc/source/release.rst
@@ -265,6 +265,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
- Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly
(causing the original stack trace to be truncated).
- Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`)
- Fix assignment with ``iloc/loc`` involving a dtype change in an existing column (:issue:`4312`);
  internal ``_setitem_with_indexer`` in ``core/indexing`` now uses ``Block.setitem``

pandas 0.12
===========
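To make the release note above concrete, this is the `iloc` pattern from :issue:`4312` that the change is meant to support (it mirrors the new test added in `test_indexing.py` further down). Under this branch the object columns are expected to change dtype in place rather than raise or silently coerce:

```python
from pandas import DataFrame

# Columns A-C start as object (strings of digits); assigning the converted
# slice back through iloc should change their dtype to int64 in place.
df = DataFrame([['1', '2', '3', '.4', 5, 6.0, 'foo']], columns=list('ABCDEFG'))
df.iloc[:, 0:3] = df.iloc[:, 0:3].astype(int)
print(df.dtypes)
```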
42 changes: 6 additions & 36 deletions pandas/core/common.py
@@ -891,42 +891,6 @@ def changeit():

return result, False


def _maybe_upcast_indexer(result, indexer, other, dtype=None):
""" a safe version of setitem that (potentially upcasts the result
return the result and a changed flag
"""

other = _maybe_cast_scalar(result.dtype, other)
original_dtype = result.dtype

def changeit():
# our type is wrong here, need to upcast
r, fill_value = _maybe_upcast(
result, fill_value=other, dtype=dtype, copy=True)
try:
r[indexer] = other
except:

# if we hit this then we still have an incompatible type
r[indexer] = fill_value

# if we have changed to floats, might want to cast back if we can
r = _possibly_downcast_to_dtype(r, original_dtype)
return r, True

new_dtype, fill_value = _maybe_promote(original_dtype, other)
if new_dtype != result.dtype:
return changeit()

try:
result[indexer] = other
except:
return changeit()

return result, False


def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
""" provide explicty type promotion and coercion

@@ -987,6 +951,12 @@ def _possibly_downcast_to_dtype(result, dtype):
dtype = np.dtype(dtype)

try:

# don't allow upcasts here
if dtype.kind == result.dtype.kind:
if result.dtype.itemsize <= dtype.itemsize:
return result

if issubclass(dtype.type, np.floating):
return result.astype(dtype)
elif dtype == np.bool_ or issubclass(dtype.type, np.integer):
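The guard added to `_possibly_downcast_to_dtype` above only skips casts that would widen the data: if the target dtype has the same kind and an equal or larger itemsize, the result is returned untouched. A minimal stand-alone sketch of that rule (the helper name is illustrative, not part of pandas):

```python
import numpy as np

def _skip_upcast(result, dtype):
    # simplified stand-in for the new guard: never "downcast" to a wider
    # dtype of the same kind, otherwise perform the cast
    dtype = np.dtype(dtype)
    if dtype.kind == result.dtype.kind and result.dtype.itemsize <= dtype.itemsize:
        return result            # already narrow enough, leave as-is
    return result.astype(dtype)

print(_skip_upcast(np.array([1, 2, 3], dtype='int32'), 'int64').dtype)   # int32, upcast skipped
print(_skip_upcast(np.array([1.0, 2.0]), 'float32').dtype)               # float32, real downcast
```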
4 changes: 3 additions & 1 deletion pandas/core/groupby.py
@@ -1557,7 +1557,9 @@ def transform(self, func, *args, **kwargs):

# need to do a safe put here, as the dtype may be different
# this needs to be an ndarray
result,_ = com._maybe_upcast_indexer(result, indexer, res)
result = Series(result)
result.loc[indexer] = res
result = result.values

# downcast if we can (and need)
result = _possibly_downcast_to_dtype(result, dtype)
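The `groupby.transform` change above replaces the removed `_maybe_upcast_indexer` call with an ordinary `Series` assignment, which is allowed to change dtype before the values are taken back out. A hedged sketch of that pattern with made-up data:

```python
import numpy as np
from pandas import Series

# result starts as an int64 ndarray; the transformed group values are floats,
# so a plain ndarray __setitem__ would silently truncate them.
result = np.zeros(4, dtype='int64')
indexer = np.array([1, 3])
res = np.array([0.5, 2.5])

wrapped = Series(result)      # wrap so setitem may change the dtype
wrapped.loc[indexer] = res    # upcasts to float64 instead of truncating
result = wrapped.values
print(result, result.dtype)
```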
12 changes: 7 additions & 5 deletions pandas/core/indexing.py
@@ -124,11 +124,13 @@ def _setitem_with_indexer(self, indexer, value):
item_labels = self.obj._get_axis(info_axis)

def setter(item, v):
data = self.obj[item]
values = data.values
if np.prod(values.shape):
result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None))
self.obj[item] = result
s = self.obj[item]
pi = plane_indexer[0] if len(plane_indexer) == 1 else plane_indexer

# set the item, possibly having a dtype change
s = s.copy()
s._data = s._data.setitem(pi,v)
self.obj[item] = s

labels = item_labels[info_idx]

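With the new `setter` above, a per-column assignment is routed through `Block.setitem` on a copy of the column, so a `loc`/`iloc` assignment may change the column's dtype. A small illustration of the intended user-visible behaviour (assuming a build with this change applied):

```python
from pandas import DataFrame

df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})   # both columns int64
df.loc[1, 'b'] = 2.5      # setting a float into an int64 column needs a dtype change
print(df.dtypes)          # 'b' is expected to upcast to float64
print(df.loc[1, 'b'])     # 2.5
```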
74 changes: 44 additions & 30 deletions pandas/core/internals.py
@@ -39,6 +39,7 @@ class Block(PandasObject):
is_float = False
is_integer = False
is_complex = False
is_datetime = False
is_bool = False
is_object = False
is_sparse = False
@@ -453,10 +454,19 @@ def _can_hold_element(self, value):
def _try_cast(self, value):
raise NotImplementedError()

def _try_cast_result(self, result):
def _try_cast_result(self, result, dtype=None):
""" try to cast the result to our original type,
we may have roundtripped thru object in the mean-time """
return result
if dtype is None:
dtype = self.dtype

if self.is_integer or self.is_bool or self.is_datetime:
pass
elif self.is_float and result.dtype == self.dtype:
return result

# may need to change the dtype here
return _possibly_downcast_to_dtype(result, dtype)

def _try_coerce_args(self, values, other):
""" provide coercion to our input arguments """
@@ -513,27 +523,29 @@ def setitem(self, indexer, value):
""" set the value inplace; return a new block (of a possibly different dtype)
indexer is a direct slice/positional indexer; value must be a compatible shape """

values = self.values
if self.ndim == 2:
values = values.T
# coerce args
values, value = self._try_coerce_args(self.values, value)
arr_value = np.array(value)

# 2-d (DataFrame) are represented as a transposed array
if self._can_hold_element(value):
try:
values[indexer] = value
return [ self ]
except (IndexError):
return [ self ]
except:
pass
# cast the values to a type that can hold nan (if necessary)
if not self._can_hold_element(value):
dtype, _ = com._maybe_promote(arr_value.dtype)
values = values.astype(dtype)

# create an indexing mask, the putmask which potentially changes the dtype
indices = np.arange(np.prod(values.shape)).reshape(values.shape)
mask = indices[indexer] == indices
if self.ndim == 2:
mask = mask.T
try:
# set and return a block
transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x)
values = transf(values)
values[indexer] = value

# coerce and try to infer the dtypes of the result
values = self._try_coerce_result(values)
values = self._try_cast_result(values, 'infer')
return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)]
except:
pass

return self.putmask(mask, value, inplace=True)
return [ self ]

def putmask(self, mask, new, inplace=False):
""" putmask the data to the block; it is possible that we may create a new dtype of block
@@ -585,7 +597,10 @@ def create_block(v, m, n, item, reshape=True):
if nv is None:
dtype, _ = com._maybe_promote(n.dtype)
nv = v.astype(dtype)
np.putmask(nv, m, n)
try:
nv[m] = n
except:
np.putmask(nv, m, n)

if reshape:
nv = _block_shape(nv)
@@ -842,10 +857,6 @@ class NumericBlock(Block):
is_numeric = True
_can_hold_na = True

def _try_cast_result(self, result):
return _possibly_downcast_to_dtype(result, self.dtype)


class FloatBlock(NumericBlock):
is_float = True
_downcast_dtype = 'int64'
@@ -1104,6 +1115,7 @@ def re_replacer(s):


class DatetimeBlock(Block):
is_datetime = True
_can_hold_na = True

def __init__(self, values, items, ref_items, fastpath=False, placement=None, **kwargs):
@@ -1119,8 +1131,8 @@ def _gi(self, arg):
def _can_hold_element(self, element):
if is_list_like(element):
element = np.array(element)
return element.dtype == _NS_DTYPE
return com.is_integer(element) or isinstance(element, datetime)
return element.dtype == _NS_DTYPE or element.dtype == np.int64
return com.is_integer(element) or isinstance(element, datetime) or isnull(element)

def _try_cast(self, element):
try:
@@ -1133,10 +1145,10 @@ def _try_coerce_args(self, values, other):
we are going to compare vs i8, so coerce to integer
values is always ndarray-like, other may not be """
values = values.view('i8')
if isinstance(other, datetime):
other = lib.Timestamp(other).asm8.view('i8')
elif isnull(other):
if isnull(other) or (np.isscalar(other) and other == tslib.iNaT):
other = tslib.iNaT
elif isinstance(other, datetime):
other = lib.Timestamp(other).asm8.view('i8')
else:
other = other.view('i8')

@@ -1438,6 +1450,8 @@ def split_block_at(self, item):
return []
return super(SparseBlock, self).split_block_at(self, item)

def _try_cast_result(self, result, dtype=None):
return result

def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fastpath=False, placement=None):

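The new `Block.setitem` above roughly does: coerce the arguments, promote the block's values if they cannot hold the incoming value, perform a plain `__setitem__` on the (transposed, for 2-d) values, then attempt to infer/downcast the result via `_try_cast_result`. The following is an illustrative stand-alone sketch of that flow, not the actual pandas implementation:

```python
import numpy as np

def setitem_sketch(values, indexer, value):
    """Rough stand-in for the promote -> set -> downcast flow in Block.setitem."""
    arr_value = np.asarray(value)
    # promote the storage dtype if it cannot safely hold the incoming value
    if not np.can_cast(arr_value.dtype, values.dtype):
        values = values.astype(np.promote_types(values.dtype, arr_value.dtype))
    values[indexer] = value
    # try to downcast back if nothing was lost, mirroring _try_cast_result('infer')
    if values.dtype.kind == 'f' and np.all(values == values.astype('int64')):
        values = values.astype('int64')
    return values

out = setitem_sketch(np.array([1, 2, 3]), slice(0, 2), [10, 20])
print(out, out.dtype)   # stays int64
out = setitem_sketch(np.array([1, 2, 3]), 0, 1.5)
print(out, out.dtype)   # upcast to float64
```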
8 changes: 4 additions & 4 deletions pandas/tests/test_common.py
@@ -127,14 +127,14 @@ def test_nan_to_nat_conversions():
result = df.loc[4,'B'].value
assert(result == iNaT)

values = df['B'].values
result, changed = com._maybe_upcast_indexer(values,tuple([slice(8,9)]),np.nan)
assert(isnull(result[8]))
s = df['B'].copy()
s._data = s._data.setitem(tuple([slice(8,9)]),np.nan)
assert(isnull(s[8]))

# numpy < 1.7.0 is wrong
from distutils.version import LooseVersion
if LooseVersion(np.__version__) >= '1.7.0':
assert(result[8] == np.datetime64('NaT'))
assert(s[8].value == np.datetime64('NaT').astype(np.int64))

def test_any_none():
assert(com._any_none(1, 2, 3, None))
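The rewritten assertion above goes through `Block.setitem` on the Series' internal data. The same behaviour is visible through ordinary Series assignment: setting `np.nan` into a `datetime64[ns]` Series should store `NaT` without upcasting to object. A hedged user-level equivalent:

```python
import numpy as np
from pandas import Series, date_range, isnull

s = Series(date_range('20130101', periods=3))
s[1] = np.nan            # NaN assigned to a datetime column should become NaT
print(isnull(s[1]))      # True
print(s.dtype)           # stays datetime64[ns]
```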
3 changes: 1 addition & 2 deletions pandas/tests/test_frame.py
@@ -7669,10 +7669,9 @@ def _check_get(df, cond, check_dtypes = True):
# upcasting case (GH # 2794)
df = DataFrame(dict([ (c,Series([1]*3,dtype=c)) for c in ['int64','int32','float32','float64'] ]))
df.ix[1,:] = 0

result = df.where(df>=0).get_dtype_counts()

#### when we don't preserver boolean casts ####
#### when we don't preserve boolean casts ####
#expected = Series({ 'float32' : 1, 'float64' : 3 })

expected = Series({ 'float32' : 1, 'float64' : 1, 'int32' : 1, 'int64' : 1 })
28 changes: 23 additions & 5 deletions pandas/tests/test_indexing.py
@@ -942,6 +942,7 @@ def test_multi_assign(self):
# frame on rhs
df2.ix[mask, cols]= dft.ix[mask, cols]
assert_frame_equal(df2,expected)

df2.ix[mask, cols]= dft.ix[mask, cols]
assert_frame_equal(df2,expected)

@@ -964,11 +965,12 @@ def test_ix_assign_column_mixed(self):
# GH 3668, mixed frame with series value
df = DataFrame({'x':lrange(10), 'y':lrange(10,20),'z' : 'bar'})
expected = df.copy()
expected.ix[0, 'y'] = 1000
expected.ix[2, 'y'] = 1200
expected.ix[4, 'y'] = 1400
expected.ix[6, 'y'] = 1600
expected.ix[8, 'y'] = 1800

for i in range(5):
indexer = i*2
v = 1000 + i*200
expected.ix[indexer, 'y'] = v
self.assert_(expected.ix[indexer, 'y'] == v)

df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100
assert_frame_equal(df,expected)
@@ -1197,6 +1199,22 @@ def gen_expected(df,mask):
expected = gen_expected(df,mask)
assert_frame_equal(result,expected)

def test_astype_assignment_with_iloc(self):

# GH4312
df_orig = DataFrame([['1','2','3','.4',5,6.,'foo']],columns=list('ABCDEFG'))

df = df_orig.copy()
df.iloc[:,0:3] = df.iloc[:,0:3].astype(int)
result = df.get_dtype_counts().sort_index()
expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index()
assert_series_equal(result,expected)

df = df_orig.copy()
df.iloc[:,0:3] = df.iloc[:,0:3].convert_objects(convert_numeric=True)
result = df.get_dtype_counts().sort_index()
expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index()
assert_series_equal(result,expected)

if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],