From 3d1474683d678d1fbca778e071d27b7c9f45bd09 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 20 Aug 2013 17:09:26 -0400 Subject: [PATCH 1/2] TST: GH4312, assignment with iloc/loc and an dtype change --- pandas/tests/test_indexing.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 26cef3acbfad1..12b7a239b2758 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -964,11 +964,12 @@ def test_ix_assign_column_mixed(self): # GH 3668, mixed frame with series value df = DataFrame({'x':lrange(10), 'y':lrange(10,20),'z' : 'bar'}) expected = df.copy() - expected.ix[0, 'y'] = 1000 - expected.ix[2, 'y'] = 1200 - expected.ix[4, 'y'] = 1400 - expected.ix[6, 'y'] = 1600 - expected.ix[8, 'y'] = 1800 + + for i in range(5): + indexer = i*2 + v = 1000 + i*200 + expected.ix[indexer, 'y'] = v + self.assert_(expected.ix[indexer, 'y'] == v) df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100 assert_frame_equal(df,expected) @@ -1197,6 +1198,22 @@ def gen_expected(df,mask): expected = gen_expected(df,mask) assert_frame_equal(result,expected) + def test_astype_assignment_with_iloc(self): + + # GH4312 + df_orig = DataFrame([['1','2','3','.4',5,6.,'foo']],columns=list('ABCDEFG')) + + df = df_orig.copy() + df.iloc[:,0:3] = df.iloc[:,0:3].astype(int) + result = df.get_dtype_counts().sort_index() + expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index() + assert_series_equal(result,expected) + + df = df_orig.copy() + df.iloc[:,0:3] = df.iloc[:,0:3].convert_objects(convert_numeric=True) + result = df.get_dtype_counts().sort_index() + expected = Series({ 'int64' : 4, 'float64' : 1, 'object' : 2 }).sort_index() + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], From 3b5d06eb1e28767a17cedab21c52a59ef99e2c0c Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 20 Aug 2013 18:03:10 -0400 Subject: [PATCH 2/2] BUG: Fix assignment with iloc/loc involving a dtype change in an existing column (GH4312) --- doc/source/release.rst | 2 + pandas/core/common.py | 42 +++----------------- pandas/core/groupby.py | 4 +- pandas/core/indexing.py | 12 +++--- pandas/core/internals.py | 74 +++++++++++++++++++++-------------- pandas/tests/test_common.py | 8 ++-- pandas/tests/test_frame.py | 3 +- pandas/tests/test_indexing.py | 1 + 8 files changed, 68 insertions(+), 78 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 9be9a03b0346e..b69c6aefd07d3 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -265,6 +265,8 @@ See :ref:`Internal Refactoring` - Fixed issue where ``DataFrame.apply`` was reraising exceptions incorrectly (causing the original stack trace to be truncated). - Fix selection with ``ix/loc`` and non_unique selectors (:issue:`4619`) + - Fix assignment with iloc/loc involving a dtype change in an existing column (:issue:`4312`) + have internal setitem_with_indexer in core/indexing to use Block.setitem pandas 0.12 =========== diff --git a/pandas/core/common.py b/pandas/core/common.py index e46abb4aa83a6..0d3bd1a0c6de2 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -891,42 +891,6 @@ def changeit(): return result, False - -def _maybe_upcast_indexer(result, indexer, other, dtype=None): - """ a safe version of setitem that (potentially upcasts the result - return the result and a changed flag - """ - - other = _maybe_cast_scalar(result.dtype, other) - original_dtype = result.dtype - - def changeit(): - # our type is wrong here, need to upcast - r, fill_value = _maybe_upcast( - result, fill_value=other, dtype=dtype, copy=True) - try: - r[indexer] = other - except: - - # if we hit this then we still have an incompatible type - r[indexer] = fill_value - - # if we have changed to floats, might want to cast back if we can - r = _possibly_downcast_to_dtype(r, original_dtype) - return r, True - - new_dtype, fill_value = _maybe_promote(original_dtype, other) - if new_dtype != result.dtype: - return changeit() - - try: - result[indexer] = other - except: - return changeit() - - return result, False - - def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False): """ provide explicty type promotion and coercion @@ -987,6 +951,12 @@ def _possibly_downcast_to_dtype(result, dtype): dtype = np.dtype(dtype) try: + + # don't allow upcasts here + if dtype.kind == result.dtype.kind: + if result.dtype.itemsize <= dtype.itemsize: + return result + if issubclass(dtype.type, np.floating): return result.astype(dtype) elif dtype == np.bool_ or issubclass(dtype.type, np.integer): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index d85ef1abd0fbc..1f15f1a8ae10d 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1557,7 +1557,9 @@ def transform(self, func, *args, **kwargs): # need to do a safe put here, as the dtype may be different # this needs to be an ndarray - result,_ = com._maybe_upcast_indexer(result, indexer, res) + result = Series(result) + result.loc[indexer] = res + result = result.values # downcast if we can (and need) result = _possibly_downcast_to_dtype(result, dtype) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3a0123b40aa39..64760cdba60ff 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -124,11 +124,13 @@ def _setitem_with_indexer(self, indexer, value): item_labels = self.obj._get_axis(info_axis) def setter(item, v): - data = self.obj[item] - values = data.values - if np.prod(values.shape): - result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None)) - self.obj[item] = result + s = self.obj[item] + pi = plane_indexer[0] if len(plane_indexer) == 1 else plane_indexer + + # set the item, possibly having a dtype change + s = s.copy() + s._data = s._data.setitem(pi,v) + self.obj[item] = s labels = item_labels[info_idx] diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ecce508284fc1..9ca4dcda361ba 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -39,6 +39,7 @@ class Block(PandasObject): is_float = False is_integer = False is_complex = False + is_datetime = False is_bool = False is_object = False is_sparse = False @@ -453,10 +454,19 @@ def _can_hold_element(self, value): def _try_cast(self, value): raise NotImplementedError() - def _try_cast_result(self, result): + def _try_cast_result(self, result, dtype=None): """ try to cast the result to our original type, we may have roundtripped thru object in the mean-time """ - return result + if dtype is None: + dtype = self.dtype + + if self.is_integer or self.is_bool or self.is_datetime: + pass + elif self.is_float and result.dtype == self.dtype: + return result + + # may need to change the dtype here + return _possibly_downcast_to_dtype(result, dtype) def _try_coerce_args(self, values, other): """ provide coercion to our input arguments """ @@ -513,27 +523,29 @@ def setitem(self, indexer, value): """ set the value inplace; return a new block (of a possibly different dtype) indexer is a direct slice/positional indexer; value must be a compaitable shape """ - values = self.values - if self.ndim == 2: - values = values.T + # coerce args + values, value = self._try_coerce_args(self.values, value) + arr_value = np.array(value) - # 2-d (DataFrame) are represented as a transposed array - if self._can_hold_element(value): - try: - values[indexer] = value - return [ self ] - except (IndexError): - return [ self ] - except: - pass + # cast the values to a type that can hold nan (if necessary) + if not self._can_hold_element(value): + dtype, _ = com._maybe_promote(arr_value.dtype) + values = values.astype(dtype) - # create an indexing mask, the putmask which potentially changes the dtype - indices = np.arange(np.prod(values.shape)).reshape(values.shape) - mask = indices[indexer] == indices - if self.ndim == 2: - mask = mask.T + try: + # set and return a block + transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) + values = transf(values) + values[indexer] = value + + # coerce and try to infer the dtypes of the result + values = self._try_coerce_result(values) + values = self._try_cast_result(values, 'infer') + return [make_block(transf(values), self.items, self.ref_items, ndim=self.ndim, fastpath=True)] + except: + pass - return self.putmask(mask, value, inplace=True) + return [ self ] def putmask(self, mask, new, inplace=False): """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -585,7 +597,10 @@ def create_block(v, m, n, item, reshape=True): if nv is None: dtype, _ = com._maybe_promote(n.dtype) nv = v.astype(dtype) - np.putmask(nv, m, n) + try: + nv[m] = n + except: + np.putmask(nv, m, n) if reshape: nv = _block_shape(nv) @@ -842,10 +857,6 @@ class NumericBlock(Block): is_numeric = True _can_hold_na = True - def _try_cast_result(self, result): - return _possibly_downcast_to_dtype(result, self.dtype) - - class FloatBlock(NumericBlock): is_float = True _downcast_dtype = 'int64' @@ -1104,6 +1115,7 @@ def re_replacer(s): class DatetimeBlock(Block): + is_datetime = True _can_hold_na = True def __init__(self, values, items, ref_items, fastpath=False, placement=None, **kwargs): @@ -1119,8 +1131,8 @@ def _gi(self, arg): def _can_hold_element(self, element): if is_list_like(element): element = np.array(element) - return element.dtype == _NS_DTYPE - return com.is_integer(element) or isinstance(element, datetime) + return element.dtype == _NS_DTYPE or element.dtype == np.int64 + return com.is_integer(element) or isinstance(element, datetime) or isnull(element) def _try_cast(self, element): try: @@ -1133,10 +1145,10 @@ def _try_coerce_args(self, values, other): we are going to compare vs i8, so coerce to integer values is always ndarra like, other may not be """ values = values.view('i8') - if isinstance(other, datetime): - other = lib.Timestamp(other).asm8.view('i8') - elif isnull(other): + if isnull(other) or (np.isscalar(other) and other == tslib.iNaT): other = tslib.iNaT + elif isinstance(other, datetime): + other = lib.Timestamp(other).asm8.view('i8') else: other = other.view('i8') @@ -1438,6 +1450,8 @@ def split_block_at(self, item): return [] return super(SparseBlock, self).split_block_at(self, item) + def _try_cast_result(self, result, dtype=None): + return result def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fastpath=False, placement=None): diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 946e640d331cc..799cdd31c0a65 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -127,14 +127,14 @@ def test_nan_to_nat_conversions(): result = df.loc[4,'B'].value assert(result == iNaT) - values = df['B'].values - result, changed = com._maybe_upcast_indexer(values,tuple([slice(8,9)]),np.nan) - assert(isnull(result[8])) + s = df['B'].copy() + s._data = s._data.setitem(tuple([slice(8,9)]),np.nan) + assert(isnull(s[8])) # numpy < 1.7.0 is wrong from distutils.version import LooseVersion if LooseVersion(np.__version__) >= '1.7.0': - assert(result[8] == np.datetime64('NaT')) + assert(s[8].value == np.datetime64('NaT').astype(np.int64)) def test_any_none(): assert(com._any_none(1, 2, 3, None)) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1a65eec8557c0..c8f87a19a5f34 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7669,10 +7669,9 @@ def _check_get(df, cond, check_dtypes = True): # upcasting case (GH # 2794) df = DataFrame(dict([ (c,Series([1]*3,dtype=c)) for c in ['int64','int32','float32','float64'] ])) df.ix[1,:] = 0 - result = df.where(df>=0).get_dtype_counts() - #### when we don't preserver boolean casts #### + #### when we don't preserve boolean casts #### #expected = Series({ 'float32' : 1, 'float64' : 3 }) expected = Series({ 'float32' : 1, 'float64' : 1, 'int32' : 1, 'int64' : 1 }) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 12b7a239b2758..76003de65180f 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -942,6 +942,7 @@ def test_multi_assign(self): # frame on rhs df2.ix[mask, cols]= dft.ix[mask, cols] assert_frame_equal(df2,expected) + df2.ix[mask, cols]= dft.ix[mask, cols] assert_frame_equal(df2,expected)