Skip to content

BUG: bug in deep copy of datetime tz-aware objects, #11794 #11796

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ Bug Fixes
- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)

- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`)



Expand Down
23 changes: 18 additions & 5 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def _isnull_ndarraylike(obj):
values = getattr(obj, 'values', obj)
dtype = values.dtype

if dtype.kind in ('O', 'S', 'U'):
if is_string_dtype(dtype):
if is_categorical_dtype(values):
from pandas import Categorical
if not isinstance(values, Categorical):
Expand All @@ -243,7 +243,7 @@ def _isnull_ndarraylike(obj):
# Working around NumPy ticket 1542
shape = values.shape

if dtype.kind in ('S', 'U'):
if is_string_like_dtype(dtype):
result = np.zeros(values.shape, dtype=bool)
else:
result = np.empty(shape, dtype=bool)
Expand All @@ -267,11 +267,11 @@ def _isnull_ndarraylike_old(obj):
values = getattr(obj, 'values', obj)
dtype = values.dtype

if dtype.kind in ('O', 'S', 'U'):
if is_string_dtype(dtype):
# Working around NumPy ticket 1542
shape = values.shape

if values.dtype.kind in ('S', 'U'):
if is_string_like_dtype(dtype):
result = np.zeros(values.shape, dtype=bool)
else:
result = np.empty(shape, dtype=bool)
Expand Down Expand Up @@ -2208,13 +2208,17 @@ def is_numeric_v_string_like(a, b):

is_a_numeric_array = is_a_array and is_numeric_dtype(a)
is_b_numeric_array = is_b_array and is_numeric_dtype(b)
is_a_string_array = is_a_array and is_string_like_dtype(a)
is_b_string_array = is_b_array and is_string_like_dtype(b)

is_a_scalar_string_like = not is_a_array and is_string_like(a)
is_b_scalar_string_like = not is_b_array and is_string_like(b)

return (
is_a_numeric_array and is_b_scalar_string_like) or (
is_b_numeric_array and is_a_scalar_string_like
is_b_numeric_array and is_a_scalar_string_like) or (
is_a_numeric_array and is_b_string_array) or (
is_b_numeric_array and is_a_string_array
)

def is_datetimelike_v_numeric(a, b):
Expand Down Expand Up @@ -2257,6 +2261,15 @@ def is_numeric_dtype(arr_or_dtype):
and not issubclass(tipo, (np.datetime64, np.timedelta64)))


def is_string_dtype(arr_or_dtype):
    """Return True if the dtype is object ('O'), bytes ('S') or unicode ('U').

    Note: ``object`` dtype is included here, so this matches arrays that may
    hold strings mixed with arbitrary Python objects; see
    ``is_string_like_dtype`` for the strict string-only check.
    """
    kind = _get_dtype(arr_or_dtype).kind
    return kind in 'OSU'

def is_string_like_dtype(arr_or_dtype):
    """Return True only for fixed-width bytes ('S') or unicode ('U') dtypes.

    ``object`` ('O') is deliberately excluded because it is a mixed dtype;
    use ``is_string_dtype`` when object dtype should also match.
    """
    kind = _get_dtype(arr_or_dtype).kind
    return kind in 'SU'

def is_float_dtype(arr_or_dtype):
tipo = _get_dtype_type(arr_or_dtype)
return issubclass(tipo, np.floating)
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def __hash__(self):
def __eq__(self, other):
raise NotImplementedError("sub-classes should implement an __eq__ method")

def __ne__(self, other):
    # Inequality is derived from the subclass-provided __eq__ so the two
    # comparison operators can never disagree.
    is_equal = self.__eq__(other)
    return not is_equal

@classmethod
def is_dtype(cls, dtype):
""" Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) """
Expand Down
49 changes: 28 additions & 21 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,11 @@ def make_block(self, values, placement=None, ndim=None, **kwargs):

return make_block(values, placement=placement, ndim=ndim, **kwargs)

def make_block_same_class(self, values, placement, copy=False, fastpath=True,
**kwargs):
"""
Wrap given values in a block of same type as self.

`kwargs` are used in SparseBlock override.

"""
if copy:
values = values.copy()
return make_block(values, placement, klass=self.__class__,
def make_block_same_class(self, values, placement=None, fastpath=True, **kwargs):
    """Wrap *values* in a new block of the same class as ``self``.

    When *placement* is omitted, this block's own ``mgr_locs`` are reused,
    so the new block occupies the same manager positions.
    """
    locs = self.mgr_locs if placement is None else placement
    return make_block(values, placement=locs, klass=self.__class__,
                      fastpath=fastpath, **kwargs)

@mgr_locs.setter
Expand Down Expand Up @@ -573,12 +567,11 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs):

# block actions ####
def copy(self, deep=True, mgr=None):
""" copy constructor """
values = self.values
if deep:
values = values.copy()
return self.make_block(values,
klass=self.__class__,
fastpath=True)
return self.make_block_same_class(values)

def replace(self, to_replace, value, inplace=False, filter=None,
regex=False, convert=True, mgr=None):
Expand Down Expand Up @@ -2140,6 +2133,13 @@ def __init__(self, values, placement, ndim=2,
placement=placement,
ndim=ndim,
**kwargs)
def copy(self, deep=True, mgr=None):
    """Copy constructor.

    A deep copy forwards ``deep=True`` to the underlying values' own
    ``copy`` so the tz-aware data is duplicated rather than shared
    (GH #11794).
    """
    vals = self.values.copy(deep=True) if deep else self.values
    return self.make_block_same_class(vals)

def external_values(self):
""" we internally represent the data as a DatetimeIndex, but for external
compat with ndarray, export as a ndarray of Timestamps """
Expand Down Expand Up @@ -3257,10 +3257,14 @@ def get_scalar(self, tup):
full_loc = list(ax.get_loc(x)
for ax, x in zip(self.axes, tup))
blk = self.blocks[self._blknos[full_loc[0]]]
full_loc[0] = self._blklocs[full_loc[0]]
values = blk.values

# FIXME: this may return non-upcasted types?
return blk.values[tuple(full_loc)]
if values.ndim == 1:
return values[full_loc[1]]

full_loc[0] = self._blklocs[full_loc[0]]
return values[tuple(full_loc)]

def delete(self, item):
"""
Expand Down Expand Up @@ -4415,11 +4419,14 @@ def _putmask_smart(v, m, n):
try:
nn = n[m]
nn_at = nn.astype(v.dtype)
comp = (nn == nn_at)
if is_list_like(comp) and comp.all():
nv = v.copy()
nv[m] = nn_at
return nv

# avoid invalid dtype comparisons
if not is_numeric_v_string_like(nn, nn_at):
comp = (nn == nn_at)
if is_list_like(comp) and comp.all():
nv = v.copy()
nv[m] = nn_at
return nv
except (ValueError, IndexError, TypeError):
pass

Expand Down
36 changes: 25 additions & 11 deletions pandas/tests/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def create_mgr(descr, item_shape=None):
block_placements = OrderedDict()
for d in descr.split(';'):
d = d.strip()
if not len(d):
continue
names, blockstr = d.partition(':')[::2]
blockstr = blockstr.strip()
names = names.strip().split(',')
Expand Down Expand Up @@ -324,7 +326,8 @@ class TestBlockManager(tm.TestCase):

def setUp(self):
self.mgr = create_mgr('a: f8; b: object; c: f8; d: object; e: f8;'
'f: bool; g: i8; h: complex')
'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;'
'k: M8[ns, US/Eastern]; l: M8[ns, CET];')

def test_constructor_corner(self):
pass
Expand Down Expand Up @@ -476,16 +479,24 @@ def test_set_change_dtype_slice(self): # GH8850
DataFrame([[3], [6]], columns=cols[2:]))

def test_copy(self):
shallow = self.mgr.copy(deep=False)

# we don't guarantee block ordering
for blk in self.mgr.blocks:
found = False
for cp_blk in shallow.blocks:
if cp_blk.values is blk.values:
found = True
break
self.assertTrue(found)
cp = self.mgr.copy(deep=False)
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):

# view assertion
self.assertTrue(cp_blk.equals(blk))
self.assertTrue(cp_blk.values.base is blk.values.base)

cp = self.mgr.copy(deep=True)
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):

# copy assertion
# we either have a None for a base or in case of some blocks it is an array (e.g. datetimetz),
# but was copied
self.assertTrue(cp_blk.equals(blk))
if cp_blk.values.base is not None and blk.values.base is not None:
self.assertFalse(cp_blk.values.base is blk.values.base)
else:
self.assertTrue(cp_blk.values.base is None and blk.values.base is None)

def test_sparse(self):
mgr = create_mgr('a: sparse-1; b: sparse-2')
Expand Down Expand Up @@ -688,7 +699,10 @@ def test_consolidate_ordering_issues(self):
self.mgr.set('g', randn(N))
self.mgr.set('h', randn(N))

# we have datetime/tz blocks in self.mgr
cons = self.mgr.consolidate()
self.assertEqual(cons.nblocks, 4)
cons = self.mgr.consolidate().get_numeric_data()
self.assertEqual(cons.nblocks, 1)
assert_almost_equal(cons.blocks[0].mgr_locs,
np.arange(len(cons.items)))
Expand Down
46 changes: 42 additions & 4 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5111,12 +5111,50 @@ def test_cov(self):
self.assertTrue(isnull(ts1.cov(ts2, min_periods=12)))

def test_copy(self):
ts = self.ts.copy()

ts[::2] = np.NaN
for deep in [None, False, True]:
s = Series(np.arange(10),dtype='float64')

# default deep is True
if deep is None:
s2 = s.copy()
else:
s2 = s.copy(deep=deep)

s2[::2] = np.NaN

if deep is None or deep is True:
# Did not modify original Series
self.assertTrue(np.isnan(s2[0]))
self.assertFalse(np.isnan(s[0]))
else:

# Did not modify original Series
self.assertFalse(np.isnan(self.ts[0]))
# we DID modify the original Series
self.assertTrue(np.isnan(s2[0]))
self.assertTrue(np.isnan(s[0]))

# GH 11794
# copy of tz-aware
expected = Series([Timestamp('2012/01/01', tz='UTC')])
expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

for deep in [None, False, True]:
s = Series([Timestamp('2012/01/01', tz='UTC')])

if deep is None:
s2 = s.copy()
else:
s2 = s.copy(deep=deep)

s2[0] = pd.Timestamp('1999/01/01', tz='UTC')

# default deep is True
if deep is None or deep is True:
assert_series_equal(s, expected)
assert_series_equal(s2, expected2)
else:
assert_series_equal(s, expected2)
assert_series_equal(s2, expected2)

def test_count(self):
self.assertEqual(self.ts.count(), len(self.ts))
Expand Down