Skip to content

BUG: bug in deep copy of datetime tz-aware objects, #11794 #11796

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 8, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ Bug Fixes
- Bug in ``Timedelta.round`` with negative values (:issue:`11690`)
- Bug in ``.loc`` against ``CategoricalIndex`` may result in normal ``Index`` (:issue:`11586`)
- Bug in ``DataFrame.info`` when duplicated column names exist (:issue:`11761`)

- Bug in ``.copy`` of datetime tz-aware objects (:issue:`11794`)



Expand Down
23 changes: 18 additions & 5 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def _isnull_ndarraylike(obj):
values = getattr(obj, 'values', obj)
dtype = values.dtype

if dtype.kind in ('O', 'S', 'U'):
if is_string_dtype(dtype):
if is_categorical_dtype(values):
from pandas import Categorical
if not isinstance(values, Categorical):
Expand All @@ -243,7 +243,7 @@ def _isnull_ndarraylike(obj):
# Working around NumPy ticket 1542
shape = values.shape

if dtype.kind in ('S', 'U'):
if is_string_like_dtype(dtype):
result = np.zeros(values.shape, dtype=bool)
else:
result = np.empty(shape, dtype=bool)
Expand All @@ -267,11 +267,11 @@ def _isnull_ndarraylike_old(obj):
values = getattr(obj, 'values', obj)
dtype = values.dtype

if dtype.kind in ('O', 'S', 'U'):
if is_string_dtype(dtype):
# Working around NumPy ticket 1542
shape = values.shape

if values.dtype.kind in ('S', 'U'):
if is_string_like_dtype(dtype):
result = np.zeros(values.shape, dtype=bool)
else:
result = np.empty(shape, dtype=bool)
Expand Down Expand Up @@ -2208,13 +2208,17 @@ def is_numeric_v_string_like(a, b):

is_a_numeric_array = is_a_array and is_numeric_dtype(a)
is_b_numeric_array = is_b_array and is_numeric_dtype(b)
is_a_string_array = is_a_array and is_string_like_dtype(a)
is_b_string_array = is_b_array and is_string_like_dtype(b)

is_a_scalar_string_like = not is_a_array and is_string_like(a)
is_b_scalar_string_like = not is_b_array and is_string_like(b)

return (
is_a_numeric_array and is_b_scalar_string_like) or (
is_b_numeric_array and is_a_scalar_string_like
is_b_numeric_array and is_a_scalar_string_like) or (
is_a_numeric_array and is_b_string_array) or (
is_b_numeric_array and is_a_string_array
)

def is_datetimelike_v_numeric(a, b):
Expand Down Expand Up @@ -2257,6 +2261,15 @@ def is_numeric_dtype(arr_or_dtype):
and not issubclass(tipo, (np.datetime64, np.timedelta64)))


def is_string_dtype(arr_or_dtype):
    """Return True if the dtype is object ('O'), bytes ('S') or unicode ('U').

    Note: ``object`` dtype is included here, so this matches arrays that may
    hold strings mixed with arbitrary Python objects; see
    ``is_string_like_dtype`` for the strict string-only check.
    """
    kind = _get_dtype(arr_or_dtype).kind
    return kind in 'OSU'

def is_string_like_dtype(arr_or_dtype):
    """Return True only for fixed-width bytes ('S') or unicode ('U') dtypes.

    ``object`` ('O') is deliberately excluded because it is a mixed dtype;
    use ``is_string_dtype`` when object dtype should also match.
    """
    kind = _get_dtype(arr_or_dtype).kind
    return kind in 'SU'

def is_float_dtype(arr_or_dtype):
tipo = _get_dtype_type(arr_or_dtype)
return issubclass(tipo, np.floating)
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ def __hash__(self):
def __eq__(self, other):
raise NotImplementedError("sub-classes should implement an __eq__ method")

def __ne__(self, other):
    # Inequality is derived from the subclass-provided __eq__ so the two
    # comparison operators can never disagree.
    is_equal = self.__eq__(other)
    return not is_equal

@classmethod
def is_dtype(cls, dtype):
""" Return a boolean if we if the passed type is an actual dtype that we can match (via string or type) """
Expand Down
49 changes: 28 additions & 21 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,11 @@ def make_block(self, values, placement=None, ndim=None, **kwargs):

return make_block(values, placement=placement, ndim=ndim, **kwargs)

def make_block_same_class(self, values, placement, copy=False, fastpath=True,
**kwargs):
"""
Wrap given values in a block of same type as self.

`kwargs` are used in SparseBlock override.

"""
if copy:
values = values.copy()
return make_block(values, placement, klass=self.__class__,
def make_block_same_class(self, values, placement=None, fastpath=True, **kwargs):
    """Wrap *values* in a new block of the same class as ``self``.

    When *placement* is omitted, this block's own ``mgr_locs`` are reused,
    so the new block occupies the same manager positions.
    """
    locs = self.mgr_locs if placement is None else placement
    return make_block(values, placement=locs, klass=self.__class__,
                      fastpath=fastpath, **kwargs)

@mgr_locs.setter
Expand Down Expand Up @@ -573,12 +567,11 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, **kwargs):

# block actions ####
def copy(self, deep=True, mgr=None):
""" copy constructor """
values = self.values
if deep:
values = values.copy()
return self.make_block(values,
klass=self.__class__,
fastpath=True)
return self.make_block_same_class(values)

def replace(self, to_replace, value, inplace=False, filter=None,
regex=False, convert=True, mgr=None):
Expand Down Expand Up @@ -2140,6 +2133,13 @@ def __init__(self, values, placement, ndim=2,
placement=placement,
ndim=ndim,
**kwargs)
def copy(self, deep=True, mgr=None):
    """Copy constructor.

    A deep copy forwards ``deep=True`` to the underlying values' own
    ``copy`` so the tz-aware data is duplicated rather than shared
    (GH #11794).
    """
    vals = self.values.copy(deep=True) if deep else self.values
    return self.make_block_same_class(vals)

def external_values(self):
""" we internally represent the data as a DatetimeIndex, but for external
compat with ndarray, export as a ndarray of Timestamps """
Expand Down Expand Up @@ -3257,10 +3257,14 @@ def get_scalar(self, tup):
full_loc = list(ax.get_loc(x)
for ax, x in zip(self.axes, tup))
blk = self.blocks[self._blknos[full_loc[0]]]
full_loc[0] = self._blklocs[full_loc[0]]
values = blk.values

# FIXME: this may return non-upcasted types?
return blk.values[tuple(full_loc)]
if values.ndim == 1:
return values[full_loc[1]]

full_loc[0] = self._blklocs[full_loc[0]]
return values[tuple(full_loc)]

def delete(self, item):
"""
Expand Down Expand Up @@ -4415,11 +4419,14 @@ def _putmask_smart(v, m, n):
try:
nn = n[m]
nn_at = nn.astype(v.dtype)
comp = (nn == nn_at)
if is_list_like(comp) and comp.all():
nv = v.copy()
nv[m] = nn_at
return nv

# avoid invalid dtype comparisons
if not is_numeric_v_string_like(nn, nn_at):
comp = (nn == nn_at)
if is_list_like(comp) and comp.all():
nv = v.copy()
nv[m] = nn_at
return nv
except (ValueError, IndexError, TypeError):
pass

Expand Down
36 changes: 25 additions & 11 deletions pandas/tests/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def create_mgr(descr, item_shape=None):
block_placements = OrderedDict()
for d in descr.split(';'):
d = d.strip()
if not len(d):
continue
names, blockstr = d.partition(':')[::2]
blockstr = blockstr.strip()
names = names.strip().split(',')
Expand Down Expand Up @@ -324,7 +326,8 @@ class TestBlockManager(tm.TestCase):

def setUp(self):
self.mgr = create_mgr('a: f8; b: object; c: f8; d: object; e: f8;'
'f: bool; g: i8; h: complex')
'f: bool; g: i8; h: complex; i: datetime-1; j: datetime-2;'
'k: M8[ns, US/Eastern]; l: M8[ns, CET];')

def test_constructor_corner(self):
pass
Expand Down Expand Up @@ -476,16 +479,24 @@ def test_set_change_dtype_slice(self): # GH8850
DataFrame([[3], [6]], columns=cols[2:]))

def test_copy(self):
shallow = self.mgr.copy(deep=False)

# we don't guarantee block ordering
for blk in self.mgr.blocks:
found = False
for cp_blk in shallow.blocks:
if cp_blk.values is blk.values:
found = True
break
self.assertTrue(found)
cp = self.mgr.copy(deep=False)
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):

# view assertion
self.assertTrue(cp_blk.equals(blk))
self.assertTrue(cp_blk.values.base is blk.values.base)

cp = self.mgr.copy(deep=True)
for blk, cp_blk in zip(self.mgr.blocks, cp.blocks):

# copy assertion
# we either have a None for a base or in case of some blocks it is an array (e.g. datetimetz),
# but was copied
self.assertTrue(cp_blk.equals(blk))
if cp_blk.values.base is not None and blk.values.base is not None:
self.assertFalse(cp_blk.values.base is blk.values.base)
else:
self.assertTrue(cp_blk.values.base is None and blk.values.base is None)

def test_sparse(self):
mgr = create_mgr('a: sparse-1; b: sparse-2')
Expand Down Expand Up @@ -688,7 +699,10 @@ def test_consolidate_ordering_issues(self):
self.mgr.set('g', randn(N))
self.mgr.set('h', randn(N))

# we have datetime/tz blocks in self.mgr
cons = self.mgr.consolidate()
self.assertEqual(cons.nblocks, 4)
cons = self.mgr.consolidate().get_numeric_data()
self.assertEqual(cons.nblocks, 1)
assert_almost_equal(cons.blocks[0].mgr_locs,
np.arange(len(cons.items)))
Expand Down
46 changes: 42 additions & 4 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5111,12 +5111,50 @@ def test_cov(self):
self.assertTrue(isnull(ts1.cov(ts2, min_periods=12)))

def test_copy(self):
ts = self.ts.copy()

ts[::2] = np.NaN
for deep in [None, False, True]:
s = Series(np.arange(10),dtype='float64')

# default deep is True
if deep is None:
s2 = s.copy()
else:
s2 = s.copy(deep=deep)

s2[::2] = np.NaN

if deep is None or deep is True:
# Did not modify original Series
self.assertTrue(np.isnan(s2[0]))
self.assertFalse(np.isnan(s[0]))
else:

# Did not modify original Series
self.assertFalse(np.isnan(self.ts[0]))
# we DID modify the original Series
self.assertTrue(np.isnan(s2[0]))
self.assertTrue(np.isnan(s[0]))

# GH 11794
# copy of tz-aware
expected = Series([Timestamp('2012/01/01', tz='UTC')])
expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

for deep in [None, False, True]:
s = Series([Timestamp('2012/01/01', tz='UTC')])

if deep is None:
s2 = s.copy()
else:
s2 = s.copy(deep=deep)

s2[0] = pd.Timestamp('1999/01/01', tz='UTC')

# default deep is True
if deep is None or deep is True:
assert_series_equal(s, expected)
assert_series_equal(s2, expected2)
else:
assert_series_equal(s, expected2)
assert_series_equal(s2, expected2)

def test_count(self):
self.assertEqual(self.ts.count(), len(self.ts))
Expand Down