Skip to content

Commit ba19ff9

Browse files
committed
Merge pull request #3516 from jreback/GH3416
BUG/CLN: datetime64/timedelta64
2 parents dc84742 + d54c6a7 commit ba19ff9

File tree

8 files changed

+73
-25
lines changed

8 files changed

+73
-25
lines changed

RELEASE.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,10 @@ pandas 0.11.1
5757
Note: The default value will change in 0.12 to the "no mangle" behaviour.
5858
If your code relies on this behaviour, explicitly specify mangle_dupe_cols=True
5959
in your calls.
60+
- Do not allow ``astype`` on ``datetime64[ns]`` except to ``object``, or on
61+
``timedelta64[ns]`` except to ``object/int`` (GH3425_)
62+
- Do not allow datetimelike/timedeltalike creation except with valid types
63+
(e.g. cannot pass ``datetime64[ms]``) (GH3423_)
6064

6165
**Bug Fixes**
6266

@@ -88,11 +92,15 @@ pandas 0.11.1
8892
- Fixed bug in mixed-frame assignment with aligned series (GH3492_)
8993
- Fixed bug where selecting month/quarter/year from a series would not select the time element
9094
on the last day (GH3546_)
95+
- Properly convert np.datetime64 objects in a Series (GH3416_)
9196

9297
.. _GH3164: https://github.com/pydata/pandas/issues/3164
9398
.. _GH2786: https://github.com/pydata/pandas/issues/2786
9499
.. _GH2194: https://github.com/pydata/pandas/issues/2194
95100
.. _GH3230: https://github.com/pydata/pandas/issues/3230
101+
.. _GH3425: https://github.com/pydata/pandas/issues/3425
102+
.. _GH3416: https://github.com/pydata/pandas/issues/3416
103+
.. _GH3423: https://github.com/pydata/pandas/issues/3423
96104
.. _GH3251: https://github.com/pydata/pandas/issues/3251
97105
.. _GH3379: https://github.com/pydata/pandas/issues/3379
98106
.. _GH3480: https://github.com/pydata/pandas/issues/3480

pandas/core/common.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ class AmbiguousIndexError(PandasError, KeyError):
4343

4444

4545
_POSSIBLY_CAST_DTYPES = set([ np.dtype(t) for t in ['M8[ns]','m8[ns]','O','int8','uint8','int16','uint16','int32','uint32','int64','uint64'] ])
46+
_NS_DTYPE = np.dtype('M8[ns]')
47+
_TD_DTYPE = np.dtype('m8[ns]')
48+
_INT64_DTYPE = np.dtype(np.int64)
4649

4750
def isnull(obj):
4851
'''
@@ -1084,6 +1087,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
10841087

10851088
if is_datetime64 or is_timedelta64:
10861089

1090+
# force the dtype if needed
1091+
if is_datetime64 and dtype != _NS_DTYPE:
1092+
raise TypeError("cannot convert datetimelike to dtype [%s]" % dtype)
1093+
elif is_timedelta64 and dtype != _TD_DTYPE:
1094+
raise TypeError("cannot convert timedeltalike to dtype [%s]" % dtype)
1095+
10871096
if np.isscalar(value):
10881097
if value == tslib.iNaT or isnull(value):
10891098
value = tslib.iNaT
@@ -1098,7 +1107,8 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
10981107
elif np.prod(value.shape) and value.dtype != dtype:
10991108
try:
11001109
if is_datetime64:
1101-
value = tslib.array_to_datetime(value, coerce = coerce)
1110+
from pandas.tseries.tools import to_datetime
1111+
value = to_datetime(value, coerce=coerce).values
11021112
elif is_timedelta64:
11031113
value = _possibly_cast_to_timedelta(value)
11041114
except:
@@ -1119,12 +1129,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
11191129
v = [ v ]
11201130
if len(v):
11211131
inferred_type = lib.infer_dtype(v)
1122-
if inferred_type == 'datetime':
1132+
if inferred_type in ['datetime','datetime64']:
11231133
try:
11241134
value = tslib.array_to_datetime(np.array(v))
11251135
except:
11261136
pass
1127-
elif inferred_type == 'timedelta':
1137+
elif inferred_type in ['timedelta','timedelta64']:
11281138
value = _possibly_cast_to_timedelta(value)
11291139

11301140
return value
@@ -1515,9 +1525,24 @@ def _astype_nansafe(arr, dtype, copy = True):
15151525
if not isinstance(dtype, np.dtype):
15161526
dtype = np.dtype(dtype)
15171527

1518-
if issubclass(arr.dtype.type, np.datetime64):
1528+
if is_datetime64_dtype(arr):
15191529
if dtype == object:
15201530
return tslib.ints_to_pydatetime(arr.view(np.int64))
1531+
elif issubclass(dtype.type, np.int):
1532+
return arr.view(dtype)
1533+
elif dtype != _NS_DTYPE:
1534+
raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % (arr.dtype,dtype))
1535+
return arr.astype(_NS_DTYPE)
1536+
elif is_timedelta64_dtype(arr):
1537+
if issubclass(dtype.type, np.int):
1538+
return arr.view(dtype)
1539+
elif dtype == object:
1540+
return arr.astype(object)
1541+
1542+
# in py3, timedelta64[ns] are int64
1543+
elif (py3compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not py3compat.PY3 and dtype != _TD_DTYPE):
1544+
raise TypeError("cannot astype a timedelta from [%s] to [%s]" % (arr.dtype,dtype))
1545+
return arr.astype(_TD_DTYPE)
15211546
elif (np.issubdtype(arr.dtype, np.floating) and
15221547
np.issubdtype(dtype, np.integer)):
15231548

@@ -1721,9 +1746,6 @@ def _check_as_is(x):
17211746
self.queue.truncate(0)
17221747

17231748

1724-
_NS_DTYPE = np.dtype('M8[ns]')
1725-
1726-
17271749
def _concat_compat(to_concat, axis=0):
17281750
# filter empty arrays
17291751
to_concat = [x for x in to_concat if x.shape[axis] > 0]
@@ -1751,7 +1773,6 @@ def _to_pydatetime(x):
17511773

17521774
return x
17531775

1754-
17551776
def _where_compat(mask, arr1, arr2):
17561777
if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE:
17571778
new_vals = np.where(mask, arr1.view(np.int64), arr2.view(np.int64))

pandas/core/internals.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from numpy import nan
55
import numpy as np
66

7-
from pandas.core.common import _possibly_downcast_to_dtype, isnull
7+
from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE
88
from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes
99
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
1010
import pandas.core.common as com
@@ -740,10 +740,6 @@ def should_store(self, value):
740740
(np.integer, np.floating, np.complexfloating,
741741
np.datetime64, np.bool_))
742742

743-
_NS_DTYPE = np.dtype('M8[ns]')
744-
_TD_DTYPE = np.dtype('m8[ns]')
745-
746-
747743
class DatetimeBlock(Block):
748744
_can_hold_na = True
749745

pandas/tests/test_series.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,27 @@ def test_constructor_dtype_datetime64(self):
469469
self.assert_(isnull(s[1]) == True)
470470
self.assert_(s.dtype == 'M8[ns]')
471471

472+
# GH3416
473+
dates = [
474+
np.datetime64(datetime(2013, 1, 1)),
475+
np.datetime64(datetime(2013, 1, 2)),
476+
np.datetime64(datetime(2013, 1, 3)),
477+
]
478+
479+
s = Series(dates)
480+
self.assert_(s.dtype == 'M8[ns]')
481+
482+
s.ix[0] = np.nan
483+
self.assert_(s.dtype == 'M8[ns]')
484+
485+
# invalid astypes
486+
for t in ['s','D','us','ms']:
487+
self.assertRaises(TypeError, s.astype, 'M8[%s]' % t)
488+
489+
# GH3414 related
490+
self.assertRaises(TypeError, lambda x: Series(Series(dates).astype('int')/1000000,dtype='M8[ms]'))
491+
self.assertRaises(TypeError, lambda x: Series(dates, dtype='datetime64'))
492+
472493
def test_constructor_dict(self):
473494
d = {'a': 0., 'b': 1., 'c': 2.}
474495
result = Series(d, index=['b', 'c', 'd', 'a'])
@@ -1809,6 +1830,13 @@ def test_constructor_dtype_timedelta64(self):
18091830
td = Series([ timedelta(days=i) for i in range(3) ] + [ np.nan ], dtype='m8[ns]' )
18101831
self.assert_(td.dtype=='timedelta64[ns]')
18111832

1833+
# invalid astypes
1834+
for t in ['s','D','us','ms']:
1835+
self.assertRaises(TypeError, td.astype, 'm8[%s]' % t)
1836+
1837+
# valid astype
1838+
td.astype('int')
1839+
18121840
# this is an invalid casting
18131841
self.assertRaises(Exception, Series, [ timedelta(days=i) for i in range(3) ] + [ 'foo' ], dtype='m8[ns]' )
18141842

pandas/tseries/index.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import numpy as np
88

9-
from pandas.core.common import isnull
9+
from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE
1010
from pandas.core.index import Index, Int64Index
1111
from pandas.tseries.frequencies import (
1212
infer_freq, to_offset, get_period_alias,
@@ -92,9 +92,6 @@ class TimeSeriesError(Exception):
9292

9393

9494
_midnight = time(0, 0)
95-
_NS_DTYPE = np.dtype('M8[ns]')
96-
_INT64_DTYPE = np.dtype(np.int64)
97-
9895

9996
class DatetimeIndex(Int64Index):
10097
"""

pandas/tseries/period.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import pandas.tseries.frequencies as _freq_mod
1313

1414
import pandas.core.common as com
15-
from pandas.core.common import isnull
15+
from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE
1616
from pandas.util import py3compat
1717

1818
from pandas.lib import Timestamp
@@ -516,10 +516,6 @@ def wrapper(self, other):
516516
return result
517517
return wrapper
518518

519-
_INT64_DTYPE = np.dtype(np.int64)
520-
_NS_DTYPE = np.dtype('M8[ns]')
521-
522-
523519
class PeriodIndex(Int64Index):
524520
"""
525521
Immutable ndarray holding ordinal values indicating regular periods in

pandas/tseries/tests/test_timeseries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1470,7 +1470,7 @@ def test_frame_datetime64_handling_groupby(self):
14701470
(3, np.datetime64('2012-07-04'))],
14711471
columns=['a', 'date'])
14721472
result = df.groupby('a').first()
1473-
self.assertEqual(result['date'][3], np.datetime64('2012-07-03'))
1473+
self.assertEqual(result['date'][3], datetime(2012,7,3))
14741474

14751475
def test_series_interpolate_intraday(self):
14761476
# #1698

pandas/tseries/tools.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def _maybe_get_tz(tz):
5050

5151

5252
def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
53-
format=None):
53+
format=None, coerce=False):
5454
"""
5555
Convert argument to datetime
5656
@@ -68,6 +68,7 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
6868
If True returns a DatetimeIndex, if False returns ndarray of values
6969
format : string, default None
7070
strftime to parse time, eg "%d/%m/%Y"
71+
coerce : force errors to NaT (False by default)
7172
7273
Returns
7374
-------
@@ -84,7 +85,8 @@ def _convert_f(arg):
8485
result = tslib.array_strptime(arg, format)
8586
else:
8687
result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
87-
utc=utc, dayfirst=dayfirst)
88+
utc=utc, dayfirst=dayfirst,
89+
coerce=coerce)
8890
if com.is_datetime64_dtype(result) and box:
8991
result = DatetimeIndex(result, tz='utc' if utc else None)
9092
return result

0 commit comments

Comments
 (0)