Skip to content

REF: Cleanups, typing, memoryviews in tslibs #23368

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
2 changes: 0 additions & 2 deletions pandas/_libs/missing.pxd
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

from tslibs.nattype cimport is_null_datetimelike

cpdef bint checknull(object val)
cpdef bint checknull_old(object val)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ weekday_to_int = {int_to_weekday[key]: key for key in int_to_weekday}

@cython.wraparound(False)
@cython.boundscheck(False)
cpdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this not do anything?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT the "inline" doesn't get carried across modules in this context. Since the function is never called from within ccalendar, its not accomplishing anything.

cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil:
"""Return the number of days in the given month of the given year.

Parameters
Expand Down
56 changes: 37 additions & 19 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ TD_DTYPE = np.dtype('m8[ns]')

UTC = pytz.UTC


# ----------------------------------------------------------------------
# Misc Helpers

# TODO: How to declare np.datetime64 as the input type?
cdef inline int64_t get_datetime64_nanos(object val) except? -1:
"""
Extract the value and unit from a np.datetime64 object, then convert the
Expand All @@ -74,7 +74,9 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
return ival


def ensure_datetime64ns(ndarray arr, copy=True):
@cython.boundscheck(False)
@cython.wraparound(False)
def ensure_datetime64ns(arr: ndarray, copy: bint = True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no spaces on the args

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been trying to figure this out too. Are you sure about this? I've been following the usage here https://www.python.org/dev/peps/pep-3107/#syntax

"""
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'

Expand Down Expand Up @@ -121,7 +123,7 @@ def ensure_datetime64ns(ndarray arr, copy=True):
return result


def ensure_timedelta64ns(ndarray arr, copy=True):
def ensure_timedelta64ns(arr: ndarray, copy: bint = True):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

"""
Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'

Expand All @@ -133,23 +135,25 @@ def ensure_timedelta64ns(ndarray arr, copy=True):
Returns
-------
result : ndarray with dtype timedelta64[ns]

"""
return arr.astype(TD_DTYPE, copy=copy)
# TODO: check for overflows when going from a lower-resolution to nanos


def datetime_to_datetime64(object[:] values):
@cython.boundscheck(False)
@cython.wraparound(False)
def datetime_to_datetime64(values: object[:]):
"""
Convert ndarray of datetime-like objects to int64 array representing
nanosecond timestamps.

Parameters
----------
values : ndarray
values : ndarray[object]

Returns
-------
result : ndarray with dtype int64
result : ndarray[int64_t]
inferred_tz : tzinfo or None
"""
cdef:
Expand Down Expand Up @@ -225,6 +229,7 @@ cdef class _TSObject:

@property
def value(self):
# This is needed in order for `value` to be accessible in lib.pyx
return self.value


Expand Down Expand Up @@ -610,6 +615,8 @@ cpdef inline datetime localize_pydatetime(datetime dt, object tz):
# ----------------------------------------------------------------------
# Timezone Conversion

@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline int64_t[:] _tz_convert_dst(int64_t[:] values, tzinfo tz,
bint to_utc=True):
"""
Expand Down Expand Up @@ -756,6 +763,8 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2):
return _tz_convert_dst(arr, tz2, to_utc=False)[0]


@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline int64_t[:] _tz_convert_one_way(int64_t[:] vals, object tz,
bint to_utc):
"""
Expand Down Expand Up @@ -849,10 +858,11 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
ndarray[int64_t] trans
int64_t[:] deltas, idx_shifted
ndarray ambiguous_array
Py_ssize_t i, idx, pos, ntrans, n = len(vals)
Py_ssize_t i, idx, pos, pos_left, pos_right, ntrans, n = len(vals)
int64_t *tdata
int64_t v, left, right, val, v_left, v_right
ndarray[int64_t] result, result_a, result_b, dst_hours
int64_t v, left, right, val, v_left, v_right, delta_idx
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you avoid duplication here (int64_t[:]) is twice

int64_t[:] result, dst_hours, idx_shifted_left, idx_shifted_right
ndarray[int64_t] result_a, result_b
npy_datetimestruct dts
bint infer_dst = False, is_dst = False, fill = False
bint shift = False, fill_nonexist = False
Expand All @@ -867,7 +877,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
for i in range(n):
v = vals[i]
result[i] = _tz_convert_tzlocal_utc(v, tz, to_utc=True)
return result
return result.base # `.base` to access underlying np.array
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

asarray

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update


if is_string_object(ambiguous):
if ambiguous == 'infer':
Expand Down Expand Up @@ -929,7 +939,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,

if infer_dst:
dst_hours = np.empty(n, dtype=np.int64)
dst_hours.fill(NPY_NAT)
dst_hours[:] = NPY_NAT

# Get the ambiguous hours (given the above, these are the hours
# where result_a != result_b and neither of them are NAT)
Expand Down Expand Up @@ -970,7 +980,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
# Pull the only index and adjust
a_idx = grp[:switch_idx]
b_idx = grp[switch_idx:]
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))

# __setitem__ on dst_hours.base because indexing with
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would really rather not use .base

# an ndarray (grp) directly on a memoryview is not supported
# TODO: is `grp` necessarily contiguous? i.e. could we
# equivalently write dst_hours[grp[0]:grp[-1]] = ... ?
dst_hours.base[grp] = np.hstack((result_a[a_idx],
result_b[b_idx]))

for i in range(n):
val = vals[i]
Expand Down Expand Up @@ -1015,7 +1031,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
stamp = _render_tstamp(val)
raise pytz.NonExistentTimeError(stamp)

return result
return result.base # `.base` to access underlying np.array
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See comment above. I previously changed all of the foo.base to np.asarray(foo), but reverted that change after finding that risked incorrectly returning np.array(None) instead of raising if None gets passed to one of these functions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

from the docs

https://cython.readthedocs.io/en/latest/src/userguide/memoryviews.html

import numpy as np

def process_buffer(int[:,:] input_view not None,
                   int[:,:] output_view=None):

   if output_view is None:
       # Creating a default view, e.g.
       output_view = np.empty_like(input_view)

   # process 'input_view' into 'output_view'
   return output_view

you can use not None in the declaration to avoid this issue.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yah, I was hoping you had forgotten about that. Part of the goal ATM is to move the cython code closer to being valid python (linting!), and not None moves that in the wrong direction. Will un-revert, and open an issue with Cython on implementing a py-friendly version of not None. Ditto for the other open cython PR.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

well I actually find it weird that cython code accepts None here at all by default.



cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n):
Expand Down Expand Up @@ -1051,7 +1067,7 @@ cdef inline str _render_tstamp(int64_t val):
# Normalization


def normalize_date(object dt):
def normalize_date(dt: object) -> datetime:
"""
Normalize datetime.datetime value to midnight. Returns datetime.date as a
datetime.datetime at midnight
Expand Down Expand Up @@ -1085,7 +1101,7 @@ def normalize_date(object dt):

@cython.wraparound(False)
@cython.boundscheck(False)
def normalize_i8_timestamps(int64_t[:] stamps, tz=None):
def normalize_i8_timestamps(int64_t[:] stamps, object tz=None):
"""
Normalize each of the (nanosecond) timestamps in the given array by
rounding down to the beginning of the day (i.e. midnight). If `tz`
Expand Down Expand Up @@ -1122,7 +1138,7 @@ def normalize_i8_timestamps(int64_t[:] stamps, tz=None):

@cython.wraparound(False)
@cython.boundscheck(False)
cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz):
cdef int64_t[:] _normalize_local(int64_t[:] stamps, tzinfo tz):
"""
Normalize each of the (nanosecond) timestamps in the given array by
rounding down to the beginning of the day (i.e. midnight) for the
Expand All @@ -1131,7 +1147,7 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, object tz):
Parameters
----------
stamps : int64 ndarray
tz : tzinfo or None
tz : tzinfo

Returns
-------
Expand Down Expand Up @@ -1207,7 +1223,9 @@ cdef inline int64_t _normalized_stamp(npy_datetimestruct *dts) nogil:
return dtstruct_to_dt64(dts)


def is_date_array_normalized(int64_t[:] stamps, tz=None):
@cython.boundscheck(False)
@cython.wraparound(False)
def is_date_array_normalized(int64_t[:] stamps, object tz=None):
"""
Check if all of the given (nanosecond) timestamps are normalized to
midnight, i.e. hour == minute == second == 0. If the optional timezone
Expand Down
Loading