Skip to content

Add same_value casting to np.astype #93

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/source/reference/c-api/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4452,5 +4452,10 @@ Enumerated Types

Allow any cast, no matter what kind of data loss may occur.

.. c:enumerator:: NPY_SAME_VALUE_CASTING

Allow any cast, but raise an error if any value changes during the cast. Currently
supported only in ``ndarray.astype(..., casting='same_value')``.

.. index::
pair: ndarray; C-API
1 change: 1 addition & 0 deletions numpy/__init__.cython-30.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ cdef extern from "numpy/arrayobject.h":
NPY_SAFE_CASTING
NPY_SAME_KIND_CASTING
NPY_UNSAFE_CASTING
NPY_SAME_VALUE_CASTING

ctypedef enum NPY_CLIPMODE:
NPY_CLIP
Expand Down
1 change: 1 addition & 0 deletions numpy/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ cdef extern from "numpy/arrayobject.h":
NPY_SAFE_CASTING
NPY_SAME_KIND_CASTING
NPY_UNSAFE_CASTING
NPY_SAME_VALUE_CASTING

ctypedef enum NPY_CLIPMODE:
NPY_CLIP
Expand Down
3 changes: 2 additions & 1 deletion numpy/_core/code_generators/cversions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,6 @@
# Version 19 (NumPy 2.2.0) No change
0x00000013 = 2b8f1f4da822491ff030b2b37dff07e3
# Version 20 (NumPy 2.3.0)
# Version 20 (NumPy 2.4.0) No change
0x00000014 = e56b74d32a934d085e7c3414cb9999b8,
# Version 21 (NumPy 2.4.0) Add 'same_value' casting, header additions
0x00000015 = e56b74d32a934d085e7c3414cb9999b8,
9 changes: 8 additions & 1 deletion numpy/_core/include/numpy/dtype_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,15 @@ typedef struct PyArrayMethod_Context_tag {

/* Operand descriptors, filled in by resolve_descriptors */
PyArray_Descr *const *descriptors;
#if NPY_FEATURE_VERSION > NPY_2_3_API_VERSION
void * _reserved;
/*
* Optional flags to pass information into the inner loop.
* If set, it holds an NPY_CASTING value.
*/
uint64_t flags;
/* Structure may grow (this is harmless for DType authors) */
#endif
} PyArrayMethod_Context;


Expand Down Expand Up @@ -144,7 +152,6 @@ typedef struct {
#define NPY_METH_contiguous_indexed_loop 9
#define _NPY_METH_static_data 10


/*
* The resolve descriptors function, must be able to handle NULL values for
* all output (but not input) `given_descrs` and fill `loop_descrs`.
Expand Down
2 changes: 2 additions & 0 deletions numpy/_core/include/numpy/ndarraytypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ typedef enum {
NPY_SAME_KIND_CASTING=3,
/* Allow any casts */
NPY_UNSAFE_CASTING=4,
/* Allow any casts, check that no values overflow/change */
NPY_SAME_VALUE_CASTING=5,
} NPY_CASTING;

typedef enum {
Expand Down
5 changes: 4 additions & 1 deletion numpy/_core/include/numpy/numpyconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
#define NPY_2_1_API_VERSION 0x00000013
#define NPY_2_2_API_VERSION 0x00000013
#define NPY_2_3_API_VERSION 0x00000014
#define NPY_2_4_API_VERSION 0x00000015


/*
Expand Down Expand Up @@ -172,8 +173,10 @@
#define NPY_FEATURE_VERSION_STRING "2.0"
#elif NPY_FEATURE_VERSION == NPY_2_1_API_VERSION
#define NPY_FEATURE_VERSION_STRING "2.1"
#elif NPY_FEATURE_VERSION == NPY_2_3_API_VERSION /* also 2.4 */
#elif NPY_FEATURE_VERSION == NPY_2_3_API_VERSION
#define NPY_FEATURE_VERSION_STRING "2.3"
#elif NPY_FEATURE_VERSION == NPY_2_4_API_VERSION
#define NPY_FEATURE_VERSION_STRING "2.4"
#else
#error "Missing version string define for new NumPy version."
#endif
Expand Down
3 changes: 2 additions & 1 deletion numpy/_core/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ C_ABI_VERSION = '0x02000000'
# 0x00000013 - 2.1.x
# 0x00000013 - 2.2.x
# 0x00000014 - 2.3.x
C_API_VERSION = '0x00000014'
# 0x00000015 - 2.4.x
C_API_VERSION = '0x00000015'

# Check whether we have a mismatch between the set C API VERSION and the
# actual C API VERSION. Will raise a MismatchCAPIError if so.
Expand Down
4 changes: 2 additions & 2 deletions numpy/_core/src/common/array_assign.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
NPY_NO_EXPORT int
raw_array_assign_scalar(int ndim, npy_intp const *shape,
PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
PyArray_Descr *src_dtype, char *src_data);
PyArray_Descr *src_dtype, char *src_data, NPY_CASTING casting);

/*
* Assigns the scalar value to every element of the destination raw array
Expand All @@ -59,7 +59,7 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp const *shape,
PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
PyArray_Descr *src_dtype, char *src_data,
PyArray_Descr *wheremask_dtype, char *wheremask_data,
npy_intp const *wheremask_strides);
npy_intp const *wheremask_strides, NPY_CASTING casting);

/******** LOW-LEVEL ARRAY MANIPULATION HELPERS ********/

Expand Down
1 change: 1 addition & 0 deletions numpy/_core/src/multiarray/_multiarray_tests.c.src
Original file line number Diff line number Diff line change
Expand Up @@ -2168,6 +2168,7 @@ run_casting_converter(PyObject* NPY_UNUSED(self), PyObject *args)
case NPY_SAFE_CASTING: return PyUnicode_FromString("NPY_SAFE_CASTING");
case NPY_SAME_KIND_CASTING: return PyUnicode_FromString("NPY_SAME_KIND_CASTING");
case NPY_UNSAFE_CASTING: return PyUnicode_FromString("NPY_UNSAFE_CASTING");
case NPY_SAME_VALUE_CASTING: return PyUnicode_FromString("NPY_SAME_VALUE_CASTING");
default: return PyLong_FromLong(casting);
}
}
Expand Down
86 changes: 53 additions & 33 deletions numpy/_core/src/multiarray/array_assign_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,22 +79,20 @@ copycast_isaligned(int ndim, npy_intp const *shape,
NPY_NO_EXPORT int
raw_array_assign_array(int ndim, npy_intp const *shape,
PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
PyArray_Descr *src_dtype, char *src_data, npy_intp const *src_strides)
PyArray_Descr *src_dtype, char *src_data, npy_intp const *src_strides,
int flags)
{
int idim;
npy_intp shape_it[NPY_MAXDIMS];
npy_intp dst_strides_it[NPY_MAXDIMS];
npy_intp src_strides_it[NPY_MAXDIMS];
npy_intp coord[NPY_MAXDIMS];

int aligned;
int aligned = flags & 0x01;
int same_value_cast = (flags & 0x02) == 0x02;

NPY_BEGIN_THREADS_DEF;

aligned =
copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareTwoRawArrayIter(
ndim, shape,
Expand All @@ -120,21 +118,30 @@ raw_array_assign_array(int ndim, npy_intp const *shape,

/* Get the function to do the casting */
NPY_cast_info cast_info;
NPY_ARRAYMETHOD_FLAGS flags;
NPY_ARRAYMETHOD_FLAGS method_flags;
if (PyArray_GetDTypeTransferFunction(aligned,
src_strides_it[0], dst_strides_it[0],
src_dtype, dst_dtype,
0,
&cast_info, &flags) != NPY_SUCCEED) {
&cast_info, &method_flags) != NPY_SUCCEED) {
return -1;
}

if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
if (!(method_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
npy_clear_floatstatus_barrier((char*)&src_data);
}

if (same_value_cast) {
#if NPY_FEATURE_VERSION > NPY_2_3_API_VERSION
cast_info.context.flags |= NPY_SAME_VALUE_CASTING;
#else
PyErr_SetString(PyExc_NotImplementedError,
"raw_array_assign_array with 'same_value' casting not implemented yet");
#endif
}

/* Ensure number of elements exceeds threshold for threading */
if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
if (!(method_flags & NPY_METH_REQUIRES_PYAPI)) {
npy_intp nitems = 1, i;
for (i = 0; i < ndim; i++) {
nitems *= shape_it[i];
Expand All @@ -144,11 +151,14 @@ raw_array_assign_array(int ndim, npy_intp const *shape,

npy_intp strides[2] = {src_strides_it[0], dst_strides_it[0]};

int result = 0;
NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
/* Process the innermost dimension */
char *args[2] = {src_data, dst_data};
if (cast_info.func(&cast_info.context,
args, &shape_it[0], strides, cast_info.auxdata) < 0) {
result = cast_info.func(&cast_info.context,
args, &shape_it[0], strides,
cast_info.auxdata);
if (result < 0) {
goto fail;
}
} NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape_it,
Expand All @@ -158,7 +168,7 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);

if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
if (!(method_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
int fpes = npy_get_floatstatus_barrier((char*)&src_data);
if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
return -1;
Expand All @@ -169,6 +179,7 @@ raw_array_assign_array(int ndim, npy_intp const *shape,
fail:
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);
HandleArrayMethodError(result, "astype", method_flags);
return -1;
}

Expand All @@ -183,7 +194,7 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
PyArray_Descr *dst_dtype, char *dst_data, npy_intp const *dst_strides,
PyArray_Descr *src_dtype, char *src_data, npy_intp const *src_strides,
PyArray_Descr *wheremask_dtype, char *wheremask_data,
npy_intp const *wheremask_strides)
npy_intp const *wheremask_strides, int flags)
{
int idim;
npy_intp shape_it[NPY_MAXDIMS];
Expand All @@ -192,14 +203,11 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
npy_intp wheremask_strides_it[NPY_MAXDIMS];
npy_intp coord[NPY_MAXDIMS];

int aligned;
int aligned = flags & 0x01;
int same_value_cast = (flags & 0x02) == 0x02;

NPY_BEGIN_THREADS_DEF;

aligned =
copycast_isaligned(ndim, shape, dst_dtype, dst_data, dst_strides) &&
copycast_isaligned(ndim, shape, src_dtype, src_data, src_strides);

/* Use raw iteration with no heap allocation */
if (PyArray_PrepareThreeRawArrayIter(
ndim, shape,
Expand Down Expand Up @@ -229,39 +237,48 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,

/* Get the function to do the casting */
NPY_cast_info cast_info;
NPY_ARRAYMETHOD_FLAGS flags;
NPY_ARRAYMETHOD_FLAGS method_flags;
if (PyArray_GetMaskedDTypeTransferFunction(aligned,
src_strides_it[0],
dst_strides_it[0],
wheremask_strides_it[0],
src_dtype, dst_dtype, wheremask_dtype,
0,
&cast_info, &flags) != NPY_SUCCEED) {
&cast_info, &method_flags) != NPY_SUCCEED) {
return -1;
}
if (same_value_cast) {
/* cast_info.context.flags |= NPY_SAME_VALUE_CASTING; */
PyErr_SetString(PyExc_NotImplementedError,
"raw_array_wheremasked_assign_array with 'same_value' casting not implemented yet");
return -1;
}

if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
if (!(method_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
npy_clear_floatstatus_barrier(src_data);
}
if (!(flags & NPY_METH_REQUIRES_PYAPI)) {
if (!(method_flags & NPY_METH_REQUIRES_PYAPI)) {
npy_intp nitems = 1, i;
for (i = 0; i < ndim; i++) {
nitems *= shape_it[i];
}
NPY_BEGIN_THREADS_THRESHOLDED(nitems);
}

npy_intp strides[2] = {src_strides_it[0], dst_strides_it[0]};

int result = 0;
NPY_RAW_ITER_START(idim, ndim, coord, shape_it) {
PyArray_MaskedStridedUnaryOp *stransfer;
stransfer = (PyArray_MaskedStridedUnaryOp *)cast_info.func;

/* Process the innermost dimension */
char *args[2] = {src_data, dst_data};
if (stransfer(&cast_info.context,
args, &shape_it[0], strides,
(npy_bool *)wheremask_data, wheremask_strides_it[0],
cast_info.auxdata) < 0) {
result = stransfer(&cast_info.context,
args, &shape_it[0], strides,
(npy_bool *)wheremask_data, wheremask_strides_it[0],
cast_info.auxdata);
if (result < 0) {
goto fail;
}
} NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape_it,
Expand All @@ -272,18 +289,17 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp const *shape,
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);

if (!(flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
if (!(method_flags & NPY_METH_NO_FLOATINGPOINT_ERRORS)) {
int fpes = npy_get_floatstatus_barrier(src_data);
if (fpes && PyUFunc_GiveFloatingpointErrors("cast", fpes) < 0) {
return -1;
}
}

return 0;

fail:
NPY_END_THREADS;
NPY_cast_info_xfree(&cast_info);
HandleArrayMethodError(result, "astype", method_flags);
return -1;
}

Expand All @@ -307,7 +323,6 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
NPY_CASTING casting)
{
int copied_src = 0;

npy_intp src_strides[NPY_MAXDIMS];

/* Use array_assign_scalar if 'src' NDIM is 0 */
Expand Down Expand Up @@ -438,12 +453,17 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
}
}

int aligned =
copycast_isaligned(PyArray_NDIM(dst), PyArray_DIMS(dst), PyArray_DESCR(dst), PyArray_DATA(dst), PyArray_STRIDES(dst)) &&
copycast_isaligned(PyArray_NDIM(dst), PyArray_DIMS(dst), PyArray_DESCR(src), PyArray_DATA(src), src_strides);
int flags = ((NPY_SAME_VALUE_CASTING == casting) << 1) | aligned;

if (wheremask == NULL) {
/* A straightforward value assignment */
/* Do the assignment with raw array iteration */
if (raw_array_assign_array(PyArray_NDIM(dst), PyArray_DIMS(dst),
PyArray_DESCR(dst), PyArray_DATA(dst), PyArray_STRIDES(dst),
PyArray_DESCR(src), PyArray_DATA(src), src_strides) < 0) {
PyArray_DESCR(src), PyArray_DATA(src), src_strides, flags) < 0){
goto fail;
}
}
Expand All @@ -465,7 +485,7 @@ PyArray_AssignArray(PyArrayObject *dst, PyArrayObject *src,
PyArray_DESCR(dst), PyArray_DATA(dst), PyArray_STRIDES(dst),
PyArray_DESCR(src), PyArray_DATA(src), src_strides,
PyArray_DESCR(wheremask), PyArray_DATA(wheremask),
wheremask_strides) < 0) {
wheremask_strides, flags) < 0) {
goto fail;
}
}
Expand Down
Loading