diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 26b7eaca87a04..3e2f35c7b4721 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -49,7 +49,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then # Note: this grep pattern is (intended to be) equivalent to the python # regex r'(?<![ ->])> ' MSG='Linting .pyx code for spacing conventions in casting' ; echo $MSG - ! grep -r -E --include '*.pyx' --include '*.pxi.in' '> ' pandas/_libs | grep -v '[ ->]> ' + ! grep -r -E --include '*.pyx' --include '*.pxi.in' '[a-zA-Z0-9*]> ' pandas/_libs RET=$(($RET + $?)) ; echo $MSG "DONE" # readability/casting: Warnings about C casting instead of C++ casting diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 24828db64c392..d675ceab13667 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -128,11 +128,11 @@ def is_lexsorted(list_of_arrays: list) -> bint: nlevels = len(list_of_arrays) n = len(list_of_arrays[0]) - cdef int64_t **vecs = <int64_t**> malloc(nlevels * sizeof(int64_t*)) + cdef int64_t **vecs = <int64_t**>malloc(nlevels * sizeof(int64_t*)) for i in range(nlevels): arr = list_of_arrays[i] assert arr.dtype.name == 'int64' - vecs[i] = <int64_t*> cnp.PyArray_DATA(arr) + vecs[i] = <int64_t*>cnp.PyArray_DATA(arr) # Assume uniqueness??
with nogil: diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index c72b4001dcb79..9e758700811a8 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -44,7 +44,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: if na_count == n: return NaN - tmp = <float64_t*> malloc((n - na_count) * sizeof(float64_t)) + tmp = <float64_t*>malloc((n - na_count) * sizeof(float64_t)) j = 0 for i in range(n): @@ -121,7 +121,7 @@ def group_median_float64(ndarray[float64_t, ndim=2] out, counts[:] = _counts[1:] data = np.empty((K, N), dtype=np.float64) - ptr = <float64_t*> cnp.PyArray_DATA(data) + ptr = <float64_t*>cnp.PyArray_DATA(data) take_2d_axis1_float64_float64(values.T, indexer, out=data) diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index c2305c8f3ff00..6e66693decc01 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -54,8 +54,8 @@ def hash_object_array(object[:] arr, object key, object encoding='utf8'): n = len(arr) # create an array of bytes - vecs = <char **> malloc(n * sizeof(char *)) - lens = <uint64_t*> malloc(n * sizeof(uint64_t)) + vecs = <char **>malloc(n * sizeof(char *)) + lens = <uint64_t*>malloc(n * sizeof(uint64_t)) for i in range(n): val = arr[i] diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index affb6a038074a..36ed8a88aa78b 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -590,13 +590,13 @@ cdef class StringHashTable(HashTable): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] labels = np.empty(n, dtype=np.int64) - int64_t *resbuf = <int64_t*> labels.data + int64_t *resbuf = <int64_t*>labels.data khiter_t k kh_str_t *table = self.table const char *v const char **vecs - vecs = <const char **> malloc(n * sizeof(char *)) + vecs = <const char **>malloc(n * sizeof(char *)) for i in range(n): val = values[i] v = util.get_c_string(val) @@ -639,7 +639,7 @@ cdef class StringHashTable(HashTable): const char *v const char **vecs - vecs = <const char **> malloc(n * sizeof(char *)) + vecs = <const char **>malloc(n * sizeof(char *)) uindexer = 
np.empty(n, dtype=np.int64) for i in range(n): val = values[i] @@ -674,7 +674,7 @@ cdef class StringHashTable(HashTable): int64_t[:] locs = np.empty(n, dtype=np.int64) # these by-definition *must* be strings - vecs = <const char **> malloc(n * sizeof(char *)) + vecs = <const char **>malloc(n * sizeof(char *)) for i in range(n): val = values[i] @@ -707,7 +707,7 @@ cdef class StringHashTable(HashTable): khiter_t k # these by-definition *must* be strings - vecs = <const char **> malloc(n * sizeof(char *)) + vecs = <const char **>malloc(n * sizeof(char *)) for i in range(n): val = values[i] diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 027a4e36204dc..a2a718aa8b591 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -361,7 +361,7 @@ cdef class TextReader: if not isinstance(encoding, bytes): encoding = encoding.encode('utf-8') encoding = encoding.lower() - self.c_encoding = <char*> encoding + self.c_encoding = <char*>encoding else: self.c_encoding = NULL @@ -611,7 +611,7 @@ cdef class TextReader: for i in self.skiprows: parser_add_skiprow(self.parser, i) else: - self.parser.skipfunc = <PyObject *> self.skiprows + self.parser.skipfunc = <PyObject *>self.skiprows cdef _setup_parser_source(self, source): cdef: @@ -668,7 +668,7 @@ cdef class TextReader: source = icom.UTF8Recoder(source, self.encoding.decode('utf-8')) self.encoding = b'utf-8' - self.c_encoding = <char*> self.encoding + self.c_encoding = <char*>self.encoding self.handle = source @@ -1444,7 +1444,7 @@ cdef _string_box_factorize(parser_t *parser, int64_t col, pyval = PyBytes_FromString(word) k = kh_put_strbox(table, word, &ret) - table.vals[k] = <PyObject*> pyval + table.vals[k] = <PyObject*>pyval result[i] = pyval @@ -1498,7 +1498,7 @@ cdef _string_box_utf8(parser_t *parser, int64_t col, pyval = PyUnicode_FromString(word) k = kh_put_strbox(table, word, &ret) - table.vals[k] = <PyObject*> pyval + table.vals[k] = <PyObject*>pyval result[i] = pyval @@ -1556,7 +1556,7 @@ cdef _string_box_decode(parser_t *parser, int64_t col, pyval = PyUnicode_Decode(word, size, encoding, errors) k = kh_put_strbox(table, word, &ret) - table.vals[k] 
= <PyObject*> pyval + table.vals[k] = <PyObject*>pyval result[i] = pyval @@ -1648,7 +1648,7 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, ndarray result result = np.empty(line_end - line_start, dtype='|S%d' % width) - data = <char*> result.data + data = <char*>result.data with nogil: _to_fw_string_nogil(parser, col, line_start, line_end, width, data) @@ -1695,7 +1695,7 @@ cdef _try_double(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.float64) - data = <float64_t *> result.data + data = <float64_t *>result.data na_fset = kset_float64_from_list(na_flist) if parser.double_converter_nogil != NULL: # if it can run without the GIL with nogil: @@ -1803,7 +1803,7 @@ cdef _try_uint64(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.uint64) - data = <uint64_t *> result.data + data = <uint64_t *>result.data uint_state_init(&state) coliter_setup(&it, parser, col, line_start) @@ -1879,7 +1879,7 @@ cdef _try_int64(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.int64) - data = <int64_t *> result.data + data = <int64_t *>result.data coliter_setup(&it, parser, col, line_start) with nogil: error = _try_int64_nogil(parser, col, line_start, line_end, @@ -1951,7 +1951,7 @@ cdef _try_bool_flex(parser_t *parser, int64_t col, lines = line_end - line_start result = np.empty(lines, dtype=np.uint8) - data = <uint8_t *> result.data + data = <uint8_t *>result.data with nogil: error = _try_bool_flex_nogil(parser, col, line_start, line_end, na_filter, na_hashset, true_hashset, diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 951c163522401..6f892c928805e 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -467,7 +467,7 @@ cdef class Slider: self.buf.strides[0] = self.stride cpdef advance(self, Py_ssize_t k): - self.buf.data = <char*> self.buf.data + self.stride * k + self.buf.data = <char*>self.buf.data + self.stride * k cdef move(self, int start, int end): """ @@ -572,7 +572,7 @@ cdef class BlockSlider: self.idx_slider = 
Slider( self.frame.index.values, self.dummy.index.values) - self.base_ptrs = <char**> malloc(sizeof(char*) * len(self.blocks)) + self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks)) for i, block in enumerate(self.blocks): self.base_ptrs[i] = (<ndarray> block).data diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 67698f1b4c2ca..bfb03ef307355 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -342,8 +342,8 @@ cdef class BlockIndex(SparseIndex): self.blengths = np.ascontiguousarray(blengths, dtype=np.int32) # in case we need - self.locbuf = <int32_t*> self.blocs.data - self.lenbuf = <int32_t*> self.blengths.data + self.locbuf = <int32_t*>self.blocs.data + self.lenbuf = <int32_t*>self.blengths.data self.length = length self.nblocks = np.int32(len(self.blocs)) @@ -853,7 +853,7 @@ def get_reindexer(ndarray[object, ndim=1] values, dict index_map): # SparseIndex index): # self.index = index -# self.buf = <float64_t*> values.data +# self.buf = <float64_t*>values.data def reindex_integer(ndarray[float64_t, ndim=1] values, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f55966fd053af..9fc3d23403afc 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -907,7 +907,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, trans, deltas, typ = get_dst_info(tz) - tdata = <int64_t*> cnp.PyArray_DATA(trans) + tdata = <int64_t*>cnp.PyArray_DATA(trans) ntrans = len(trans) # Determine whether each date lies left of the DST transition (store in diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index a769bbb081398..ebcbea0ee30b3 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1260,7 +1260,7 @@ cdef object _period_strftime(int64_t value, int freq, object fmt): fmt = fmt.replace(pat, repl) found_pat[i] = True - formatted = c_strftime(&dts, <char*> fmt) + formatted = c_strftime(&dts, <char*>fmt) result = util.char_to_string(formatted) free(formatted) diff --git a/pandas/_libs/window.pyx 
b/pandas/_libs/window.pyx index 8de2852942865..bb7af67d14585 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1611,17 +1611,17 @@ def roll_generic(object obj, output[i] = NaN # remaining full-length windows - buf = <float64_t*> arr.data + buf = <float64_t*>arr.data bufarr = np.empty(win, dtype=float) - oldbuf = <float64_t*> bufarr.data + oldbuf = <float64_t*>bufarr.data for i from (win - offset) <= i < (N - offset): buf = buf + 1 - bufarr.data = <char*> buf + bufarr.data = <char*>buf if counts[i] >= minp: output[i] = func(bufarr, *args, **kwargs) else: output[i] = NaN - bufarr.data = <char*> oldbuf + bufarr.data = <char*>oldbuf # truncated windows at the end for i from int_max(N - offset, 0) <= i < N: