From 2de660e8d018feb6f0e399877ef97d43617109ae Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 5 Dec 2022 16:15:47 +0100 Subject: [PATCH 1/2] BUG: infer-objects raising for bytes Series --- pandas/core/internals/blocks.py | 3 +++ pandas/tests/series/methods/test_infer_objects.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 95300c888eede..e5a5cda744736 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1969,6 +1969,9 @@ def convert( attempt to cast any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! """ + if self.dtype != object: + return [self] + values = self.values if values.ndim == 2: # maybe_split ensures we only get here with values.shape[0] == 1, diff --git a/pandas/tests/series/methods/test_infer_objects.py b/pandas/tests/series/methods/test_infer_objects.py index bb83f62f5ebb5..c29079f5e95dd 100644 --- a/pandas/tests/series/methods/test_infer_objects.py +++ b/pandas/tests/series/methods/test_infer_objects.py @@ -21,3 +21,10 @@ def test_infer_objects_series(self): assert actual.dtype == "object" tm.assert_series_equal(actual, expected) + + def test_infer_objects_bytes(self): + # GH#49650 + ser = Series([b"a"], dtype="bytes") + expected = ser.copy() + result = ser.infer_objects() + tm.assert_series_equal(result, expected) From 50a33f681ee095b32fa833b0fed1b9a3f700d0f9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 7 Dec 2022 20:38:02 +0100 Subject: [PATCH 2/2] Merge remote-tracking branch 'upstream/main' into 49650 # Conflicts: # pandas/tests/series/methods/test_infer_objects.py --- .../autoupdate-pre-commit-config.yml | 2 +- .github/workflows/code-checks.yml | 6 +- .github/workflows/package-checks.yml | 2 +- ci/deps/actions-38-minimum_versions.yaml | 2 +- .../development/contributing_codebase.rst | 17 +- doc/source/getting_started/install.rst | 2 +- 
doc/source/user_guide/groupby.rst | 7 +- doc/source/whatsnew/v2.0.0.rst | 46 +++- pandas/_libs/algos.pyx | 6 +- pandas/_libs/groupby.pyx | 8 +- pandas/_libs/hashing.pyx | 8 +- pandas/_libs/hashtable.pxd | 2 +- pandas/_libs/hashtable_class_helper.pxi.in | 10 +- pandas/_libs/index.pyx | 12 +- pandas/_libs/intervaltree.pxi.in | 3 +- pandas/_libs/lib.pyx | 60 +++--- pandas/_libs/missing.pyx | 6 +- pandas/_libs/parsers.pyx | 70 +++--- pandas/_libs/sparse_op_helper.pxi.in | 12 +- pandas/_libs/tslib.pyx | 4 +- pandas/_libs/tslibs/conversion.pyx | 10 +- pandas/_libs/tslibs/fields.pyx | 10 +- pandas/_libs/tslibs/nattype.pyx | 6 +- pandas/_libs/tslibs/np_datetime.pyx | 22 +- pandas/_libs/tslibs/offsets.pyx | 38 +++- pandas/_libs/tslibs/parsing.pyx | 18 +- pandas/_libs/tslibs/period.pyx | 30 +-- pandas/_libs/tslibs/timedeltas.pxd | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 14 +- pandas/_libs/tslibs/timestamps.pyx | 6 +- pandas/_libs/tslibs/timezones.pyx | 12 +- pandas/_libs/tslibs/tzconversion.pxd | 2 +- pandas/_libs/tslibs/tzconversion.pyx | 6 +- pandas/_libs/tslibs/vectorized.pyx | 2 +- pandas/_libs/window/aggregations.pyx | 200 ++++++++---------- pandas/_testing/_io.py | 2 +- pandas/compat/_optional.py | 2 +- pandas/core/arrays/datetimes.py | 40 ++-- pandas/core/arrays/sparse/array.py | 38 ++-- pandas/core/arrays/sparse/dtype.py | 10 +- pandas/core/dtypes/astype.py | 25 ++- pandas/core/dtypes/cast.py | 2 +- pandas/core/dtypes/concat.py | 13 +- pandas/core/frame.py | 13 +- pandas/core/generic.py | 10 +- pandas/core/groupby/generic.py | 22 +- pandas/core/groupby/groupby.py | 7 +- pandas/core/indexes/base.py | 103 ++++----- pandas/core/indexes/datetimes.py | 11 +- pandas/core/indexes/multi.py | 9 +- pandas/core/indexes/period.py | 8 +- pandas/core/indexes/range.py | 20 +- pandas/core/indexes/timedeltas.py | 4 +- pandas/core/internals/array_manager.py | 10 +- pandas/core/internals/blocks.py | 3 + pandas/core/internals/construction.py | 2 +- pandas/core/internals/managers.py | 
4 +- pandas/core/ops/__init__.py | 34 ++- pandas/core/reshape/merge.py | 16 +- pandas/core/series.py | 16 +- pandas/core/window/common.py | 2 + pandas/io/formats/style.py | 42 ++-- pandas/io/parsers/base_parser.py | 4 +- pandas/io/sas/byteswap.pyx | 4 +- pandas/io/sas/sas.pyx | 10 +- pandas/plotting/_core.py | 8 + pandas/plotting/_matplotlib/hist.py | 8 +- pandas/tests/apply/test_frame_apply.py | 6 +- pandas/tests/apply/test_str.py | 5 +- pandas/tests/arithmetic/test_numeric.py | 6 - pandas/tests/arithmetic/test_object.py | 13 +- pandas/tests/arithmetic/test_timedelta64.py | 31 +-- pandas/tests/arrays/sparse/test_astype.py | 12 ++ pandas/tests/arrays/test_datetimes.py | 9 + pandas/tests/dtypes/test_inference.py | 1 - pandas/tests/dtypes/test_missing.py | 17 +- pandas/tests/extension/base/constructors.py | 2 +- pandas/tests/extension/base/missing.py | 2 +- pandas/tests/frame/indexing/test_xs.py | 2 +- pandas/tests/frame/methods/test_astype.py | 16 ++ pandas/tests/frame/methods/test_count.py | 2 +- .../frame/methods/test_get_numeric_data.py | 2 +- pandas/tests/frame/methods/test_quantile.py | 4 +- pandas/tests/frame/methods/test_rank.py | 2 +- pandas/tests/frame/methods/test_rename.py | 2 +- pandas/tests/frame/methods/test_to_csv.py | 2 +- pandas/tests/frame/test_constructors.py | 33 ++- pandas/tests/frame/test_reductions.py | 2 +- pandas/tests/frame/test_stack_unstack.py | 3 +- .../tests/groupby/aggregate/test_aggregate.py | 2 +- pandas/tests/groupby/aggregate/test_cython.py | 4 +- pandas/tests/groupby/test_apply.py | 7 +- pandas/tests/groupby/test_grouping.py | 20 +- pandas/tests/groupby/test_min_max.py | 5 +- pandas/tests/groupby/test_pipe.py | 4 +- .../tests/groupby/transform/test_transform.py | 24 +-- .../tests/indexes/datetimes/test_indexing.py | 88 -------- .../tests/indexes/interval/test_interval.py | 7 + pandas/tests/indexes/multi/test_analytics.py | 5 + pandas/tests/indexes/multi/test_indexing.py | 4 - pandas/tests/indexes/numeric/test_indexing.py | 88 
+------- pandas/tests/indexes/object/test_indexing.py | 14 -- pandas/tests/indexes/period/test_indexing.py | 119 +++-------- pandas/tests/indexes/test_base.py | 73 +++---- pandas/tests/indexes/test_index_new.py | 26 ++- pandas/tests/indexes/test_indexing.py | 36 ++-- .../tests/indexes/timedeltas/test_indexing.py | 31 +-- pandas/tests/indexing/test_coercion.py | 9 +- pandas/tests/indexing/test_floats.py | 11 +- pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/indexing/test_na_indexing.py | 2 +- pandas/tests/indexing/test_partial.py | 8 +- pandas/tests/internals/test_internals.py | 6 +- pandas/tests/io/excel/test_readers.py | 2 +- pandas/tests/io/formats/test_info.py | 2 +- pandas/tests/io/formats/test_to_latex.py | 8 +- pandas/tests/io/json/test_pandas.py | 15 +- pandas/tests/io/parser/dtypes/test_empty.py | 14 +- pandas/tests/io/parser/test_index_col.py | 1 + pandas/tests/io/parser/test_parse_dates.py | 5 +- pandas/tests/io/parser/test_read_fwf.py | 2 +- .../io/parser/usecols/test_usecols_basic.py | 4 +- pandas/tests/io/pytables/test_put.py | 10 +- pandas/tests/io/test_html.py | 5 +- pandas/tests/io/test_parquet.py | 4 +- pandas/tests/io/test_pickle.py | 2 +- pandas/tests/plotting/test_series.py | 5 +- pandas/tests/resample/test_base.py | 2 +- pandas/tests/reshape/concat/test_empty.py | 18 +- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 32 +-- pandas/tests/reshape/test_pivot.py | 8 +- pandas/tests/series/methods/test_reindex.py | 2 +- pandas/tests/series/test_constructors.py | 16 +- pandas/tests/series/test_ufunc.py | 4 +- pandas/tests/strings/test_strings.py | 2 +- pandas/tests/test_downstream.py | 10 +- pandas/tests/window/test_expanding.py | 4 +- pandas/tests/window/test_pairwise.py | 4 +- pyproject.toml | 5 +- 140 files changed, 985 insertions(+), 1137 deletions(-) diff --git a/.github/workflows/autoupdate-pre-commit-config.yml b/.github/workflows/autoupdate-pre-commit-config.yml index 
9a41871c26062..5963e819c0dae 100644 --- a/.github/workflows/autoupdate-pre-commit-config.yml +++ b/.github/workflows/autoupdate-pre-commit-config.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 - name: Cache multiple paths uses: actions/cache@v3 with: diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 98770854f53dd..91deb7f517ca7 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -30,9 +30,9 @@ jobs: uses: actions/checkout@v3 - name: Install Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: - python-version: '3.9.7' + python-version: '3.9' - name: Run pre-commit uses: pre-commit/action@v2.0.3 @@ -177,7 +177,7 @@ jobs: - name: Setup Python id: setup_python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: '3.8' cache: 'pip' diff --git a/.github/workflows/package-checks.yml b/.github/workflows/package-checks.yml index 08fc3fe4c50a4..070d7b3a71bc7 100644 --- a/.github/workflows/package-checks.yml +++ b/.github/workflows/package-checks.yml @@ -36,7 +36,7 @@ jobs: - name: Setup Python id: setup_python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: '3.8' diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index 512aa13c6899a..de7e793c46d19 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -54,7 +54,7 @@ dependencies: - sqlalchemy=1.4.16 - tabulate=0.8.9 - tzdata=2022a - - xarray=0.19.0 + - xarray=0.21.0 - xlrd=2.0.1 - xlsxwriter=1.4.3 - zstandard=0.15.2 diff --git a/doc/source/development/contributing_codebase.rst b/doc/source/development/contributing_codebase.rst index 91f3d51460f99..b05f026bbbb44 100644 --- a/doc/source/development/contributing_codebase.rst +++ 
b/doc/source/development/contributing_codebase.rst @@ -338,7 +338,22 @@ Writing tests All tests should go into the ``tests`` subdirectory of the specific package. This folder contains many current examples of tests, and we suggest looking to these for -inspiration. Ideally, there should be one, and only one, obvious place for a test to reside. +inspiration. + +As a general tip, you can use the search functionality in your integrated development +environment (IDE) or the git grep command in a terminal to find test files in which the method +is called. If you are unsure of the best location to put your test, take your best guess, +but note that reviewers may request that you move the test to a different location. + +To use git grep, you can run the following command in a terminal: + +``git grep "function_name("`` + +This will search through all files in your repository for the text ``function_name(``. +This can be a useful way to quickly locate the function in the +codebase and determine the best location to add a test for it. + +Ideally, there should be one, and only one, obvious place for a test to reside. Until we reach that ideal, these are some rules of thumb for where a test should be located. diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 8e8f61c1d503f..68065c77f7881 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -333,7 +333,7 @@ Installable with ``pip install "pandas[computation]"``. 
Dependency Minimum Version pip extra Notes ========================= ================== =============== ============================================================= SciPy 1.7.1 computation Miscellaneous statistical functions -xarray 0.19.0 computation pandas-like API for N-dimensional data +xarray 0.21.0 computation pandas-like API for N-dimensional data ========================= ================== =============== ============================================================= Excel files diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 1e2b6d6fe4c20..ad53bd39e5ed3 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -774,12 +774,11 @@ as the one being grouped. The transform function must: * (Optionally) operates on the entire group chunk. If this is supported, a fast path is used starting from the *second* chunk. -.. deprecated:: 1.5.0 +.. versionchanged:: 2.0.0 When using ``.transform`` on a grouped DataFrame and the transformation function - returns a DataFrame, currently pandas does not align the result's index - with the input's index. This behavior is deprecated and alignment will - be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + returns a DataFrame, pandas now aligns the result's index + with the input's index. You can call ``.to_numpy()`` on the result of the transformation function to avoid alignment. 
Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b1400be59b3a1..57558491e0e57 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -83,6 +83,9 @@ Other enhancements - :func:`timedelta_range` now supports a ``unit`` keyword ("s", "ms", "us", or "ns") to specify the desired resolution of the output index (:issue:`49824`) - :meth:`DataFrame.to_json` now supports a ``mode`` keyword with supported inputs 'w' and 'a'. Defaulting to 'w', 'a' can be used when lines=True and orient='records' to append record oriented json lines to an existing json file. (:issue:`35849`) - Added ``name`` parameter to :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_arrays` and :meth:`IntervalIndex.from_tuples` (:issue:`48911`) +- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`) +- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`) +- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`) - .. --------------------------------------------------------------------------- @@ -309,6 +312,35 @@ The new behavior, as for datetime64, either gives exactly the requested dtype or ser.astype("timedelta64[s]") ser.astype("timedelta64[D]") +.. 
_whatsnew_200.api_breaking.zero_len_indexes: + +Empty DataFrames/Series will now default to have a ``RangeIndex`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Before, constructing an empty (where ``data`` is ``None`` or an empty list-like argument) :class:`Series` or :class:`DataFrame` without +specifying the axes (``index=None``, ``columns=None``) would return the axes as empty :class:`Index` with object dtype. + +Now, the axes return an empty :class:`RangeIndex`. + +*Previous behavior*: + +.. code-block:: ipython + + In [8]: pd.Series().index + Out[8]: + Index([], dtype='object') + + In [9]: pd.DataFrame().axes + Out[9]: + [Index([], dtype='object'), Index([], dtype='object')] + +*New behavior*: + +.. ipython:: python + + pd.Series().index + pd.DataFrame().axes + .. _whatsnew_200.api_breaking.deps: Increased minimum versions for dependencies @@ -337,6 +369,8 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | fastparquet | 0.6.3 | X | +-----------------+-----------------+---------+ +| xarray | 0.21.0 | X | ++-----------------+-----------------+---------+ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. 
@@ -365,9 +399,11 @@ Other API changes - Passing a sequence containing a type that cannot be converted to :class:`Timedelta` to :func:`to_timedelta` or to the :class:`Series` or :class:`DataFrame` constructor with ``dtype="timedelta64[ns]"`` or to :class:`TimedeltaIndex` now raises ``TypeError`` instead of ``ValueError`` (:issue:`49525`) - Changed behavior of :class:`Index` constructor with sequence containing at least one ``NaT`` and everything else either ``None`` or ``NaN`` to infer ``datetime64[ns]`` dtype instead of ``object``, matching :class:`Series` behavior (:issue:`49340`) - :func:`read_stata` with parameter ``index_col`` set to ``None`` (the default) will now set the index on the returned :class:`DataFrame` to a :class:`RangeIndex` instead of a :class:`Int64Index` (:issue:`49745`) +- Changed behavior of :class:`Index`, :class:`Series`, and :class:`DataFrame` arithmetic methods when working with object-dtypes, the results no longer do type inference on the result of the array operations, use ``result.infer_objects()`` to do type inference on the result (:issue:`49999`) - Changed behavior of :class:`Index` constructor with an object-dtype ``numpy.ndarray`` containing all-``bool`` values or all-complex values, this will now retain object dtype, consistent with the :class:`Series` behavior (:issue:`49594`) - Changed behavior of :meth:`DataFrame.shift` with ``axis=1``, an integer ``fill_value``, and homogeneous datetime-like dtype, this now fills new columns with integer dtypes instead of casting to datetimelike (:issue:`49842`) - Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`) +- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. 
Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`) - :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`) - @@ -519,6 +555,7 @@ Removal of prior version deprecations/changes - Removed the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument (:issue:`40245`) - Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`) - Removed the ``truediv`` keyword from :func:`eval` (:issue:`29812`) +- Removed the ``method`` and ``tolerance`` arguments in :meth:`Index.get_loc`. Use ``index.get_indexer([label], method=..., tolerance=...)`` instead (:issue:`42269`) - Removed the ``pandas.datetime`` submodule (:issue:`30489`) - Removed the ``pandas.np`` submodule (:issue:`30296`) - Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`) @@ -561,6 +598,7 @@ Removal of prior version deprecations/changes - Changed behavior of :meth:`SparseArray.astype` when given a dtype that is not explicitly ``SparseDtype``, cast to the exact requested dtype rather than silently using a ``SparseDtype`` instead (:issue:`34457`) - Changed behavior of :meth:`Index.ravel` to return a view on the original :class:`Index` instead of a ``np.ndarray`` (:issue:`36900`) - Changed behavior of :meth:`Series.to_frame` and :meth:`Index.to_frame` with explicit ``name=None`` to use ``None`` for the column name instead of the index's name or default ``0`` (:issue:`45523`) +- Changed behavior of :func:`concat` with one array of ``bool``-dtype and another of integer dtype, this now returns ``object`` dtype instead of integer dtype; explicitly cast the bool object to integer before concatenating to get the old behavior (:issue:`45101`) - 
Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) - Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`) - Changed behavior of :meth:`Index.__and__`, :meth:`Index.__or__` and :meth:`Index.__xor__` to behave as logical operations (matching :class:`Series` behavior) instead of aliases for set operations (:issue:`37374`) @@ -596,7 +634,9 @@ Removal of prior version deprecations/changes - Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`) - Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`) - Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`) +- Using the method :meth:`DataFrameGroupBy.transform` with a callable that returns DataFrames will align to the input's index (:issue:`47244`) - When providing a list of columns of length one to :meth:`DataFrame.groupby`, the keys that are returned by iterating over the resulting :class:`DataFrameGroupBy` object will now be tuples of length one (:issue:`47761`) +- .. --------------------------------------------------------------------------- .. 
_whatsnew_200.performance: @@ -698,7 +738,8 @@ Strings Interval ^^^^^^^^ -- +- Bug in :meth:`IntervalIndex.is_overlapping` returning incorrect output if intervals have duplicate left boundaries (:issue:`49581`) +- Bug in :meth:`Series.infer_objects` failing to infer :class:`IntervalDtype` for an object series of :class:`Interval` objects (:issue:`50090`) - Indexing @@ -785,7 +826,8 @@ Reshaping Sparse ^^^^^^ -- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`) +- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`, :issue:`50087`) +- Bug in :meth:`Series.astype` when converting from ``datetime64[ns]`` to ``Sparse[datetime64[ns]]`` incorrectly raising (:issue:`50082`) - ExtensionArray diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index fcd30ab1faec8..7fcba58772ac4 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -70,7 +70,7 @@ tiebreakers = { } -cdef inline bint are_diff(object left, object right): +cdef bint are_diff(object left, object right): try: return fabs(left - right) > FP_ERR except TypeError: @@ -257,7 +257,7 @@ def groupsort_indexer(const intp_t[:] index, Py_ssize_t ngroups): return indexer.base, counts.base -cdef inline Py_ssize_t swap(numeric_t *a, numeric_t *b) nogil: +cdef Py_ssize_t swap(numeric_t *a, numeric_t *b) nogil: cdef: numeric_t t @@ -268,7 +268,7 @@ cdef inline Py_ssize_t swap(numeric_t *a, numeric_t *b) nogil: return 0 -cdef inline numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil: +cdef numeric_t kth_smallest_c(numeric_t* arr, Py_ssize_t k, Py_ssize_t n) nogil: """ See kth_smallest.__doc__. 
The additional parameter n specifies the maximum number of elements considered in arr, needed for compatibility with usage diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index a5b9bf02dcbe2..f0beab7193183 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -60,7 +60,7 @@ cdef enum InterpolationEnumType: INTERPOLATION_MIDPOINT -cdef inline float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) nogil: +cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) nogil: cdef: int i, j, na_count = 0 float64_t* tmp @@ -97,7 +97,7 @@ cdef inline float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) nog return result -cdef inline float64_t median_linear(float64_t* a, int n) nogil: +cdef float64_t median_linear(float64_t* a, int n) nogil: cdef: int i, j, na_count = 0 float64_t* tmp @@ -134,7 +134,7 @@ cdef inline float64_t median_linear(float64_t* a, int n) nogil: return result -cdef inline float64_t calc_median_linear(float64_t* a, int n, int na_count) nogil: +cdef float64_t calc_median_linear(float64_t* a, int n, int na_count) nogil: cdef: float64_t result @@ -1231,7 +1231,7 @@ def group_quantile( # group_nth, group_last, group_rank # ---------------------------------------------------------------------- -cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: +cdef bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: if numeric_object_t is object: # Should never be used, but we need to avoid the `val != val` below # or else cython will raise about gil acquisition. 
diff --git a/pandas/_libs/hashing.pyx b/pandas/_libs/hashing.pyx index 64f753f13a624..197ec99247b4a 100644 --- a/pandas/_libs/hashing.pyx +++ b/pandas/_libs/hashing.pyx @@ -110,11 +110,11 @@ def hash_object_array( return result.base # .base to retrieve underlying np.ndarray -cdef inline uint64_t _rotl(uint64_t x, uint64_t b) nogil: +cdef uint64_t _rotl(uint64_t x, uint64_t b) nogil: return (x << b) | (x >> (64 - b)) -cdef inline uint64_t u8to64_le(uint8_t* p) nogil: +cdef uint64_t u8to64_le(uint8_t* p) nogil: return (p[0] | p[1] << 8 | p[2] << 16 | @@ -125,8 +125,8 @@ cdef inline uint64_t u8to64_le(uint8_t* p) nogil: p[7] << 56) -cdef inline void _sipround(uint64_t* v0, uint64_t* v1, - uint64_t* v2, uint64_t* v3) nogil: +cdef void _sipround(uint64_t* v0, uint64_t* v1, + uint64_t* v2, uint64_t* v3) nogil: v0[0] += v1[0] v1[0] = _rotl(v1[0], 13) v1[0] ^= v0[0] diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd index b32bd4880588d..6f66884ac8206 100644 --- a/pandas/_libs/hashtable.pxd +++ b/pandas/_libs/hashtable.pxd @@ -185,5 +185,5 @@ cdef class Int64Vector(Vector): cdef resize(self) cpdef ndarray to_array(self) - cdef inline void append(self, int64_t x) + cdef void append(self, int64_t x) cdef extend(self, int64_t[:] x) diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 47dd0cbbd7164..06ad614b4f963 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -140,7 +140,7 @@ ctypedef struct {{name}}VectorData: @cython.wraparound(False) @cython.boundscheck(False) -cdef inline void append_data_{{dtype}}({{name}}VectorData *data, +cdef void append_data_{{dtype}}({{name}}VectorData *data, {{c_type}} x) nogil: data.data[data.n] = x @@ -163,7 +163,7 @@ ctypedef fused vector_data: Complex64VectorData StringVectorData -cdef inline bint needs_resize(vector_data *data) nogil: +cdef bint needs_resize(vector_data *data) nogil: return data.n == data.m # 
---------------------------------------------------------------------- @@ -241,7 +241,7 @@ cdef class {{name}}Vector(Vector): self.external_view_exists = True return self.ao - cdef inline void append(self, {{c_type}} x): + cdef void append(self, {{c_type}} x): if needs_resize(self.data): if self.external_view_exists: @@ -311,7 +311,7 @@ cdef class StringVector(Vector): self.data.m = self.data.n return ao - cdef inline void append(self, char *x): + cdef void append(self, char *x): if needs_resize(self.data): self.resize() @@ -339,7 +339,7 @@ cdef class ObjectVector(Vector): def __len__(self) -> int: return self.n - cdef inline append(self, object obj): + cdef append(self, object obj): if self.n == self.m: if self.external_view_exists: raise ValueError("external reference but " diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index eb4e957f644ac..9e2adee407b1a 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -40,7 +40,7 @@ from pandas._libs.missing cimport ( multiindex_nulls_shift = 2 -cdef inline bint is_definitely_invalid_key(object val): +cdef bint is_definitely_invalid_key(object val): try: hash(val) except TypeError: @@ -176,7 +176,7 @@ cdef class IndexEngine: loc = self.values.searchsorted(self._np_type(val), side="left") return loc - cdef inline _get_loc_duplicates(self, object val): + cdef _get_loc_duplicates(self, object val): # -> Py_ssize_t | slice | ndarray[bool] cdef: Py_ssize_t diff, left, right @@ -225,7 +225,7 @@ cdef class IndexEngine: return self.unique == 1 - cdef inline _do_unique_check(self): + cdef _do_unique_check(self): # this de-facto the same self._ensure_mapping_populated() @@ -244,7 +244,7 @@ cdef class IndexEngine: return self.monotonic_dec == 1 - cdef inline _do_monotonic_check(self): + cdef _do_monotonic_check(self): cdef: bint is_unique try: @@ -277,7 +277,7 @@ cdef class IndexEngine: def is_mapping_populated(self) -> bool: return self.mapping is not None - cdef inline _ensure_mapping_populated(self): + 
cdef _ensure_mapping_populated(self): # this populates the mapping # if its not already populated # also satisfies the need_unique_check @@ -932,7 +932,7 @@ cdef class SharedEngine: return self._get_loc_duplicates(val) - cdef inline _get_loc_duplicates(self, object val): + cdef _get_loc_duplicates(self, object val): # -> Py_ssize_t | slice | ndarray[bool] cdef: Py_ssize_t diff diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in index e7a310513d2fa..0d7c96a6f2f2b 100644 --- a/pandas/_libs/intervaltree.pxi.in +++ b/pandas/_libs/intervaltree.pxi.in @@ -81,7 +81,8 @@ cdef class IntervalTree(IntervalMixin): """How to sort the left labels; this is used for binary search """ if self._left_sorter is None: - self._left_sorter = np.argsort(self.left) + values = [self.right, self.left] + self._left_sorter = np.lexsort(values) return self._left_sorter @property diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 81e0f3de748ff..d23af51a0867f 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -236,7 +236,7 @@ def is_scalar(val: object) -> bool: or is_offset_object(val)) -cdef inline int64_t get_itemsize(object val): +cdef int64_t get_itemsize(object val): """ Get the itemsize of a NumPy scalar, -1 if not a NumPy scalar. @@ -1107,7 +1107,7 @@ def is_list_like(obj: object, allow_sets: bool = True) -> bool: return c_is_list_like(obj, allow_sets) -cdef inline bint c_is_list_like(object obj, bint allow_sets) except -1: +cdef bint c_is_list_like(object obj, bint allow_sets) except -1: # first, performance short-cuts for the most common cases if util.is_array(obj): # exclude zero-dimensional numpy arrays, effectively scalars @@ -1230,7 +1230,7 @@ cdef class Seen: self.interval_ = False self.coerce_numeric = coerce_numeric - cdef inline bint check_uint64_conflict(self) except -1: + cdef bint check_uint64_conflict(self) except -1: """ Check whether we can safely convert a uint64 array to a numeric dtype. 
@@ -1264,7 +1264,7 @@ cdef class Seen: return (self.uint_ and (self.null_ or self.sint_) and not self.coerce_numeric) - cdef inline saw_null(self): + cdef saw_null(self): """ Set flags indicating that a null value was encountered. """ @@ -1591,7 +1591,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str: return "mixed" -cdef inline bint is_timedelta(object o): +cdef bint is_timedelta(object o): return PyDelta_Check(o) or util.is_timedelta64_object(o) @@ -1679,10 +1679,10 @@ cdef class Validator: @cython.internal cdef class BoolValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return util.is_bool_object(value) - cdef inline bint is_array_typed(self) except -1: + cdef bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.bool_) @@ -1696,10 +1696,10 @@ cpdef bint is_bool_array(ndarray values, bint skipna=False): @cython.internal cdef class IntegerValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return util.is_integer_object(value) - cdef inline bint is_array_typed(self) except -1: + cdef bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.integer) @@ -1714,7 +1714,7 @@ cpdef bint is_integer_array(ndarray values, bint skipna=True): @cython.internal cdef class IntegerNaValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return (util.is_integer_object(value) or (util.is_nan(value) and util.is_float_object(value))) @@ -1728,10 +1728,10 @@ cdef bint is_integer_na_array(ndarray values, bint skipna=True): @cython.internal cdef class IntegerFloatValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return util.is_integer_object(value) or 
util.is_float_object(value) - cdef inline bint is_array_typed(self) except -1: + cdef bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.integer) @@ -1745,10 +1745,10 @@ cdef bint is_integer_float_array(ndarray values, bint skipna=True): @cython.internal cdef class FloatValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return util.is_float_object(value) - cdef inline bint is_array_typed(self) except -1: + cdef bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.floating) @@ -1761,13 +1761,13 @@ cpdef bint is_float_array(ndarray values): @cython.internal cdef class ComplexValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return ( util.is_complex_object(value) or (util.is_float_object(value) and is_nan(value)) ) - cdef inline bint is_array_typed(self) except -1: + cdef bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.complexfloating) @@ -1779,7 +1779,7 @@ cdef bint is_complex_array(ndarray values): @cython.internal cdef class DecimalValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return is_decimal(value) @@ -1793,10 +1793,10 @@ cdef bint is_decimal_array(ndarray values, bint skipna=False): @cython.internal cdef class StringValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return isinstance(value, str) - cdef inline bint is_array_typed(self) except -1: + cdef bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.str_) @@ -1810,10 +1810,10 @@ cpdef bint is_string_array(ndarray values, bint skipna=False): @cython.internal cdef class BytesValidator(Validator): - cdef inline bint is_value_typed(self, 
object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return isinstance(value, bytes) - cdef inline bint is_array_typed(self) except -1: + cdef bint is_array_typed(self) except -1: return issubclass(self.dtype.type, np.bytes_) @@ -1836,14 +1836,14 @@ cdef class TemporalValidator(Validator): self.skipna = skipna self.all_generic_na = True - cdef inline bint is_valid(self, object value) except -1: + cdef bint is_valid(self, object value) except -1: return self.is_value_typed(value) or self.is_valid_null(value) cdef bint is_valid_null(self, object value) except -1: raise NotImplementedError(f"{type(self).__name__} child class " "must define is_valid_null") - cdef inline bint is_valid_skipna(self, object value) except -1: + cdef bint is_valid_skipna(self, object value) except -1: cdef: bint is_typed_null = self.is_valid_null(value) bint is_generic_null = value is None or util.is_nan(value) @@ -1864,7 +1864,7 @@ cdef class DatetimeValidator(TemporalValidator): cdef bint is_value_typed(self, object value) except -1: return PyDateTime_Check(value) - cdef inline bint is_valid_null(self, object value) except -1: + cdef bint is_valid_null(self, object value) except -1: return is_null_datetime64(value) @@ -1877,7 +1877,7 @@ cpdef bint is_datetime_array(ndarray values, bint skipna=True): @cython.internal cdef class Datetime64Validator(DatetimeValidator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return util.is_datetime64_object(value) @@ -1891,7 +1891,7 @@ cpdef bint is_datetime64_array(ndarray values, bint skipna=True): @cython.internal cdef class AnyDatetimeValidator(DatetimeValidator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return util.is_datetime64_object(value) or ( PyDateTime_Check(value) and value.tzinfo is None ) @@ -1943,13 +1943,13 @@ cdef class 
TimedeltaValidator(TemporalValidator): cdef bint is_value_typed(self, object value) except -1: return PyDelta_Check(value) - cdef inline bint is_valid_null(self, object value) except -1: + cdef bint is_valid_null(self, object value) except -1: return is_null_timedelta64(value) @cython.internal cdef class AnyTimedeltaValidator(TimedeltaValidator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return is_timedelta(value) @@ -1966,7 +1966,7 @@ cpdef bint is_timedelta_or_timedelta64_array(ndarray values, bint skipna=True): @cython.internal cdef class DateValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return PyDate_Check(value) @@ -1979,7 +1979,7 @@ cpdef bint is_date_array(ndarray values, bint skipna=False): @cython.internal cdef class TimeValidator(Validator): - cdef inline bint is_value_typed(self, object value) except -1: + cdef bint is_value_typed(self, object value) except -1: return PyTime_Check(value) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index b32061fbca0e2..a3b0451381ad2 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -167,7 +167,7 @@ cpdef bint checknull(object val, bint inf_as_na=False): return is_decimal_na(val) -cdef inline bint is_decimal_na(object val): +cdef bint is_decimal_na(object val): """ Is this a decimal.Decimal object Decimal("NAN"). 
""" @@ -258,7 +258,7 @@ def isneginf_scalar(val: object) -> bool: return util.is_float_object(val) and val == NEGINF -cdef inline bint is_null_datetime64(v): +cdef bint is_null_datetime64(v): # determine if we have a null for a datetime (or integer versions), # excluding np.timedelta64('nat') if checknull_with_nat(v) or is_dt64nat(v): @@ -266,7 +266,7 @@ cdef inline bint is_null_datetime64(v): return False -cdef inline bint is_null_timedelta64(v): +cdef bint is_null_timedelta64(v): # determine if we have a null for a timedelta (or integer versions), # excluding np.datetime64('nat') if checknull_with_nat(v) or is_td64nat(v): diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 73005c7b5cfa0..1941cfde4acb9 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1069,10 +1069,10 @@ cdef class TextReader: return results # -> tuple["ArrayLike", int]: - cdef inline _convert_tokens(self, Py_ssize_t i, int64_t start, - int64_t end, object name, bint na_filter, - kh_str_starts_t *na_hashset, - object na_flist, object col_dtype): + cdef _convert_tokens(self, Py_ssize_t i, int64_t start, + int64_t end, object name, bint na_filter, + kh_str_starts_t *na_hashset, + object na_flist, object col_dtype): if col_dtype is not None: col_res, na_count = self._convert_with_dtype( @@ -1574,9 +1574,9 @@ cdef _to_fw_string(parser_t *parser, int64_t col, int64_t line_start, return result -cdef inline void _to_fw_string_nogil(parser_t *parser, int64_t col, - int64_t line_start, int64_t line_end, - size_t width, char *data) nogil: +cdef void _to_fw_string_nogil(parser_t *parser, int64_t col, + int64_t line_start, int64_t line_end, + size_t width, char *data) nogil: cdef: int64_t i coliter_t it @@ -1629,16 +1629,16 @@ cdef _try_double(parser_t *parser, int64_t col, return result, na_count -cdef inline int _try_double_nogil(parser_t *parser, - float64_t (*double_converter)( - const char *, char **, char, - char, char, int, int *, int *) nogil, - int64_t col, 
int64_t line_start, int64_t line_end, - bint na_filter, kh_str_starts_t *na_hashset, - bint use_na_flist, - const kh_float64_t *na_flist, - float64_t NA, float64_t *data, - int *na_count) nogil: +cdef int _try_double_nogil(parser_t *parser, + float64_t (*double_converter)( + const char *, char **, char, + char, char, int, int *, int *) nogil, + int64_t col, int64_t line_start, int64_t line_end, + bint na_filter, kh_str_starts_t *na_hashset, + bint use_na_flist, + const kh_float64_t *na_flist, + float64_t NA, float64_t *data, + int *na_count) nogil: cdef: int error = 0, Py_ssize_t i, lines = line_end - line_start @@ -1738,11 +1738,11 @@ cdef _try_uint64(parser_t *parser, int64_t col, return result -cdef inline int _try_uint64_nogil(parser_t *parser, int64_t col, - int64_t line_start, - int64_t line_end, bint na_filter, - const kh_str_starts_t *na_hashset, - uint64_t *data, uint_state *state) nogil: +cdef int _try_uint64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, + uint64_t *data, uint_state *state) nogil: cdef: int error Py_ssize_t i, lines = line_end - line_start @@ -1802,11 +1802,11 @@ cdef _try_int64(parser_t *parser, int64_t col, return result, na_count -cdef inline int _try_int64_nogil(parser_t *parser, int64_t col, - int64_t line_start, - int64_t line_end, bint na_filter, - const kh_str_starts_t *na_hashset, int64_t NA, - int64_t *data, int *na_count) nogil: +cdef int _try_int64_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, int64_t NA, + int64_t *data, int *na_count) nogil: cdef: int error Py_ssize_t i, lines = line_end - line_start @@ -1865,14 +1865,14 @@ cdef _try_bool_flex(parser_t *parser, int64_t col, return result.view(np.bool_), na_count -cdef inline int _try_bool_flex_nogil(parser_t *parser, int64_t col, - int64_t line_start, - int64_t line_end, bint na_filter, - const 
kh_str_starts_t *na_hashset, - const kh_str_starts_t *true_hashset, - const kh_str_starts_t *false_hashset, - uint8_t NA, uint8_t *data, - int *na_count) nogil: +cdef int _try_bool_flex_nogil(parser_t *parser, int64_t col, + int64_t line_start, + int64_t line_end, bint na_filter, + const kh_str_starts_t *na_hashset, + const kh_str_starts_t *true_hashset, + const kh_str_starts_t *false_hashset, + uint8_t NA, uint8_t *data, + int *na_count) nogil: cdef: int error = 0 Py_ssize_t i, lines = line_end - line_start diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index e6a2c7b1b050a..0e310e91fab74 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -13,7 +13,7 @@ ctypedef fused sparse_t: int64_t -cdef inline float64_t __div__(sparse_t a, sparse_t b): +cdef float64_t __div__(sparse_t a, sparse_t b): if b == 0: if a > 0: return INF @@ -25,11 +25,11 @@ cdef inline float64_t __div__(sparse_t a, sparse_t b): return float(a) / b -cdef inline float64_t __truediv__(sparse_t a, sparse_t b): +cdef float64_t __truediv__(sparse_t a, sparse_t b): return __div__(a, b) -cdef inline sparse_t __mod__(sparse_t a, sparse_t b): +cdef sparse_t __mod__(sparse_t a, sparse_t b): if b == 0: if sparse_t is float64_t: return NaN @@ -39,7 +39,7 @@ cdef inline sparse_t __mod__(sparse_t a, sparse_t b): return a % b -cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): +cdef sparse_t __floordiv__(sparse_t a, sparse_t b): if b == 0: if sparse_t is float64_t: # Match non-sparse Series behavior implemented in mask_zero_div_zero @@ -131,7 +131,7 @@ def get_dispatch(dtypes): @cython.wraparound(False) @cython.boundscheck(False) -cdef inline tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, +cdef tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, BlockIndex xindex, {{dtype}}_t xfill, {{dtype}}_t[:] y_, @@ -232,7 +232,7 @@ cdef inline tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, 
@cython.wraparound(False) @cython.boundscheck(False) -cdef inline tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, +cdef tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, IntIndex xindex, {{dtype}}_t xfill, {{dtype}}_t[:] y_, diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 35a4131d11d50..6f58fecd1ac81 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -434,7 +434,7 @@ def first_non_null(values: ndarray) -> int: if ( isinstance(val, str) and - (len(val) == 0 or val in ("now", "today", *nat_strings)) + (len(val) == 0 or val in nat_strings or val in ("now", "today")) ): continue return i @@ -842,7 +842,7 @@ cdef _array_to_datetime_object( return oresult, None -cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc): +cdef bint _parse_today_now(str val, int64_t* iresult, bint utc): # We delay this check for as long as possible # because it catches relatively rare cases diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 1b6dace6e90b1..3b2f9e5f827d9 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -83,7 +83,7 @@ TD64NS_DTYPE = np.dtype("m8[ns]") # ---------------------------------------------------------------------- # Unit Conversion Helpers -cdef inline int64_t cast_from_unit(object ts, str unit) except? -1: +cdef int64_t cast_from_unit(object ts, str unit) except? -1: """ Return a casting of the unit represented to nanoseconds round the fractional part of a float to our precision, p. @@ -170,7 +170,7 @@ cpdef inline (int64_t, int) precision_from_unit(str unit): return m, p -cdef inline int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1: +cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1: """ Extract the value and unit from a np.datetime64 object, then convert the value to nanoseconds if necessary. 
@@ -545,7 +545,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit, return convert_datetime_to_tsobject(dt, tz) -cdef inline check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns): +cdef check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Check that we haven't silently overflowed in timezone conversion @@ -588,7 +588,7 @@ cdef inline check_overflows(_TSObject obj, NPY_DATETIMEUNIT reso=NPY_FR_ns): # ---------------------------------------------------------------------- # Localization -cdef inline void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso): +cdef void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso): """ Given the UTC nanosecond timestamp in obj.value, find the wall-clock representation of that timestamp in the given timezone. @@ -630,7 +630,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso): obj.tzinfo = tz -cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz): +cdef datetime _localize_pydatetime(datetime dt, tzinfo tz): """ Take a datetime/Timestamp in UTC and localizes to timezone tz. diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 7e5d1d13cbda3..242e7159d29b5 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -189,7 +189,7 @@ def get_date_name_field( return out -cdef inline bint _is_on_month(int month, int compare_month, int modby) nogil: +cdef bint _is_on_month(int month, int compare_month, int modby) nogil: """ Analogous to DateOffset.is_on_offset checking for the month part of a date. 
""" @@ -682,7 +682,7 @@ class RoundTo: return 4 -cdef inline ndarray[int64_t] _floor_int64(const int64_t[:] values, int64_t unit): +cdef ndarray[int64_t] _floor_int64(const int64_t[:] values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") @@ -700,7 +700,7 @@ cdef inline ndarray[int64_t] _floor_int64(const int64_t[:] values, int64_t unit) return result -cdef inline ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit): +cdef ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit): cdef: Py_ssize_t i, n = len(values) ndarray[int64_t] result = np.empty(n, dtype="i8") @@ -724,11 +724,11 @@ cdef inline ndarray[int64_t] _ceil_int64(const int64_t[:] values, int64_t unit): return result -cdef inline ndarray[int64_t] _rounddown_int64(values, int64_t unit): +cdef ndarray[int64_t] _rounddown_int64(values, int64_t unit): return _ceil_int64(values - unit // 2, unit) -cdef inline ndarray[int64_t] _roundup_int64(values, int64_t unit): +cdef ndarray[int64_t] _roundup_int64(values, int64_t unit): return _floor_int64(values + unit // 2, unit) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index aa75c886a4491..9407f57a282bf 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -1216,14 +1216,14 @@ NaT = c_NaT # Python-visible # ---------------------------------------------------------------------- -cdef inline bint checknull_with_nat(object val): +cdef bint checknull_with_nat(object val): """ Utility to check if a value is a nat or not. """ return val is None or util.is_nan(val) or val is c_NaT -cdef inline bint is_dt64nat(object val): +cdef bint is_dt64nat(object val): """ Is this a np.datetime64 object np.datetime64("NaT"). """ @@ -1232,7 +1232,7 @@ cdef inline bint is_dt64nat(object val): return False -cdef inline bint is_td64nat(object val): +cdef bint is_td64nat(object val): """ Is this a np.timedelta64 object np.timedelta64("NaT"). 
""" diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index e5f683c56da9b..c4f7812cea107 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -59,7 +59,7 @@ cdef extern from "src/datetime/np_datetime_strings.h": # ---------------------------------------------------------------------- # numpy object inspection -cdef inline npy_datetime get_datetime64_value(object obj) nogil: +cdef npy_datetime get_datetime64_value(object obj) nogil: """ returns the int64 value underlying scalar numpy datetime64 object @@ -69,14 +69,14 @@ cdef inline npy_datetime get_datetime64_value(object obj) nogil: return (obj).obval -cdef inline npy_timedelta get_timedelta64_value(object obj) nogil: +cdef npy_timedelta get_timedelta64_value(object obj) nogil: """ returns the int64 value underlying scalar numpy timedelta64 object """ return (obj).obval -cdef inline NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: +cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil: """ returns the unit part of the dtype for a numpy datetime64 object. """ @@ -136,7 +136,7 @@ cdef bint cmp_dtstructs( return cmp_res == -1 or cmp_res == 0 -cdef inline bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: +cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: """ cmp_scalar is a more performant version of PyObject_RichCompare typed for int64_t arguments. 
@@ -229,7 +229,7 @@ def py_td64_to_tdstruct(int64_t td64, NPY_DATETIMEUNIT unit): return tds # <- returned as a dict to python -cdef inline void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts): +cdef void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts): if PyDateTime_CheckExact(dt): dts.year = PyDateTime_GET_YEAR(dt) else: @@ -246,9 +246,9 @@ cdef inline void pydatetime_to_dtstruct(datetime dt, npy_datetimestruct *dts): dts.ps = dts.as = 0 -cdef inline int64_t pydatetime_to_dt64(datetime val, - npy_datetimestruct *dts, - NPY_DATETIMEUNIT reso=NPY_FR_ns): +cdef int64_t pydatetime_to_dt64(datetime val, + npy_datetimestruct *dts, + NPY_DATETIMEUNIT reso=NPY_FR_ns): """ Note we are assuming that the datetime object is timezone-naive. """ @@ -256,7 +256,7 @@ cdef inline int64_t pydatetime_to_dt64(datetime val, return npy_datetimestruct_to_datetime(reso, dts) -cdef inline void pydate_to_dtstruct(date val, npy_datetimestruct *dts): +cdef void pydate_to_dtstruct(date val, npy_datetimestruct *dts): dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) dts.day = PyDateTime_GET_DAY(val) @@ -264,14 +264,14 @@ cdef inline void pydate_to_dtstruct(date val, npy_datetimestruct *dts): dts.ps = dts.as = 0 return -cdef inline int64_t pydate_to_dt64( +cdef int64_t pydate_to_dt64( date val, npy_datetimestruct *dts, NPY_DATETIMEUNIT reso=NPY_FR_ns ): pydate_to_dtstruct(val, dts) return npy_datetimestruct_to_datetime(reso, dts) -cdef inline int string_to_dts( +cdef int string_to_dts( str val, npy_datetimestruct* dts, NPY_DATETIMEUNIT* out_bestunit, diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index d0f73b44e835f..482cf91c92b70 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1615,16 +1615,24 @@ cdef class BusinessHour(BusinessMixin): Normalize start/end dates to midnight before generating date range. 
weekmask : str, Default 'Mon Tue Wed Thu Fri' Weekmask of valid business days, passed to ``numpy.busdaycalendar``. - start : str, default "09:00" + start : str, time, or list of str/time, default "09:00" Start time of your custom business hour in 24h format. - end : str, default: "17:00" + end : str, time, or list of str/time, default: "17:00" End time of your custom business hour in 24h format. Examples -------- + >>> from datetime import time >>> ts = pd.Timestamp(2022, 8, 5, 16) >>> ts + pd.offsets.BusinessHour() Timestamp('2022-08-08 09:00:00') + >>> ts + pd.offsets.BusinessHour(start="11:00") + Timestamp('2022-08-08 11:00:00') + >>> ts + pd.offsets.BusinessHour(end=time(19, 0)) + Timestamp('2022-08-05 17:00:00') + >>> ts + pd.offsets.BusinessHour(start=[time(9, 0), "20:00"], + ... end=["17:00", time(22, 0)]) + Timestamp('2022-08-05 20:00:00') """ _prefix = "BH" @@ -3619,16 +3627,24 @@ cdef class CustomBusinessHour(BusinessHour): Normalize start/end dates to midnight before generating date range. weekmask : str, Default 'Mon Tue Wed Thu Fri' Weekmask of valid business days, passed to ``numpy.busdaycalendar``. - start : str, default "09:00" + start : str, time, or list of str/time, default "09:00" Start time of your custom business hour in 24h format. - end : str, default: "17:00" + end : str, time, or list of str/time, default: "17:00" End time of your custom business hour in 24h format. Examples -------- + >>> from datetime import time >>> ts = pd.Timestamp(2022, 8, 5, 16) >>> ts + pd.offsets.CustomBusinessHour() Timestamp('2022-08-08 09:00:00') + >>> ts + pd.offsets.CustomBusinessHour(start="11:00") + Timestamp('2022-08-08 11:00:00') + >>> ts + pd.offsets.CustomBusinessHour(end=time(19, 0)) + Timestamp('2022-08-05 17:00:00') + >>> ts + pd.offsets.CustomBusinessHour(start=[time(9, 0), "20:00"], + ... 
end=["17:00", time(22, 0)]) + Timestamp('2022-08-05 20:00:00') """ _prefix = "CBH" @@ -4004,14 +4020,14 @@ cdef datetime _shift_day(datetime other, int days): return localize_pydatetime(shifted, tz) -cdef inline int year_add_months(npy_datetimestruct dts, int months) nogil: +cdef int year_add_months(npy_datetimestruct dts, int months) nogil: """ New year number after shifting npy_datetimestruct number of months. """ return dts.year + (dts.month + months - 1) // 12 -cdef inline int month_add_months(npy_datetimestruct dts, int months) nogil: +cdef int month_add_months(npy_datetimestruct dts, int months) nogil: """ New month number after shifting npy_datetimestruct number of months. @@ -4305,7 +4321,7 @@ def shift_month(stamp: datetime, months: int, day_opt: object = None) -> datetim return stamp.replace(year=year, month=month, day=day) -cdef inline int get_day_of_month(npy_datetimestruct* dts, str day_opt) nogil: +cdef int get_day_of_month(npy_datetimestruct* dts, str day_opt) nogil: """ Find the day in `other`'s month that satisfies a DateOffset's is_on_offset policy, as described by the `day_opt` argument. @@ -4416,10 +4432,10 @@ def roll_qtrday(other: datetime, n: int, month: int, return _roll_qtrday(&dts, n, months_since, day_opt) -cdef inline int _roll_qtrday(npy_datetimestruct* dts, - int n, - int months_since, - str day_opt) nogil except? -1: +cdef int _roll_qtrday(npy_datetimestruct* dts, + int n, + int months_since, + str day_opt) nogil except? 
-1: """ See roll_qtrday.__doc__ """ diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 25a2722c48bd6..44d06df53e0be 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -102,24 +102,24 @@ cdef: int MAX_DAYS_IN_MONTH = 31, MAX_MONTH = 12 -cdef inline bint _is_delimiter(const char ch): +cdef bint _is_delimiter(const char ch): return strchr(delimiters, ch) != NULL -cdef inline int _parse_1digit(const char* s): +cdef int _parse_1digit(const char* s): cdef int result = 0 result += getdigit_ascii(s[0], -10) * 1 return result -cdef inline int _parse_2digit(const char* s): +cdef int _parse_2digit(const char* s): cdef int result = 0 result += getdigit_ascii(s[0], -10) * 10 result += getdigit_ascii(s[1], -100) * 1 return result -cdef inline int _parse_4digit(const char* s): +cdef int _parse_4digit(const char* s): cdef int result = 0 result += getdigit_ascii(s[0], -10) * 1000 result += getdigit_ascii(s[1], -100) * 100 @@ -128,7 +128,7 @@ cdef inline int _parse_4digit(const char* s): return result -cdef inline object _parse_delimited_date(str date_string, bint dayfirst): +cdef object _parse_delimited_date(str date_string, bint dayfirst): """ Parse special cases of dates: MM/DD/YYYY, DD/MM/YYYY, MM/YYYY. @@ -234,7 +234,7 @@ cdef inline object _parse_delimited_date(str date_string, bint dayfirst): raise DateParseError(f"Invalid date specified ({month}/{day})") -cdef inline bint does_string_look_like_time(str parse_string): +cdef bint does_string_look_like_time(str parse_string): """ Checks whether given string is a time: it has to start either from H:MM or from HH:MM, and hour and minute values must be valid. 
@@ -500,8 +500,8 @@ cpdef bint _does_string_look_like_datetime(str py_string): return True -cdef inline object _parse_dateabbr_string(object date_string, datetime default, - str freq=None): +cdef object _parse_dateabbr_string(object date_string, datetime default, + str freq=None): cdef: object ret # year initialized to prevent compiler warnings @@ -1074,7 +1074,7 @@ cdef str _fill_token(token: str, padding: int): @cython.wraparound(False) @cython.boundscheck(False) -cdef inline object convert_to_unicode(object item, bint keep_trivial_numbers): +cdef object convert_to_unicode(object item, bint keep_trivial_numbers): """ Convert `item` to str. diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index cc9c2d631bcd9..86fa965be92c4 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -311,21 +311,21 @@ cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back, return DtoB_weekday(unix_date) -cdef inline int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: +cdef int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: if af_info.is_end: return (ordinal + 1) * af_info.intraday_conversion_factor - 1 else: return ordinal * af_info.intraday_conversion_factor -cdef inline int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: +cdef int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil: return ordinal // af_info.intraday_conversion_factor -cdef inline int64_t transform_via_day(int64_t ordinal, - asfreq_info *af_info, - freq_conv_func first_func, - freq_conv_func second_func) nogil: +cdef int64_t transform_via_day(int64_t ordinal, + asfreq_info *af_info, + freq_conv_func first_func, + freq_conv_func second_func) nogil: cdef: int64_t result @@ -677,12 +677,12 @@ cdef char* c_strftime(npy_datetimestruct *dts, char *fmt): # ---------------------------------------------------------------------- # Conversion between date_info and npy_datetimestruct -cdef inline 
int get_freq_group(int freq) nogil: +cdef int get_freq_group(int freq) nogil: # See also FreqGroup.get_freq_group return (freq // 1000) * 1000 -cdef inline int get_freq_group_index(int freq) nogil: +cdef int get_freq_group_index(int freq) nogil: return freq // 1000 @@ -721,12 +721,12 @@ cdef int64_t unix_date_from_ymd(int year, int month, int day) nogil: return unix_date -cdef inline int64_t dts_to_month_ordinal(npy_datetimestruct* dts) nogil: +cdef int64_t dts_to_month_ordinal(npy_datetimestruct* dts) nogil: # AKA: use npy_datetimestruct_to_datetime(NPY_FR_M, &dts) return ((dts.year - 1970) * 12 + dts.month - 1) -cdef inline int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) nogil: +cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) nogil: cdef: int64_t result @@ -737,7 +737,7 @@ cdef inline int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) nog return result -cdef inline int64_t dts_to_qtr_ordinal(npy_datetimestruct* dts, int to_end) nogil: +cdef int64_t dts_to_qtr_ordinal(npy_datetimestruct* dts, int to_end) nogil: cdef: int quarter @@ -746,7 +746,7 @@ cdef inline int64_t dts_to_qtr_ordinal(npy_datetimestruct* dts, int to_end) nogi return ((dts.year - 1970) * 4 + quarter - 1) -cdef inline int get_anchor_month(int freq, int freq_group) nogil: +cdef int get_anchor_month(int freq, int freq_group) nogil: cdef: int fmonth fmonth = freq - freq_group @@ -930,7 +930,7 @@ cdef int get_yq(int64_t ordinal, int freq, npy_datetimestruct* dts): return quarter -cdef inline int month_to_quarter(int month) nogil: +cdef int month_to_quarter(int month) nogil: return (month - 1) // 3 + 1 @@ -1027,7 +1027,7 @@ cdef int calc_a_year_end(int freq, int group) nogil: return result -cdef inline int calc_week_end(int freq, int group) nogil: +cdef int calc_week_end(int freq, int group) nogil: return freq - group @@ -1465,7 +1465,7 @@ def extract_ordinals(ndarray values, freq) -> np.ndarray: return ordinals -cdef inline int64_t 
_extract_ordinal(object item, str freqstr, freq) except? -1: +cdef int64_t _extract_ordinal(object item, str freqstr, freq) except? -1: """ See extract_ordinals. """ diff --git a/pandas/_libs/tslibs/timedeltas.pxd b/pandas/_libs/tslibs/timedeltas.pxd index e0313271a13df..3f37ef7eb1e3f 100644 --- a/pandas/_libs/tslibs/timedeltas.pxd +++ b/pandas/_libs/tslibs/timedeltas.pxd @@ -24,6 +24,6 @@ cdef class _Timedelta(timedelta): cdef bint _has_ns(self) cdef bint _is_in_pytimedelta_bounds(self) cdef _ensure_components(_Timedelta self) - cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op) + cdef bint _compare_mismatched_resos(self, _Timedelta other, op) cdef _Timedelta _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=*) cpdef _maybe_cast_to_matching_resos(self, _Timedelta other) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 618e0208154fa..9284980ac2f65 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -455,7 +455,7 @@ def array_to_timedelta64( return result -cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1: +cdef int64_t _item_to_timedelta64_fastpath(object item) except? -1: """ See array_to_timedelta64. """ @@ -467,7 +467,7 @@ cdef inline int64_t _item_to_timedelta64_fastpath(object item) except? -1: return parse_timedelta_string(item) -cdef inline int64_t _item_to_timedelta64( +cdef int64_t _item_to_timedelta64( object item, str parsed_unit, str errors @@ -488,7 +488,7 @@ cdef inline int64_t _item_to_timedelta64( raise -cdef inline int64_t parse_timedelta_string(str ts) except? -1: +cdef int64_t parse_timedelta_string(str ts) except? -1: """ Parse a regular format timedelta string. Return an int64_t (in ns) or raise a ValueError on an invalid parse. @@ -658,7 +658,7 @@ cdef inline int64_t parse_timedelta_string(str ts) except? 
-1: return result -cdef inline int64_t timedelta_as_neg(int64_t value, bint neg): +cdef int64_t timedelta_as_neg(int64_t value, bint neg): """ Parameters @@ -671,7 +671,7 @@ cdef inline int64_t timedelta_as_neg(int64_t value, bint neg): return value -cdef inline timedelta_from_spec(object number, object frac, object unit): +cdef timedelta_from_spec(object number, object frac, object unit): """ Parameters @@ -813,7 +813,7 @@ def _binary_op_method_timedeltalike(op, name): # ---------------------------------------------------------------------- # Timedelta Construction -cdef inline int64_t parse_iso_format_string(str ts) except? -1: +cdef int64_t parse_iso_format_string(str ts) except? -1: """ Extracts and cleanses the appropriate values from a match object with groups for each component of an ISO 8601 duration @@ -1151,7 +1151,7 @@ cdef class _Timedelta(timedelta): return self._compare_mismatched_resos(ots, op) # TODO: re-use/share with Timestamp - cdef inline bint _compare_mismatched_resos(self, _Timedelta other, op): + cdef bint _compare_mismatched_resos(self, _Timedelta other, op): # Can't just dispatch to numpy as they silently overflow and get it wrong cdef: npy_datetimestruct dts_self diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 108c884bba170..851639b714d85 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -126,7 +126,7 @@ _no_input = object() # ---------------------------------------------------------------------- -cdef inline _Timestamp create_timestamp_from_ts( +cdef _Timestamp create_timestamp_from_ts( int64_t value, npy_datetimestruct dts, tzinfo tz, @@ -361,7 +361,7 @@ cdef class _Timestamp(ABCTimestamp): return self._compare_mismatched_resos(ots, op) # TODO: copied from Timedelta; try to de-duplicate - cdef inline bint _compare_mismatched_resos(self, _Timestamp other, int op): + cdef bint _compare_mismatched_resos(self, _Timestamp other, int op): # Can't just dispatch 
to numpy as they silently overflow and get it wrong cdef: npy_datetimestruct dts_self @@ -2224,7 +2224,7 @@ Timestamp.daysinmonth = Timestamp.days_in_month @cython.cdivision(False) -cdef inline int64_t normalize_i8_stamp(int64_t local_val, int64_t ppd) nogil: +cdef int64_t normalize_i8_stamp(int64_t local_val, int64_t ppd) nogil: """ Round the localized nanosecond timestamp down to the previous midnight. diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 8d7bebe5d46c2..ae34eb242fa2f 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -57,7 +57,7 @@ cdef tzinfo utc_zoneinfo = None # ---------------------------------------------------------------------- -cdef inline bint is_utc_zoneinfo(tzinfo tz): +cdef bint is_utc_zoneinfo(tzinfo tz): # Workaround for cases with missing tzdata # https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025 if tz is None or zoneinfo is None: @@ -86,22 +86,22 @@ cpdef inline bint is_utc(tzinfo tz): ) -cdef inline bint is_zoneinfo(tzinfo tz): +cdef bint is_zoneinfo(tzinfo tz): if ZoneInfo is None: return False return isinstance(tz, ZoneInfo) -cdef inline bint is_tzlocal(tzinfo tz): +cdef bint is_tzlocal(tzinfo tz): return isinstance(tz, _dateutil_tzlocal) -cdef inline bint treat_tz_as_pytz(tzinfo tz): +cdef bint treat_tz_as_pytz(tzinfo tz): return (hasattr(tz, "_utc_transition_times") and hasattr(tz, "_transition_info")) -cdef inline bint treat_tz_as_dateutil(tzinfo tz): +cdef bint treat_tz_as_dateutil(tzinfo tz): return hasattr(tz, "_trans_list") and hasattr(tz, "_trans_idx") @@ -192,7 +192,7 @@ def _p_tz_cache_key(tz: tzinfo): dst_cache = {} -cdef inline object tz_cache_key(tzinfo tz): +cdef object tz_cache_key(tzinfo tz): """ Return the key in the cache for the timezone info object or None if unknown. 
diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 1b95899e5c037..7c1dd04e2b2cc 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -31,7 +31,7 @@ cdef class Localizer: int64_t delta int64_t* tdata - cdef inline int64_t utc_val_to_local_val( + cdef int64_t utc_val_to_local_val( self, int64_t utc_val, Py_ssize_t* pos, diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index f74c72dc4e35c..ad894f70f0cb2 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -116,7 +116,7 @@ cdef class Localizer: self.tdata = cnp.PyArray_DATA(trans) @cython.boundscheck(False) - cdef inline int64_t utc_val_to_local_val( + cdef int64_t utc_val_to_local_val( self, int64_t utc_val, Py_ssize_t* pos, bint* fold=NULL ) except? -1: if self.use_utc: @@ -405,7 +405,7 @@ timedelta-like} return result.base # .base to get underlying ndarray -cdef inline Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): +cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): # Caller is responsible for checking n > 0 # This looks very similar to local_search_right in the ndarray.searchsorted # implementation. 
@@ -434,7 +434,7 @@ cdef inline Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n) return left -cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT creso): +cdef str _render_tstamp(int64_t val, NPY_DATETIMEUNIT creso): """ Helper function to render exception messages""" from pandas._libs.tslibs.timestamps import Timestamp ts = Timestamp._from_value_and_reso(val, creso, None) diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 4763ea2f6b748..06e09d890de69 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -198,7 +198,7 @@ def ints_to_pydatetime( # ------------------------------------------------------------------------- -cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts): +cdef c_Resolution _reso_stamp(npy_datetimestruct *dts): if dts.ps != 0: return c_Resolution.RESO_NS elif dts.us != 0: diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 57ef3601b7461..3055b8ff48cc9 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1,6 +1,5 @@ # cython: boundscheck=False, wraparound=False, cdivision=True -cimport cython from libc.math cimport ( round, signbit, @@ -24,8 +23,6 @@ cnp.import_array() from pandas._libs.algos import is_monotonic -from pandas._libs.dtypes cimport numeric_t - cdef extern from "../src/skiplist.h": ctypedef struct node_t: @@ -69,9 +66,9 @@ cdef bint is_monotonic_increasing_start_end_bounds( # Rolling sum -cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x, - int64_t num_consecutive_same_value, float64_t prev_value - ) nogil: +cdef float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x, + int64_t num_consecutive_same_value, float64_t prev_value + ) nogil: cdef: float64_t result @@ -88,9 +85,9 @@ cdef inline float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x, return result -cdef inline void add_sum(float64_t 
val, int64_t *nobs, float64_t *sum_x, - float64_t *compensation, int64_t *num_consecutive_same_value, - float64_t *prev_value) nogil: +cdef void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x, + float64_t *compensation, int64_t *num_consecutive_same_value, + float64_t *prev_value) nogil: """ add a value from the sum calc using Kahan summation """ cdef: @@ -113,8 +110,8 @@ cdef inline void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x, prev_value[0] = val -cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, - float64_t *compensation) nogil: +cdef void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x, + float64_t *compensation) nogil: """ remove a value from the sum calc using Kahan summation """ cdef: @@ -188,9 +185,9 @@ def roll_sum(const float64_t[:] values, ndarray[int64_t] start, # Rolling mean -cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct, - float64_t sum_x, int64_t num_consecutive_same_value, - float64_t prev_value) nogil: +cdef float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct, + float64_t sum_x, int64_t num_consecutive_same_value, + float64_t prev_value) nogil: cdef: float64_t result @@ -211,7 +208,7 @@ cdef inline float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct return result -cdef inline void add_mean( +cdef void add_mean( float64_t val, Py_ssize_t *nobs, float64_t *sum_x, @@ -243,8 +240,8 @@ cdef inline void add_mean( prev_value[0] = val -cdef inline void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, - Py_ssize_t *neg_ct, float64_t *compensation) nogil: +cdef void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x, + Py_ssize_t *neg_ct, float64_t *compensation) nogil: """ remove a value from the mean calc using Kahan summation """ cdef: float64_t y, t @@ -319,7 +316,7 @@ def roll_mean(const float64_t[:] values, ndarray[int64_t] start, # Rolling variance -cdef inline float64_t calc_var( +cdef float64_t 
calc_var( int64_t minp, int ddof, float64_t nobs, @@ -343,7 +340,7 @@ cdef inline float64_t calc_var( return result -cdef inline void add_var( +cdef void add_var( float64_t val, float64_t *nobs, float64_t *mean_x, @@ -385,7 +382,7 @@ cdef inline void add_var( ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0]) -cdef inline void remove_var( +cdef void remove_var( float64_t val, float64_t *nobs, float64_t *mean_x, @@ -480,10 +477,10 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start, # Rolling skewness -cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, - float64_t x, float64_t xx, float64_t xxx, - int64_t num_consecutive_same_value - ) nogil: +cdef float64_t calc_skew(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, float64_t xxx, + int64_t num_consecutive_same_value + ) nogil: cdef: float64_t result, dnobs float64_t A, B, C, R @@ -521,15 +518,15 @@ cdef inline float64_t calc_skew(int64_t minp, int64_t nobs, return result -cdef inline void add_skew(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx, - int64_t *num_consecutive_same_value, - float64_t *prev_value, - ) nogil: +cdef void add_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, + int64_t *num_consecutive_same_value, + float64_t *prev_value, + ) nogil: """ add a value from the skew calc """ cdef: float64_t y, t @@ -560,12 +557,12 @@ cdef inline void add_skew(float64_t val, int64_t *nobs, prev_value[0] = val -cdef inline void remove_skew(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx) nogil: +cdef void remove_skew(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, + float64_t 
*compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx) nogil: """ remove a value from the skew calc """ cdef: float64_t y, t @@ -678,11 +675,11 @@ def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start, # Rolling kurtosis -cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, - float64_t x, float64_t xx, - float64_t xxx, float64_t xxxx, - int64_t num_consecutive_same_value, - ) nogil: +cdef float64_t calc_kurt(int64_t minp, int64_t nobs, + float64_t x, float64_t xx, + float64_t xxx, float64_t xxxx, + int64_t num_consecutive_same_value, + ) nogil: cdef: float64_t result, dnobs float64_t A, B, C, D, R, K @@ -724,16 +721,16 @@ cdef inline float64_t calc_kurt(int64_t minp, int64_t nobs, return result -cdef inline void add_kurt(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, float64_t *xxxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx, - float64_t *compensation_xxxx, - int64_t *num_consecutive_same_value, - float64_t *prev_value - ) nogil: +cdef void add_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, + float64_t *compensation_xxxx, + int64_t *num_consecutive_same_value, + float64_t *prev_value + ) nogil: """ add a value from the kurotic calc """ cdef: float64_t y, t @@ -768,13 +765,13 @@ cdef inline void add_kurt(float64_t val, int64_t *nobs, prev_value[0] = val -cdef inline void remove_kurt(float64_t val, int64_t *nobs, - float64_t *x, float64_t *xx, - float64_t *xxx, float64_t *xxxx, - float64_t *compensation_x, - float64_t *compensation_xx, - float64_t *compensation_xxx, - float64_t *compensation_xxxx) nogil: +cdef void remove_kurt(float64_t val, int64_t *nobs, + float64_t *x, float64_t *xx, + float64_t *xxx, float64_t *xxxx, + float64_t *compensation_x, + float64_t *compensation_xx, + float64_t *compensation_xxx, 
+ float64_t *compensation_xxxx) nogil: """ remove a value from the kurotic calc """ cdef: float64_t y, t @@ -993,46 +990,33 @@ def roll_median_c(const float64_t[:] values, ndarray[int64_t] start, # https://github.com/pydata/bottleneck -cdef inline numeric_t init_mm(numeric_t ai, Py_ssize_t *nobs, bint is_max) nogil: +cdef float64_t init_mm(float64_t ai, Py_ssize_t *nobs, bint is_max) nogil: - if numeric_t in cython.floating: - if ai == ai: - nobs[0] = nobs[0] + 1 - elif is_max: - if numeric_t == cython.float: - ai = MINfloat32 - else: - ai = MINfloat64 - else: - if numeric_t == cython.float: - ai = MAXfloat32 - else: - ai = MAXfloat64 - - else: + if ai == ai: nobs[0] = nobs[0] + 1 + elif is_max: + ai = MINfloat64 + else: + ai = MAXfloat64 return ai -cdef inline void remove_mm(numeric_t aold, Py_ssize_t *nobs) nogil: +cdef void remove_mm(float64_t aold, Py_ssize_t *nobs) nogil: """ remove a value from the mm calc """ - if numeric_t in cython.floating and aold == aold: + if aold == aold: nobs[0] = nobs[0] - 1 -cdef inline numeric_t calc_mm(int64_t minp, Py_ssize_t nobs, - numeric_t value) nogil: +cdef float64_t calc_mm(int64_t minp, Py_ssize_t nobs, + float64_t value) nogil: cdef: - numeric_t result + float64_t result - if numeric_t in cython.floating: - if nobs >= minp: - result = value - else: - result = NaN - else: + if nobs >= minp: result = value + else: + result = NaN return result @@ -1082,13 +1066,13 @@ def roll_min(ndarray[float64_t] values, ndarray[int64_t] start, return _roll_min_max(values, start, end, minp, is_max=0) -cdef _roll_min_max(ndarray[numeric_t] values, +cdef _roll_min_max(ndarray[float64_t] values, ndarray[int64_t] starti, ndarray[int64_t] endi, int64_t minp, bint is_max): cdef: - numeric_t ai + float64_t ai int64_t curr_win_size, start Py_ssize_t i, k, nobs = 0, N = len(starti) deque Q[int64_t] # min/max always the front @@ -1531,12 +1515,12 @@ cdef float64_t[:] _roll_weighted_sum_mean(const float64_t[:] values, # Rolling var for weighted 
window -cdef inline float64_t calc_weighted_var(float64_t t, - float64_t sum_w, - Py_ssize_t win_n, - unsigned int ddof, - float64_t nobs, - int64_t minp) nogil: +cdef float64_t calc_weighted_var(float64_t t, + float64_t sum_w, + Py_ssize_t win_n, + unsigned int ddof, + float64_t nobs, + int64_t minp) nogil: """ Calculate weighted variance for a window using West's method. @@ -1582,12 +1566,12 @@ cdef inline float64_t calc_weighted_var(float64_t t, return result -cdef inline void add_weighted_var(float64_t val, - float64_t w, - float64_t *t, - float64_t *sum_w, - float64_t *mean, - float64_t *nobs) nogil: +cdef void add_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: """ Update weighted mean, sum of weights and sum of weighted squared differences to include value and weight pair in weighted variance @@ -1628,12 +1612,12 @@ cdef inline void add_weighted_var(float64_t val, sum_w[0] = temp -cdef inline void remove_weighted_var(float64_t val, - float64_t w, - float64_t *t, - float64_t *sum_w, - float64_t *mean, - float64_t *nobs) nogil: +cdef void remove_weighted_var(float64_t val, + float64_t w, + float64_t *t, + float64_t *sum_w, + float64_t *mean, + float64_t *nobs) nogil: """ Update weighted mean, sum of weights and sum of weighted squared differences to remove value and weight pair from weighted variance diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index 527e8c1d0d090..29618bdd64912 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -225,7 +225,7 @@ def wrapper(*args, **kwargs): ) try: return t(*args, **kwargs) - except Exception as err: # pylint: disable=broad-except + except Exception as err: errno = getattr(err, "errno", None) if not errno and hasattr(errno, "reason"): # error: "Exception" has no attribute "reason" diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index abad188f06720..9bd4b384fadb0 100644 --- 
a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -41,7 +41,7 @@ "sqlalchemy": "1.4.16", "tables": "3.6.1", "tabulate": "0.8.9", - "xarray": "0.19.0", + "xarray": "0.21.0", "xlrd": "2.0.1", "xlsxwriter": "1.4.3", "zstandard": "0.15.2", diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 286cec3afdc45..0162f54bf5225 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -74,7 +74,10 @@ is_timedelta64_dtype, pandas_dtype, ) -from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + ExtensionDtype, +) from pandas.core.dtypes.missing import isna from pandas.core.arrays import datetimelike as dtl @@ -645,6 +648,25 @@ def astype(self, dtype, copy: bool = True): return self.copy() return self + elif isinstance(dtype, ExtensionDtype): + if not isinstance(dtype, DatetimeTZDtype): + # e.g. Sparse[datetime64[ns]] + return super().astype(dtype, copy=copy) + elif self.tz is None: + # pre-2.0 this did self.tz_localize(dtype.tz), which did not match + # the Series behavior which did + # values.tz_localize("UTC").tz_convert(dtype.tz) + raise TypeError( + "Cannot use .astype to convert from timezone-naive dtype to " + "timezone-aware dtype. Use obj.tz_localize instead or " + "series.dt.tz_localize instead" + ) + else: + # tzaware unit conversion e.g. datetime64[s, UTC] + np_dtype = np.dtype(dtype.str) + res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy) + return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq) + elif ( self.tz is None and is_datetime64_dtype(dtype) @@ -656,22 +678,6 @@ def astype(self, dtype, copy: bool = True): return type(self)._simple_new(res_values, dtype=res_values.dtype) # TODO: preserve freq? - elif self.tz is not None and isinstance(dtype, DatetimeTZDtype): - # tzaware unit conversion e.g. 
datetime64[s, UTC] - np_dtype = np.dtype(dtype.str) - res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy) - return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq) - - elif self.tz is None and isinstance(dtype, DatetimeTZDtype): - # pre-2.0 this did self.tz_localize(dtype.tz), which did not match - # the Series behavior which did - # values.tz_localize("UTC").tz_convert(dtype.tz) - raise TypeError( - "Cannot use .astype to convert from timezone-naive dtype to " - "timezone-aware dtype. Use obj.tz_localize instead or " - "series.dt.tz_localize instead" - ) - elif self.tz is not None and is_datetime64_dtype(dtype): # pre-2.0 behavior for DTA/DTI was # values.tz_convert("UTC").tz_localize(None), which did not match diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 033917fe9eb2d..2a6e26fbdbd1c 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -49,10 +49,7 @@ validate_insert_loc, ) -from pandas.core.dtypes.astype import ( - astype_array, - astype_nansafe, -) +from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, find_common_type, @@ -445,7 +442,7 @@ def __init__( # NumPy may raise a ValueError on data like [1, []] # we retry with object dtype here. 
if dtype is None: - dtype = object + dtype = np.dtype(object) data = np.atleast_1d(np.asarray(data, dtype=dtype)) else: raise @@ -464,10 +461,7 @@ def __init__( if isinstance(data, type(self)) and sparse_index is None: sparse_index = data._sparse_index # error: Argument "dtype" to "asarray" has incompatible type - # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected - # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], - # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, - # Any]]]" + # "Union[ExtensionDtype, dtype[Any], None]"; expected "None" sparse_values = np.asarray( data.sp_values, dtype=dtype # type: ignore[arg-type] ) @@ -487,10 +481,10 @@ def __init__( if fill_value is NaT: fill_value = np.datetime64("NaT", "ns") data = np.asarray(data) - sparse_values, sparse_index, fill_value = make_sparse( - # error: Argument "dtype" to "make_sparse" has incompatible type - # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected - # "Union[str, dtype[Any], None]" + sparse_values, sparse_index, fill_value = _make_sparse( + # error: Argument "dtype" to "_make_sparse" has incompatible type + # "Union[ExtensionDtype, dtype[Any], None]"; expected + # "Optional[dtype[Any]]" data, kind=kind, fill_value=fill_value, @@ -498,10 +492,7 @@ def __init__( ) else: # error: Argument "dtype" to "asarray" has incompatible type - # "Union[ExtensionDtype, dtype[Any], Type[object], None]"; expected - # "Union[dtype[Any], None, type, _SupportsDType, str, Union[Tuple[Any, int], - # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, - # Any]]]" + # "Union[ExtensionDtype, dtype[Any], None]"; expected "None" sparse_values = np.asarray(data, dtype=dtype) # type: ignore[arg-type] if len(sparse_values) != sparse_index.npoints: raise AssertionError( @@ -1288,7 +1279,9 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): dtype = self.dtype.update_dtype(dtype) subtype = 
pandas_dtype(dtype._subtype_with_str) subtype = cast(np.dtype, subtype) # ensured by update_dtype - sp_values = astype_nansafe(self.sp_values, subtype, copy=copy) + values = ensure_wrapped_if_datetimelike(self.sp_values) + sp_values = astype_array(values, subtype, copy=copy) + sp_values = np.asarray(sp_values) return self._simple_new(sp_values, self.sp_index, dtype) @@ -1828,11 +1821,11 @@ def _formatter(self, boxed: bool = False): return None -def make_sparse( +def _make_sparse( arr: np.ndarray, kind: SparseIndexKind = "block", fill_value=None, - dtype: NpDtype | None = None, + dtype: np.dtype | None = None, ): """ Convert ndarray to sparse format @@ -1882,7 +1875,10 @@ def make_sparse( index = make_sparse_index(length, indices, kind) sparsified_values = arr[mask] if dtype is not None: - sparsified_values = astype_nansafe(sparsified_values, dtype=pandas_dtype(dtype)) + sparsified_values = ensure_wrapped_if_datetimelike(sparsified_values) + sparsified_values = astype_array(sparsified_values, dtype=dtype) + sparsified_values = np.asarray(sparsified_values) + # TODO: copy return sparsified_values, index, fill_value diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index f9a0ac6d38b6d..1b48e90538e8e 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -127,7 +127,15 @@ def __eq__(self, other: Any) -> bool: or isinstance(other.fill_value, type(self.fill_value)) ) else: - fill_value = self.fill_value == other.fill_value + with warnings.catch_warnings(): + # Ignore spurious numpy warning + warnings.filterwarnings( + "ignore", + "elementwise comparison failed", + category=DeprecationWarning, + ) + + fill_value = self.fill_value == other.fill_value return subtype and fill_value return False diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index 53c2cfd345e32..57af8c22f76d3 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -7,6 +7,7 @@ import 
inspect from typing import ( TYPE_CHECKING, + cast, overload, ) @@ -36,7 +37,11 @@ from pandas.core.dtypes.missing import isna if TYPE_CHECKING: - from pandas.core.arrays import ExtensionArray + from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + TimedeltaArray, + ) _dtype_obj = np.dtype(object) @@ -109,7 +114,11 @@ def astype_nansafe( # allow frequency conversions if dtype.kind == "M": - return arr.astype(dtype) + from pandas.core.construction import ensure_wrapped_if_datetimelike + + dta = ensure_wrapped_if_datetimelike(arr) + dta = cast("DatetimeArray", dta) + return dta.astype(dtype, copy=copy)._ndarray raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") @@ -124,8 +133,9 @@ def astype_nansafe( # and doing the old convert-to-float behavior otherwise. from pandas.core.construction import ensure_wrapped_if_datetimelike - arr = ensure_wrapped_if_datetimelike(arr) - return arr.astype(dtype, copy=copy) + tda = ensure_wrapped_if_datetimelike(arr) + tda = cast("TimedeltaArray", tda) + return tda.astype(dtype, copy=copy)._ndarray raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]") @@ -145,10 +155,15 @@ def astype_nansafe( return dta.astype(dtype, copy=False)._ndarray elif is_timedelta64_dtype(dtype): + from pandas.core.construction import ensure_wrapped_if_datetimelike + # bc we know arr.dtype == object, this is equivalent to # `np.asarray(to_timedelta(arr))`, but using a lower-level API that # does not require a circular import. 
- return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False) + tdvals = array_to_timedelta64(arr).view("m8[ns]") + + tda = ensure_wrapped_if_datetimelike(tdvals) + return tda.astype(dtype, copy=False)._ndarray if dtype.name in ("datetime64", "timedelta64"): msg = ( diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f227eb46273a5..249b8c99b8682 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -485,7 +485,7 @@ def maybe_cast_to_extension_array( try: result = cls._from_sequence(obj, dtype=dtype) - except Exception: # pylint: disable=broad-except + except Exception: # We can't predict what downstream EA constructors may raise result = obj return result diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 03429fd9fee1d..80eaf13d9dd06 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -4,12 +4,10 @@ from __future__ import annotations from typing import TYPE_CHECKING -import warnings import numpy as np from pandas._typing import AxisInt -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( @@ -115,15 +113,8 @@ def is_nonempty(x) -> bool: result = np.concatenate(to_concat, axis=axis) if "b" in kinds and result.dtype.kind in ["i", "u", "f"]: - # GH#39817 - warnings.warn( - "Behavior when concatenating bool-dtype and numeric-dtype arrays is " - "deprecated; in a future version these will cast to object dtype " - "(instead of coercing bools to numeric values). 
To retain the old " - "behavior, explicitly cast bool-dtype arrays to numeric dtype.", - FutureWarning, - stacklevel=find_stack_level(), - ) + # GH#39817 cast to object instead of casting bools to numeric + result = result.astype(object, copy=False) return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 98af277cc0bd7..37c48bb7adbba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -632,8 +632,6 @@ def __init__( copy: bool | None = None, ) -> None: - if data is None: - data = {} if dtype is not None: dtype = self._validate_dtype(dtype) @@ -671,6 +669,12 @@ def __init__( else: copy = False + if data is None: + index = index if index is not None else default_index(0) + columns = columns if columns is not None else default_index(0) + dtype = dtype if dtype is not None else pandas_dtype(object) + data = [] + if isinstance(data, (BlockManager, ArrayManager)): mgr = self._init_mgr( data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy @@ -777,7 +781,7 @@ def __init__( mgr = dict_to_mgr( {}, index, - columns, + columns if columns is not None else default_index(0), dtype=dtype, typ=manager, ) @@ -2309,8 +2313,7 @@ def maybe_reorder( result_index = None if len(arrays) == 0 and index is None and length == 0: - # for backward compat use an object Index instead of RangeIndex - result_index = Index([]) + result_index = default_index(0) arrays, arr_columns = reorder_arrays(arrays, arr_columns, columns, length) return arrays, arr_columns, result_index diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 37b7af13fc7c4..c659f8d0d9a4d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6316,7 +6316,7 @@ def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: return self.copy(deep=True) @final - def infer_objects(self: NDFrameT) -> NDFrameT: + def infer_objects(self: NDFrameT, copy: bool_t = True) -> NDFrameT: """ Attempt to infer better dtypes for object columns. 
@@ -6325,6 +6325,12 @@ def infer_objects(self: NDFrameT) -> NDFrameT: columns unchanged. The inference rules are the same as during normal Series/DataFrame construction. + Parameters + ---------- + copy : bool, default True + Whether to make a copy for non-object or non-inferrable columns + or Series. + Returns ------- converted : same type as input object @@ -6354,7 +6360,7 @@ def infer_objects(self: NDFrameT) -> NDFrameT: A int64 dtype: object """ - new_mgr = self._mgr.convert() + new_mgr = self._mgr.convert(copy=copy) return self._constructor(new_mgr).__finalize__(self, method="infer_objects") @final diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d3e37a40614b3..819220d13566b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -24,7 +24,6 @@ Union, cast, ) -import warnings import numpy as np @@ -51,7 +50,6 @@ Substitution, doc, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( ensure_int64, @@ -1392,33 +1390,15 @@ def _transform_general(self, func, *args, **kwargs): applied.append(res) # Compute and process with the remaining groups - emit_alignment_warning = False for name, group in gen: if group.size == 0: continue object.__setattr__(group, "name", name) res = path(group) - if ( - not emit_alignment_warning - and res.ndim == 2 - and not res.index.equals(group.index) - ): - emit_alignment_warning = True res = _wrap_transform_general_frame(self.obj, group, res) applied.append(res) - if emit_alignment_warning: - # GH#45648 - warnings.warn( - "In a future version of pandas, returning a DataFrame in " - "groupby.transform will align with the input's index. 
Apply " - "`.to_numpy()` to the result in the transform function to keep " - "the current behavior and silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - concat_index = obj.columns if self.axis == 0 else obj.index other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 concatenated = concat(applied, axis=self.axis, verify_integrity=False) @@ -2336,5 +2316,7 @@ def _wrap_transform_general_frame( ) assert isinstance(res_frame, DataFrame) return res_frame + elif isinstance(res, DataFrame) and not res.index.is_(group.index): + return res._align_frame(group)[0] else: return res diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c7fb40e855ef7..6cb9bb7f23a06 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -471,12 +471,11 @@ class providing the base-class of operations. The resulting dtype will reflect the return value of the passed ``func``, see the examples below. -.. deprecated:: 1.5.0 +.. versionchanged:: 2.0.0 When using ``.transform`` on a grouped DataFrame and the transformation function - returns a DataFrame, currently pandas does not align the result's index - with the input's index. This behavior is deprecated and alignment will - be performed in a future version of pandas. You can apply ``.to_numpy()`` to the + returns a DataFrame, pandas now aligns the result's index + with the input's index. You can call ``.to_numpy()`` on the result of the transformation function to avoid alignment. 
Examples diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce06b6bc01581..5a71ac247422a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3429,27 +3429,13 @@ def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: # -------------------------------------------------------------------- # Indexing Methods - def get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location, slice or boolean mask for requested label. Parameters ---------- key : label - method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional - * default: exact matches only. - * pad / ffill: find the PREVIOUS index value if no exact match. - * backfill / bfill: use NEXT index value if no exact match - * nearest: use the NEAREST index value if no exact match. Tied - distances are broken by preferring the larger index value. - - .. deprecated:: 1.4 - Use index.get_indexer([item], method=...) instead. - - tolerance : int or float, optional - Maximum distance from index value for inexact matches. The value of - the index at the matching location must satisfy the equation - ``abs(index[loc] - key) <= tolerance``. Returns ------- @@ -3469,46 +3455,17 @@ def get_loc(self, key, method=None, tolerance=None): >>> non_monotonic_index.get_loc('b') array([False, True, False, True]) """ - if method is None: - if tolerance is not None: - raise ValueError( - "tolerance argument only valid if using pad, " - "backfill or nearest lookups" - ) - casted_key = self._maybe_cast_indexer(key) - try: - return self._engine.get_loc(casted_key) - except KeyError as err: - raise KeyError(key) from err - except TypeError: - # If we have a listlike key, _check_indexing_error will raise - # InvalidIndexError. Otherwise we fall through and re-raise - # the TypeError. 
- self._check_indexing_error(key) - raise - - # GH#42269 - warnings.warn( - f"Passing method to {type(self).__name__}.get_loc is deprecated " - "and will raise in a future version. Use " - "index.get_indexer([item], method=...) instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - - if is_scalar(key) and isna(key) and not self.hasnans: - raise KeyError(key) - - if tolerance is not None: - tolerance = self._convert_tolerance(tolerance, np.asarray(key)) - - indexer = self.get_indexer([key], method=method, tolerance=tolerance) - if indexer.ndim > 1 or indexer.size > 1: - raise TypeError("get_loc requires scalar valued input") - loc = indexer.item() - if loc == -1: - raise KeyError(key) - return loc + casted_key = self._maybe_cast_indexer(key) + try: + return self._engine.get_loc(casted_key) + except KeyError as err: + raise KeyError(key) from err + except TypeError: + # If we have a listlike key, _check_indexing_error will raise + # InvalidIndexError. Otherwise we fall through and re-raise + # the TypeError. + self._check_indexing_error(key) + raise _index_shared_docs[ "get_indexer" @@ -6521,6 +6478,36 @@ def drop( indexer = indexer[~mask] return self.delete(indexer) + def infer_objects(self, copy: bool = True) -> Index: + """ + If we have an object dtype, try to infer a non-object dtype. + + Parameters + ---------- + copy : bool, default True + Whether to make a copy in cases where no inference occurs. + """ + if self._is_multi: + raise NotImplementedError( + "infer_objects is not implemented for MultiIndex. " + "Use index.to_frame().infer_objects() instead." 
+ ) + if self.dtype != object: + return self.copy() if copy else self + + values = self._values + values = cast("npt.NDArray[np.object_]", values) + res_values = lib.maybe_convert_objects( + values, + convert_datetime=True, + convert_timedelta=True, + convert_period=True, + convert_interval=True, + ) + if copy and res_values is values: + return self.copy() + return Index(res_values, name=self.name) + # -------------------------------------------------------------------- # Generated Arithmetic, Comparison, and Unary Methods @@ -6585,10 +6572,10 @@ def _logical_method(self, other, op): def _construct_result(self, result, name): if isinstance(result, tuple): return ( - Index._with_infer(result[0], name=name), - Index._with_infer(result[1], name=name), + Index(result[0], name=name, dtype=result[0].dtype), + Index(result[1], name=name, dtype=result[1].dtype), ) - return Index._with_infer(result, name=name) + return Index(result, name=name, dtype=result.dtype) def _arith_method(self, other, op): if ( diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c30323338e676..784b5c8b24e32 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -556,7 +556,7 @@ def _disallow_mismatched_indexing(self, key) -> None: except TypeError as err: raise KeyError(key) from err - def get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location for requested label @@ -587,8 +587,7 @@ def get_loc(self, key, method=None, tolerance=None): try: return self._partial_date_slice(reso, parsed) except KeyError as err: - if method is None: - raise KeyError(key) from err + raise KeyError(key) from err key = parsed @@ -599,10 +598,6 @@ def get_loc(self, key, method=None, tolerance=None): ) elif isinstance(key, dt.time): - if method is not None: - raise NotImplementedError( - "cannot yet lookup inexact labels when key is a time object" - ) return self.indexer_at_time(key) else: @@ -610,7 +605,7 @@ def 
get_loc(self, key, method=None, tolerance=None): raise KeyError(key) try: - return Index.get_loc(self, key, method, tolerance) + return Index.get_loc(self, key) except KeyError as err: raise KeyError(orig_key) from err diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 012a92793acf9..8776d78ae6d9a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2730,7 +2730,7 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: else: return level_index.get_loc(key) - def get_loc(self, key, method=None): + def get_loc(self, key): """ Get location for a label or a tuple of labels. @@ -2740,7 +2740,6 @@ def get_loc(self, key, method=None): Parameters ---------- key : label or tuple of labels (one for each level) - method : None Returns ------- @@ -2772,12 +2771,6 @@ def get_loc(self, key, method=None): >>> mi.get_loc(('b', 'e')) 1 """ - if method is not None: - raise NotImplementedError( - "only the default get_loc method is " - "currently supported for MultiIndex" - ) - self._check_indexing_error(key) def _maybe_to_slice(loc): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ef47cb9bf1070..877bb2844e8c9 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -375,7 +375,7 @@ def _convert_tolerance(self, tolerance, target): return tolerance - def get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location for requested label. 
@@ -421,10 +421,8 @@ def get_loc(self, key, method=None, tolerance=None): # the reso < self._resolution_obj case goes # through _get_string_slice key = self._cast_partial_indexing_scalar(parsed) - elif method is None: - raise KeyError(key) else: - key = self._cast_partial_indexing_scalar(parsed) + raise KeyError(key) elif isinstance(key, Period): self._disallow_mismatched_indexing(key) @@ -437,7 +435,7 @@ def get_loc(self, key, method=None, tolerance=None): raise KeyError(key) try: - return Index.get_loc(self, key, method, tolerance) + return Index.get_loc(self, key) except KeyError as err: raise KeyError(orig_key) from err diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a2281c6fd9540..1937cd4254790 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -328,17 +328,15 @@ def inferred_type(self) -> str: # Indexing Methods @doc(Int64Index.get_loc) - def get_loc(self, key, method=None, tolerance=None): - if method is None and tolerance is None: - if is_integer(key) or (is_float(key) and key.is_integer()): - new_key = int(key) - try: - return self._range.index(new_key) - except ValueError as err: - raise KeyError(key) from err - self._check_indexing_error(key) - raise KeyError(key) - return super().get_loc(key, method=method, tolerance=tolerance) + def get_loc(self, key): + if is_integer(key) or (is_float(key) and key.is_integer()): + new_key = int(key) + try: + return self._range.index(new_key) + except ValueError as err: + raise KeyError(key) from err + self._check_indexing_error(key) + raise KeyError(key) def _get_indexer( self, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 82ac2bd139b1f..54a9152670cab 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -174,7 +174,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: # ------------------------------------------------------------------- # Indexing Methods - def 
get_loc(self, key, method=None, tolerance=None): + def get_loc(self, key): """ Get integer location for requested label @@ -189,7 +189,7 @@ def get_loc(self, key, method=None, tolerance=None): except TypeError as err: raise KeyError(key) from err - return Index.get_loc(self, key, method, tolerance) + return Index.get_loc(self, key) def _parse_with_reso(self, label: str): # the "with_reso" is a no-op for TimedeltaIndex diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 918c70ff91da5..fbb7b199019cb 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -374,19 +374,23 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T: def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors) - def convert(self: T) -> T: + def convert(self: T, copy: bool) -> T: def _convert(arr): if is_object_dtype(arr.dtype): # extract PandasArray for tests that patch PandasArray._typ arr = np.asarray(arr) - return lib.maybe_convert_objects( + result = lib.maybe_convert_objects( arr, convert_datetime=True, convert_timedelta=True, convert_period=True, + convert_interval=True, ) + if result is arr and copy: + return arr.copy() + return result else: - return arr.copy() + return arr.copy() if copy else arr return self.apply(_convert) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e5a5cda744736..0999a9af3f72a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1983,7 +1983,10 @@ def convert( convert_datetime=True, convert_timedelta=True, convert_period=True, + convert_interval=True, ) + if copy and res_values is values: + res_values = values.copy() res_values = ensure_block_shape(res_values, self.ndim) return [self.make_block(res_values)] diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 
563011abe2c41..07fab0080a747 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -582,7 +582,7 @@ def _extract_index(data) -> Index: """ index: Index if len(data) == 0: - return Index([]) + return default_index(0) raw_lengths = [] indexes: list[list[Hashable] | Index] = [] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d1eee23f1908c..6275e04c30e08 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -441,10 +441,10 @@ def fillna(self: T, value, limit, inplace: bool, downcast) -> T: def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T: return self.apply("astype", dtype=dtype, copy=copy, errors=errors) - def convert(self: T) -> T: + def convert(self: T, copy: bool) -> T: return self.apply( "convert", - copy=True, + copy=copy, ) def replace(self: T, to_replace, value, inplace: bool) -> T: diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index bfedaca093a8e..e514bdcac5265 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -230,18 +230,27 @@ def align_method_FRAME( def to_series(right): msg = "Unable to coerce to Series, length must be {req_len}: given {given_len}" + + # pass dtype to avoid doing inference, which would break consistency + # with Index/Series ops + dtype = None + if getattr(right, "dtype", None) == object: + # can't pass right.dtype unconditionally as that would break on e.g. 
+ # datetime64[h] ndarray + dtype = object + if axis is not None and left._get_axis_name(axis) == "index": if len(left.index) != len(right): raise ValueError( msg.format(req_len=len(left.index), given_len=len(right)) ) - right = left._constructor_sliced(right, index=left.index) + right = left._constructor_sliced(right, index=left.index, dtype=dtype) else: if len(left.columns) != len(right): raise ValueError( msg.format(req_len=len(left.columns), given_len=len(right)) ) - right = left._constructor_sliced(right, index=left.columns) + right = left._constructor_sliced(right, index=left.columns, dtype=dtype) return right if isinstance(right, np.ndarray): @@ -250,13 +259,25 @@ def to_series(right): right = to_series(right) elif right.ndim == 2: + # We need to pass dtype=right.dtype to retain object dtype + # otherwise we lose consistency with Index and array ops + dtype = None + if getattr(right, "dtype", None) == object: + # can't pass right.dtype unconditionally as that would break on e.g. + # datetime64[h] ndarray + dtype = object + if right.shape == left.shape: - right = left._constructor(right, index=left.index, columns=left.columns) + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) elif right.shape[0] == left.shape[0] and right.shape[1] == 1: # Broadcast across columns right = np.broadcast_to(right, left.shape) - right = left._constructor(right, index=left.index, columns=left.columns) + right = left._constructor( + right, index=left.index, columns=left.columns, dtype=dtype + ) elif right.shape[1] == left.shape[1] and right.shape[0] == 1: # Broadcast along rows @@ -406,7 +427,10 @@ def _maybe_align_series_as_frame(frame: DataFrame, series: Series, axis: AxisInt rvalues = rvalues.reshape(1, -1) rvalues = np.broadcast_to(rvalues, frame.shape) - return type(frame)(rvalues, index=frame.index, columns=frame.columns) + # pass dtype to avoid doing inference + return type(frame)( + rvalues, index=frame.index, 
columns=frame.columns, dtype=rvalues.dtype + ) def flex_arith_method_FRAME(op): diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index c76ffdfaa7332..16f1a5d0b81e2 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -152,10 +152,6 @@ def merge( return op.get_result(copy=copy) -if __debug__: - merge.__doc__ = _merge_doc % "\nleft : DataFrame" - - def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): """ groupby & merge; we are always performing a left-by type operation @@ -232,8 +228,8 @@ def merge_ordered( Parameters ---------- - left : DataFrame - right : DataFrame + left : DataFrame or named Series + right : DataFrame or named Series on : label or list Field names to join on. Must be found in both DataFrames. left_on : label or list, or array-like @@ -245,10 +241,10 @@ def merge_ordered( left_on docs. left_by : column name or list of column names Group left DataFrame by group columns and merge piece by piece with - right DataFrame. + right DataFrame. Must be None if either left or right are a Series. right_by : column name or list of column names Group right DataFrame by group columns and merge piece by piece with - left DataFrame. + left DataFrame. Must be None if either left or right are a Series. fill_method : {'ffill', None}, default None Interpolation method for data. 
suffixes : list-like, default is ("_x", "_y") @@ -1088,8 +1084,8 @@ def _get_join_info( else: join_index = default_index(len(left_indexer)) - if len(join_index) == 0: - join_index = join_index.astype(object) + if len(join_index) == 0 and not isinstance(join_index, MultiIndex): + join_index = default_index(0).set_names(join_index.name) return join_index, left_indexer, right_indexer def _create_join_index( diff --git a/pandas/core/series.py b/pandas/core/series.py index 1e5f565934b50..9cd52857b12db 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -385,11 +385,16 @@ def __init__( if index is not None: index = ensure_index(index) - if data is None: - data = {} if dtype is not None: dtype = self._validate_dtype(dtype) + if data is None: + index = index if index is not None else default_index(0) + if len(index) or dtype is not None: + data = na_value_for_dtype(pandas_dtype(dtype), compat=False) + else: + data = [] + if isinstance(data, MultiIndex): raise NotImplementedError( "initializing a Series from a MultiIndex is not supported" @@ -2995,9 +3000,10 @@ def _construct_result( assert isinstance(res2, Series) return (res1, res2) - # We do not pass dtype to ensure that the Series constructor - # does inference in the case where `result` has object-dtype. 
- out = self._constructor(result, index=self.index) + # TODO: result should always be ArrayLike, but this fails for some + # JSONArray tests + dtype = getattr(result, "dtype", None) + out = self._constructor(result, index=self.index, dtype=dtype) out = out.__finalize__(self) # Set the result's name after __finalize__ is called because __finalize__ diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py index d6cb3d79c81e4..ddcd114aa352b 100644 --- a/pandas/core/window/common.py +++ b/pandas/core/window/common.py @@ -30,6 +30,8 @@ def dataframe_from_int_dict(data, frame_template): result = DataFrame(data, index=frame_template.index) if len(result.columns) > 0: result.columns = frame_template.columns[result.columns] + else: + result.columns = frame_template.columns.copy() return result results = {} diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 4d685bd8e8858..6c62c4efde6bb 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -290,7 +290,7 @@ def concat(self, other: Styler) -> Styler: Returns ------- - self : Styler + Styler Notes ----- @@ -415,7 +415,7 @@ def set_tooltips( Returns ------- - self : Styler + Styler Notes ----- @@ -1424,7 +1424,7 @@ def set_td_classes(self, classes: DataFrame) -> Styler: Returns ------- - self : Styler + Styler See Also -------- @@ -1727,7 +1727,7 @@ def apply( Returns ------- - self : Styler + Styler See Also -------- @@ -1844,7 +1844,7 @@ def apply_index( Returns ------- - self : Styler + Styler See Also -------- @@ -1948,7 +1948,7 @@ def applymap( Returns ------- - self : Styler + Styler See Also -------- @@ -2003,7 +2003,7 @@ def set_table_attributes(self, attributes: str) -> Styler: Returns ------- - self : Styler + Styler See Also -------- @@ -2105,7 +2105,7 @@ def use(self, styles: dict[str, Any]) -> Styler: Returns ------- - self : Styler + Styler See Also -------- @@ -2156,7 +2156,7 @@ def set_uuid(self, uuid: str) -> Styler: Returns ------- - self : Styler + 
Styler Notes ----- @@ -2180,7 +2180,7 @@ def set_caption(self, caption: str | tuple) -> Styler: Returns ------- - self : Styler + Styler """ msg = "`caption` must be either a string or 2-tuple of strings." if isinstance(caption, tuple): @@ -2218,7 +2218,7 @@ def set_sticky( Returns ------- - self : Styler + Styler Notes ----- @@ -2379,7 +2379,7 @@ def set_table_styles( Returns ------- - self : Styler + Styler See Also -------- @@ -2504,7 +2504,7 @@ def hide( Returns ------- - self : Styler + Styler Notes ----- @@ -2748,7 +2748,7 @@ def background_gradient( Returns ------- - self : Styler + Styler See Also -------- @@ -2881,7 +2881,7 @@ def set_properties(self, subset: Subset | None = None, **kwargs) -> Styler: Returns ------- - self : Styler + Styler Notes ----- @@ -2978,7 +2978,7 @@ def bar( # pylint: disable=disallowed-name Returns ------- - self : Styler + Styler Notes ----- @@ -3053,7 +3053,7 @@ def highlight_null( Returns ------- - self : Styler + Styler See Also -------- @@ -3099,7 +3099,7 @@ def highlight_max( Returns ------- - self : Styler + Styler See Also -------- @@ -3147,7 +3147,7 @@ def highlight_min( Returns ------- - self : Styler + Styler See Also -------- @@ -3203,7 +3203,7 @@ def highlight_between( Returns ------- - self : Styler + Styler See Also -------- @@ -3315,7 +3315,7 @@ def highlight_quantile( Returns ------- - self : Styler + Styler See Also -------- diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index c5fc054952b1f..ff94502d69ca3 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -84,6 +84,7 @@ from pandas.core.indexes.api import ( Index, MultiIndex, + default_index, ensure_index_from_sequences, ) from pandas.core.series import Series @@ -1093,8 +1094,9 @@ def _get_empty_meta( # # Both must be non-null to ensure a successful construction. Otherwise, # we have to create a generic empty Index. 
+ index: Index if (index_col is None or index_col is False) or index_names is None: - index = Index([]) + index = default_index(0) else: data = [Series([], dtype=dtype_dict[name]) for name in index_names] index = ensure_index_from_sequences(data, names=index_names) diff --git a/pandas/io/sas/byteswap.pyx b/pandas/io/sas/byteswap.pyx index 2a4d3f66a5d7d..511af5140b563 100644 --- a/pandas/io/sas/byteswap.pyx +++ b/pandas/io/sas/byteswap.pyx @@ -81,13 +81,13 @@ cdef extern from *: uint64_t _byteswap8(uint64_t) -cdef inline float _byteswap_float(float num): +cdef float _byteswap_float(float num): cdef uint32_t *intptr = &num intptr[0] = _byteswap4(intptr[0]) return num -cdef inline double _byteswap_double(double num): +cdef double _byteswap_double(double num): cdef uint64_t *intptr = &num intptr[0] = _byteswap8(intptr[0]) return num diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 7d0f549a2f976..4fe0f5ce91a51 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -31,30 +31,30 @@ cdef struct Buffer: size_t length -cdef inline uint8_t buf_get(Buffer buf, size_t offset) except? 255: +cdef uint8_t buf_get(Buffer buf, size_t offset) except? 
255: assert offset < buf.length, "Out of bounds read" return buf.data[offset] -cdef inline bint buf_set(Buffer buf, size_t offset, uint8_t value) except 0: +cdef bint buf_set(Buffer buf, size_t offset, uint8_t value) except 0: assert offset < buf.length, "Out of bounds write" buf.data[offset] = value return True -cdef inline bytes buf_as_bytes(Buffer buf, size_t offset, size_t length): +cdef bytes buf_as_bytes(Buffer buf, size_t offset, size_t length): assert offset + length <= buf.length, "Out of bounds read" return buf.data[offset:offset+length] -cdef inline Buffer buf_new(size_t length) except *: +cdef Buffer buf_new(size_t length) except *: cdef uint8_t *data = calloc(length, sizeof(uint8_t)) if data == NULL: raise MemoryError(f"Failed to allocate {length} bytes") return Buffer(data, length) -cdef inline buf_free(Buffer buf): +cdef buf_free(Buffer buf): if buf.data != NULL: free(buf.data) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index c8b217319b91d..6a7edc2c5823c 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -714,6 +714,10 @@ class PlotAccessor(PandasObject): Now applicable to planar plots (`scatter`, `hexbin`). + .. versionchanged:: 2.0.0 + + Now applicable to histograms. + ylabel : label, optional Name to use for the ylabel on y-axis. Default will show no ylabel, or the y-column name for planar plots. @@ -724,6 +728,10 @@ class PlotAccessor(PandasObject): Now applicable to planar plots (`scatter`, `hexbin`). + .. versionchanged:: 2.0.0 + + Now applicable to histograms. + rot : float, default None Rotation for ticks (xticks for vertical, yticks for horizontal plots). 
diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 337628aa3bc2e..1add485e03760 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -59,6 +59,8 @@ def __init__( ) -> None: self.bins = bins # use mpl default self.bottom = bottom + self.xlabel = kwargs.get("xlabel") + self.ylabel = kwargs.get("ylabel") # Do not call LinePlot.__init__ which may fill nan MPLPlot.__init__(self, data, **kwargs) # pylint: disable=non-parent-init-called @@ -170,9 +172,11 @@ def _make_plot_keywords(self, kwds, y): def _post_plot_logic(self, ax: Axes, data) -> None: if self.orientation == "horizontal": - ax.set_xlabel("Frequency") + ax.set_xlabel("Frequency" if self.xlabel is None else self.xlabel) + ax.set_ylabel(self.ylabel) else: - ax.set_ylabel("Frequency") + ax.set_xlabel(self.xlabel) + ax.set_ylabel("Frequency" if self.ylabel is None else self.ylabel) @property def orientation(self) -> PlottingOrientation: diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index e7c2618d388c2..c28c3ae58219a 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -114,14 +114,14 @@ def test_apply_with_reduce_empty(): result = empty_frame.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_frame) result = empty_frame.apply(x.append, axis=1, result_type="reduce") - expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) empty_with_cols = DataFrame(columns=["a", "b", "c"]) result = empty_with_cols.apply(x.append, axis=1, result_type="expand") tm.assert_frame_equal(result, empty_with_cols) result = empty_with_cols.apply(x.append, axis=1, result_type="reduce") - expected = Series([], index=pd.Index([], dtype=object), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, 
expected) # Ensure that x.append hasn't been called @@ -147,7 +147,7 @@ def test_nunique_empty(): tm.assert_series_equal(result, expected) result = df.T.nunique() - expected = Series([], index=pd.Index([]), dtype=np.float64) + expected = Series([], dtype=np.float64) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index 61c879fb2b20f..add7b5c77ef65 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -8,7 +8,6 @@ from pandas import ( DataFrame, - Index, Series, ) import pandas._testing as tm @@ -149,8 +148,8 @@ def test_agg_cython_table_series(series, func, expected): tm.get_cython_table_params( Series(dtype=np.float64), [ - ("cumprod", Series([], Index([]), dtype=np.float64)), - ("cumsum", Series([], Index([]), dtype=np.float64)), + ("cumprod", Series([], dtype=np.float64)), + ("cumsum", Series([], dtype=np.float64)), ], ), tm.get_cython_table_params( diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index f2af85c2e388d..529dd6baa70c0 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -1147,9 +1147,6 @@ def test_numarr_with_dtype_add_nan(self, dtype, box_with_array): ser = tm.box_expected(ser, box) expected = tm.box_expected(expected, box) - if box is Index and dtype is object: - # TODO: avoid this; match behavior with Series - expected = expected.astype(np.float64) result = np.nan + ser tm.assert_equal(result, expected) @@ -1165,9 +1162,6 @@ def test_numarr_with_dtype_add_int(self, dtype, box_with_array): ser = tm.box_expected(ser, box) expected = tm.box_expected(expected, box) - if box is Index and dtype is object: - # TODO: avoid this; match behavior with Series - expected = expected.astype(np.int64) result = 1 + ser tm.assert_equal(result, expected) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 
e107ff6b65c0f..cba2b9be255fb 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -187,7 +187,8 @@ def test_series_with_dtype_radd_timedelta(self, dtype): dtype=dtype, ) expected = Series( - [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")] + [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")], + dtype=dtype, ) result = pd.Timedelta("3 days") + ser @@ -227,7 +228,9 @@ def test_mixed_timezone_series_ops_object(self): name="xxx", ) assert ser2.dtype == object - exp = Series([pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx") + exp = Series( + [pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object + ) tm.assert_series_equal(ser2 - ser, exp) tm.assert_series_equal(ser - ser2, -exp) @@ -238,7 +241,11 @@ def test_mixed_timezone_series_ops_object(self): ) assert ser.dtype == object - exp = Series([pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], name="xxx") + exp = Series( + [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")], + name="xxx", + dtype=object, + ) tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp) tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 14d50acf3eadf..f3ea741607692 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -26,11 +26,7 @@ timedelta_range, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, - UInt64Index, -) +from pandas.core.api import NumericIndex from pandas.tests.arithmetic.common import ( assert_invalid_addsub_type, assert_invalid_comparison, @@ -492,10 +488,10 @@ def test_addition_ops(self): # random indexes msg = "Addition/subtraction of integers and integer-arrays" with pytest.raises(TypeError, match=msg): - tdi + Int64Index([1, 2, 3]) + tdi + NumericIndex([1, 2, 3], dtype=np.int64) # 
this is a union! - # pytest.raises(TypeError, lambda : Int64Index([1,2,3]) + tdi) + # pytest.raises(TypeError, lambda : Index([1,2,3]) + tdi) result = tdi + dti # name will be reset expected = DatetimeIndex(["20130102", NaT, "20130105"]) @@ -1398,7 +1394,7 @@ def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box_with_array): # ------------------------------------------------------------------ # Unsorted - def test_td64arr_add_sub_object_array(self, box_with_array): + def test_td64arr_add_sub_object_array(self, box_with_array, using_array_manager): box = box_with_array xbox = np.ndarray if box is pd.array else box @@ -1414,6 +1410,11 @@ def test_td64arr_add_sub_object_array(self, box_with_array): [Timedelta(days=2), Timedelta(days=4), Timestamp("2000-01-07")] ) expected = tm.box_expected(expected, xbox) + if not using_array_manager: + # TODO: avoid mismatched behavior. This occurs bc inference + # can happen within TimedeltaArray method, which means results + # depend on whether we split blocks. 
+ expected = expected.astype(object) tm.assert_equal(result, expected) msg = "unsupported operand type|cannot subtract a datelike" @@ -1426,6 +1427,8 @@ def test_td64arr_add_sub_object_array(self, box_with_array): expected = pd.Index([Timedelta(0), Timedelta(0), Timestamp("2000-01-01")]) expected = tm.box_expected(expected, xbox) + if not using_array_manager: + expected = expected.astype(object) tm.assert_equal(result, expected) @@ -1508,9 +1511,9 @@ def test_tdi_mul_float_series(self, box_with_array): "other", [ np.arange(1, 11), - Int64Index(range(1, 11)), - UInt64Index(range(1, 11)), - Float64Index(range(1, 11)), + NumericIndex(np.arange(1, 11), np.int64), + NumericIndex(range(1, 11), np.uint64), + NumericIndex(range(1, 11), np.float64), pd.RangeIndex(1, 11), ], ids=lambda x: type(x).__name__, @@ -1594,7 +1597,7 @@ def test_td64arr_div_tdlike_scalar(self, two_hours, box_with_array): xbox = np.ndarray if box is pd.array else box rng = timedelta_range("1 days", "10 days", name="foo") - expected = Float64Index((np.arange(10) + 1) * 12, name="foo") + expected = NumericIndex((np.arange(10) + 1) * 12, dtype=np.float64, name="foo") rng = tm.box_expected(rng, box) expected = tm.box_expected(expected, xbox) @@ -1634,7 +1637,7 @@ def test_td64arr_div_tdlike_scalar_with_nat(self, two_hours, box_with_array): xbox = np.ndarray if box is pd.array else box rng = TimedeltaIndex(["1 days", NaT, "2 days"], name="foo") - expected = Float64Index([12, np.nan, 24], name="foo") + expected = NumericIndex([12, np.nan, 24], dtype=np.float64, name="foo") rng = tm.box_expected(rng, box) expected = tm.box_expected(expected, xbox) @@ -1652,7 +1655,7 @@ def test_td64arr_div_td64_ndarray(self, box_with_array): xbox = np.ndarray if box is pd.array else box rng = TimedeltaIndex(["1 days", NaT, "2 days"]) - expected = Float64Index([12, np.nan, 24]) + expected = NumericIndex([12, np.nan, 24], dtype=np.float64) rng = tm.box_expected(rng, box) expected = tm.box_expected(expected, xbox) diff --git 
a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py index 924f7a56e806a..d729a31668ade 100644 --- a/pandas/tests/arrays/sparse/test_astype.py +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -119,3 +119,15 @@ def test_astype_dt64_to_int64(self): result = arr.astype("int64") expected = values.astype("int64") tm.assert_numpy_array_equal(result, expected) + + # we should also be able to cast to equivalent Sparse[int64] + dtype_int64 = SparseDtype("int64", np.iinfo(np.int64).min) + result2 = arr.astype(dtype_int64) + tm.assert_numpy_array_equal(result2.to_numpy(), expected) + + # GH#50087 we should match the non-sparse behavior regardless of + # if we have a fill_value other than NaT + dtype = SparseDtype("datetime64[ns]", values[1]) + arr3 = SparseArray(values, dtype=dtype) + result3 = arr3.astype("int64") + tm.assert_numpy_array_equal(result3, expected) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 89c9ba85fcfa9..2cb0e7cbd6c8b 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -403,6 +403,15 @@ def test_astype_int(self, dtype): assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) + def test_astype_to_sparse_dt64(self): + # GH#50082 + dti = pd.date_range("2016-01-01", periods=4) + dta = dti._data + result = dta.astype("Sparse[datetime64[ns]]") + + assert result.dtype == "Sparse[datetime64[ns]]" + assert (result == dta).all() + def test_tz_setter_raises(self): arr = DatetimeArray._from_sequence( ["2000"], dtype=DatetimeTZDtype(tz="US/Central") diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 015c121ca684a..7bb3071b3f63c 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1829,7 +1829,6 @@ def test_is_timedelta(self): assert is_timedelta64_ns_dtype(tdi) assert 
is_timedelta64_ns_dtype(tdi.astype("timedelta64[ns]")) - # Conversion to Int64Index: assert not is_timedelta64_ns_dtype(Index([], dtype=np.float64)) assert not is_timedelta64_ns_dtype(Index([], dtype=np.int64)) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 21c49807b7743..eeddf443dba86 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -33,13 +33,14 @@ import pandas as pd from pandas import ( DatetimeIndex, + Index, NaT, Series, TimedeltaIndex, date_range, ) import pandas._testing as tm -from pandas.core.api import Float64Index +from pandas.core.api import NumericIndex fix_now = pd.Timestamp("2021-01-01") fix_utcnow = pd.Timestamp("2021-01-01", tz="UTC") @@ -355,7 +356,7 @@ def test_decimal(self): tm.assert_series_equal(result, ~expected) # index - idx = pd.Index(arr) + idx = Index(arr) expected = np.array([False, True]) result = isna(idx) tm.assert_numpy_array_equal(result, expected) @@ -404,10 +405,10 @@ def test_array_equivalent(dtype_equal): np.array(["a", "b", "c", "d"]), np.array(["e", "e"]), dtype_equal=dtype_equal ) assert array_equivalent( - Float64Index([0, np.nan]), Float64Index([0, np.nan]), dtype_equal=dtype_equal + NumericIndex([0, np.nan]), NumericIndex([0, np.nan]), dtype_equal=dtype_equal ) assert not array_equivalent( - Float64Index([0, np.nan]), Float64Index([1, np.nan]), dtype_equal=dtype_equal + NumericIndex([0, np.nan]), NumericIndex([1, np.nan]), dtype_equal=dtype_equal ) assert array_equivalent( DatetimeIndex([0, np.nan]), DatetimeIndex([0, np.nan]), dtype_equal=dtype_equal @@ -559,15 +560,15 @@ def test_array_equivalent_nested(): def test_array_equivalent_index_with_tuples(): # GH#48446 - idx1 = pd.Index(np.array([(pd.NA, 4), (1, 1)], dtype="object")) - idx2 = pd.Index(np.array([(1, 1), (pd.NA, 4)], dtype="object")) + idx1 = Index(np.array([(pd.NA, 4), (1, 1)], dtype="object")) + idx2 = Index(np.array([(1, 1), (pd.NA, 4)], dtype="object")) assert not 
array_equivalent(idx1, idx2) assert not idx1.equals(idx2) assert not array_equivalent(idx2, idx1) assert not idx2.equals(idx1) - idx1 = pd.Index(np.array([(4, pd.NA), (1, 1)], dtype="object")) - idx2 = pd.Index(np.array([(1, 1), (4, pd.NA)], dtype="object")) + idx1 = Index(np.array([(4, pd.NA), (1, 1)], dtype="object")) + idx2 = Index(np.array([(1, 1), (4, pd.NA)], dtype="object")) assert not array_equivalent(idx1, idx2) assert not idx1.equals(idx2) assert not array_equivalent(idx2, idx1) diff --git a/pandas/tests/extension/base/constructors.py b/pandas/tests/extension/base/constructors.py index b9f8f8512a995..29766ff392296 100644 --- a/pandas/tests/extension/base/constructors.py +++ b/pandas/tests/extension/base/constructors.py @@ -119,7 +119,7 @@ def test_construct_empty_dataframe(self, dtype): # GH 33623 result = pd.DataFrame(columns=["a"], dtype=dtype) expected = pd.DataFrame( - {"a": pd.array([], dtype=dtype)}, index=pd.Index([], dtype="object") + {"a": pd.array([], dtype=dtype)}, index=pd.RangeIndex(0) ) self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 3d43dc47b5280..cab81f864d8d8 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -55,7 +55,7 @@ def test_dropna_frame(self, data_missing): # axis = 1 result = df.dropna(axis="columns") - expected = pd.DataFrame(index=[0, 1]) + expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([])) self.assert_frame_equal(result, expected) # multiple diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index e4a92ecc5dac1..971ce2e467aa9 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -84,7 +84,7 @@ def test_xs_corner(self): # no columns but Index(dtype=object) df = DataFrame(index=["a", "b", "c"]) result = df.xs("a") - expected = Series([], name="a", index=Index([]), dtype=np.float64) + 
expected = Series([], name="a", dtype=np.float64) tm.assert_series_equal(result, expected) def test_xs_duplicates(self): diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 9d56dba9b480d..4c74f4782578e 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -387,6 +387,22 @@ def test_astype_from_object_to_datetime_unit(self, unit): with pytest.raises(TypeError, match="Cannot cast"): df.astype(f"M8[{unit}]") + @pytest.mark.parametrize("unit", ["Y", "M", "W", "D", "h", "m"]) + def test_astype_from_object_to_timedelta_unit(self, unit): + vals = [ + ["1 Day", "2 Days", "3 Days"], + ["4 Days", "5 Days", "6 Days"], + ] + df = DataFrame(vals, dtype=object) + msg = ( + r"Cannot convert from timedelta64\[ns\] to timedelta64\[.*\]. " + "Supported resolutions are 's', 'ms', 'us', 'ns'" + ) + with pytest.raises(ValueError, match=msg): + # TODO: this is ValueError while for DatetimeArray it is TypeError; + # get these consistent + df.astype(f"m8[{unit}]") + @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) def test_astype_from_datetimelike_to_object(self, dtype, unit): diff --git a/pandas/tests/frame/methods/test_count.py b/pandas/tests/frame/methods/test_count.py index 43eb96f7f32d9..1553a8a86305d 100644 --- a/pandas/tests/frame/methods/test_count.py +++ b/pandas/tests/frame/methods/test_count.py @@ -28,7 +28,7 @@ def test_count(self): df = DataFrame() result = df.count() - expected = Series(0, index=[]) + expected = Series(dtype="int64") tm.assert_series_equal(result, expected) def test_count_objects(self, float_string_frame): diff --git a/pandas/tests/frame/methods/test_get_numeric_data.py b/pandas/tests/frame/methods/test_get_numeric_data.py index 8628b76f54b1d..456dfe1075981 100644 --- a/pandas/tests/frame/methods/test_get_numeric_data.py +++ b/pandas/tests/frame/methods/test_get_numeric_data.py @@ 
-17,7 +17,7 @@ def test_get_numeric_data_preserve_dtype(self): # get the numeric data obj = DataFrame({"A": [1, "2", 3.0]}) result = obj._get_numeric_data() - expected = DataFrame(index=[0, 1, 2], dtype=object) + expected = DataFrame(dtype=object, index=pd.RangeIndex(3), columns=[]) tm.assert_frame_equal(result, expected) def test_get_numeric_data(self): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 6826b15596850..93e1bcc113765 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -420,7 +420,7 @@ def test_quantile_datetime(self): tm.assert_series_equal(result, expected) result = df[["a", "c"]].quantile([0.5], numeric_only=True) - expected = DataFrame(index=[0.5]) + expected = DataFrame(index=[0.5], columns=[]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -451,7 +451,7 @@ def test_quantile_dt64_empty(self, dtype, interp_method): interpolation=interpolation, method=method, ) - expected = DataFrame(index=[0.5]) + expected = DataFrame(index=[0.5], columns=[]) tm.assert_frame_equal(res, expected) @pytest.mark.parametrize("invalid", [-1, 2, [0.5, -1], [0.5, 2]]) diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 5f648c76d0aa4..271a32017dd97 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -483,7 +483,7 @@ def test_rank_object_first(self, frame_or_series, na_option, ascending, expected "data,expected", [ ({"a": [1, 2, "a"], "b": [4, 5, 6]}, DataFrame({"b": [1.0, 2.0, 3.0]})), - ({"a": [1, 2, "a"]}, DataFrame(index=range(3))), + ({"a": [1, 2, "a"]}, DataFrame(index=range(3), columns=[])), ], ) def test_rank_mixed_axis_zero(self, data, expected): diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index 239c31b2f9521..0415d0816d9f8 100644 --- a/pandas/tests/frame/methods/test_rename.py 
+++ b/pandas/tests/frame/methods/test_rename.py @@ -334,7 +334,7 @@ def test_rename_axis_style_raises(self): # Duplicates with pytest.raises(TypeError, match="multiple values"): - df.rename(id, mapper=id) # pylint: disable=redundant-keyword-arg + df.rename(id, mapper=id) def test_rename_positional_raises(self): # GH 29136 diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 7487b2c70a264..638387452903b 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -390,7 +390,7 @@ def test_to_csv_dup_cols(self, nrows): def test_to_csv_empty(self): df = DataFrame(index=np.arange(10)) result, expected = self._return_result_expected(df, 1000) - tm.assert_frame_equal(result, expected, check_names=False) + tm.assert_frame_equal(result, expected, check_column_type=False) @pytest.mark.slow def test_to_csv_chunksize(self): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ef80cc847a5b8..8051fff7b329d 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -55,7 +55,6 @@ SparseArray, TimedeltaArray, ) -from pandas.core.api import Int64Index MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"] MIXED_INT_DTYPES = [ @@ -194,13 +193,11 @@ def test_series_with_name_not_matching_column(self): [ lambda: DataFrame(), lambda: DataFrame(None), - lambda: DataFrame({}), lambda: DataFrame(()), lambda: DataFrame([]), lambda: DataFrame(_ for _ in []), lambda: DataFrame(range(0)), lambda: DataFrame(data=None), - lambda: DataFrame(data={}), lambda: DataFrame(data=()), lambda: DataFrame(data=[]), lambda: DataFrame(data=(_ for _ in [])), @@ -214,6 +211,20 @@ def test_empty_constructor(self, constructor): assert len(result.columns) == 0 tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "constructor", + [ + lambda: DataFrame({}), + lambda: DataFrame(data={}), + ], + ) + def 
test_empty_constructor_object_index(self, constructor): + expected = DataFrame(columns=Index([])) + result = constructor() + assert len(result.index) == 0 + assert len(result.columns) == 0 + tm.assert_frame_equal(result, expected, check_index_type=True) + @pytest.mark.parametrize( "emptylike,expected_index,expected_columns", [ @@ -626,7 +637,7 @@ def test_constructor_2d_index(self): df = DataFrame([[1]], columns=[[1]], index=[1, 2]) expected = DataFrame( [1, 1], - index=Int64Index([1, 2], dtype="int64"), + index=Index([1, 2], dtype="int64"), columns=MultiIndex(levels=[[1]], codes=[[0]]), ) tm.assert_frame_equal(df, expected) @@ -1392,7 +1403,17 @@ def test_constructor_generator(self): def test_constructor_list_of_dicts(self): result = DataFrame([{}]) - expected = DataFrame(index=[0]) + expected = DataFrame(index=RangeIndex(1), columns=[]) + tm.assert_frame_equal(result, expected) + + def test_constructor_ordered_dict_nested_preserve_order(self): + # see gh-18166 + nested1 = OrderedDict([("b", 1), ("a", 2)]) + nested2 = OrderedDict([("b", 2), ("a", 5)]) + data = OrderedDict([("col2", nested1), ("col1", nested2)]) + result = DataFrame(data) + data = {"col2": [1, 2], "col1": [2, 5]} + expected = DataFrame(data=data, index=["b", "a"]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("dict_type", [dict, OrderedDict]) @@ -1753,7 +1774,7 @@ def test_constructor_empty_with_string_dtype(self): def test_constructor_empty_with_string_extension(self, nullable_string_dtype): # GH 34915 - expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype) + expected = DataFrame(columns=["c1"], dtype=nullable_string_dtype) df = DataFrame(columns=["c1"], dtype=nullable_string_dtype) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 6c6a923e363ae..f9f3868375ed5 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1163,7 +1163,7 @@ 
def test_any_all_bool_only(self): ) result = df.all(bool_only=True) - expected = Series(dtype=np.bool_) + expected = Series(dtype=np.bool_, index=[]) tm.assert_series_equal(result, expected) df = DataFrame( diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index cb796e1b1ec64..f67e2125bbf54 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1251,7 +1251,8 @@ def test_stack_timezone_aware_values(): @pytest.mark.parametrize("dropna", [True, False]) def test_stack_empty_frame(dropna): # GH 36113 - expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + levels = [np.array([], dtype=np.int64), np.array([], dtype=np.int64)] + expected = Series(dtype=np.float64, index=MultiIndex(levels=levels, codes=[[], []])) result = DataFrame(dtype=np.float64).stack(dropna=dropna) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 03b917edd357b..659703c4d6d8f 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -674,7 +674,7 @@ def test_no_args_raises(self): # but we do allow this result = gr.agg([]) - expected = DataFrame() + expected = DataFrame(columns=[]) tm.assert_frame_equal(result, expected) def test_series_named_agg_duplicates_no_raises(self): diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index dc09a2e0ea6ad..08c25fb74be83 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -103,7 +103,9 @@ def test_cython_agg_nothing_to_agg(): with pytest.raises(TypeError, match="Could not convert"): frame[["b"]].groupby(frame["a"]).mean() result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) - expected = DataFrame([], index=frame["a"].sort_values().drop_duplicates()) + 
expected = DataFrame( + [], index=frame["a"].sort_values().drop_duplicates(), columns=[] + ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 0d5ab9a4f1acc..04f48bb7cfabc 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -16,7 +16,6 @@ bdate_range, ) import pandas._testing as tm -from pandas.core.api import Int64Index from pandas.tests.groupby import get_groupby_method_args @@ -799,11 +798,9 @@ def test_apply_with_mixed_types(): def test_func_returns_object(): # GH 28652 - df = DataFrame({"a": [1, 2]}, index=Int64Index([1, 2])) + df = DataFrame({"a": [1, 2]}, index=Index([1, 2])) result = df.groupby("a").apply(lambda g: g.index) - expected = Series( - [Int64Index([1]), Int64Index([2])], index=Int64Index([1, 2], name="a") - ) + expected = Series([Index([1]), Index([2])], index=Index([1, 2], name="a")) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 2cc4d376c6abe..26cdfa2291021 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -14,10 +14,6 @@ date_range, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, -) from pandas.core.groupby.grouper import Grouping # selection @@ -687,15 +683,19 @@ def test_list_grouper_with_nat(self): [ ( "transform", - Series(name=2, dtype=np.float64, index=Index([])), + Series(name=2, dtype=np.float64), ), ( "agg", - Series(name=2, dtype=np.float64, index=Float64Index([], name=1)), + Series( + name=2, dtype=np.float64, index=Index([], dtype=np.float64, name=1) + ), ), ( "apply", - Series(name=2, dtype=np.float64, index=Float64Index([], name=1)), + Series( + name=2, dtype=np.float64, index=Index([], dtype=np.float64, name=1) + ), ), ], ) @@ -759,7 +759,9 @@ def test_groupby_multiindex_level_empty(self): empty = df[df.value < 0] result = 
empty.groupby("id").sum() expected = DataFrame( - dtype="float64", columns=["value"], index=Int64Index([], name="id") + dtype="float64", + columns=["value"], + index=Index([], dtype=np.int64, name="id"), ) tm.assert_frame_equal(result, expected) @@ -873,7 +875,7 @@ def test_groupby_with_single_column(self): df = DataFrame({"a": list("abssbab")}) tm.assert_frame_equal(df.groupby("a").get_group("a"), df.iloc[[0, 5]]) # GH 13530 - exp = DataFrame(index=Index(["a", "b", "s"], name="a")) + exp = DataFrame(index=Index(["a", "b", "s"], name="a"), columns=[]) tm.assert_frame_equal(df.groupby("a").count(), exp) tm.assert_frame_equal(df.groupby("a").sum(), exp) diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py index 72772775b3fa1..38c4c41e8648d 100644 --- a/pandas/tests/groupby/test_min_max.py +++ b/pandas/tests/groupby/test_min_max.py @@ -10,7 +10,6 @@ Series, ) import pandas._testing as tm -from pandas.core.api import Int64Index def test_max_min_non_numeric(): @@ -123,7 +122,7 @@ def test_groupby_aggregate_period_column(func): df = DataFrame({"a": groups, "b": periods}) result = getattr(df.groupby("a")["b"], func)() - idx = Int64Index([1, 2], name="a") + idx = Index([1, 2], name="a") expected = Series(periods, index=idx, name="b") tm.assert_series_equal(result, expected) @@ -137,7 +136,7 @@ def test_groupby_aggregate_period_frame(func): df = DataFrame({"a": groups, "b": periods}) result = getattr(df.groupby("a"), func)() - idx = Int64Index([1, 2], name="a") + idx = Index([1, 2], name="a") expected = DataFrame({"b": periods}, index=idx) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_pipe.py b/pandas/tests/groupby/test_pipe.py index 7e9e0abf55b71..9eded607e8733 100644 --- a/pandas/tests/groupby/test_pipe.py +++ b/pandas/tests/groupby/test_pipe.py @@ -6,7 +6,7 @@ Index, ) import pandas._testing as tm -from pandas.core.api import Int64Index +from pandas.core.api import NumericIndex def test_pipe(): @@ -76,6 
+76,6 @@ def h(df, arg3): ser = pd.Series([1, 1, 2, 2, 3, 3]) result = ser.groupby(ser).pipe(lambda grp: grp.sum() * grp.count()) - expected = pd.Series([4, 8, 12], index=Int64Index([1, 2, 3])) + expected = pd.Series([4, 8, 12], index=NumericIndex([1, 2, 3], dtype=np.int64)) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 8bdbc86d8659c..d0c8b53f13399 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1466,8 +1466,8 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) @pytest.mark.parametrize( "func, series, expected_values", [ - (Series.sort_values, False, [4, 5, 3, 1, 2]), - (lambda x: x.head(1), False, ValueError), + (Series.sort_values, False, [5, 4, 3, 2, 1]), + (lambda x: x.head(1), False, [5.0, np.nan, 3, 2, np.nan]), # SeriesGroupBy already has correct behavior (Series.sort_values, True, [5, 4, 3, 2, 1]), (lambda x: x.head(1), True, [5.0, np.nan, 3.0, 2.0, np.nan]), @@ -1475,7 +1475,7 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func) ) @pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) @pytest.mark.parametrize("keys_in_index", [True, False]) -def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_index): +def test_transform_aligns(func, series, expected_values, keys, keys_in_index): # GH#45648 - transform should align with the input's index df = DataFrame({"a1": [1, 1, 3, 2, 2], "b": [5, 4, 3, 2, 1]}) if "a2" in keys: @@ -1487,19 +1487,11 @@ def test_transform_aligns_depr(func, series, expected_values, keys, keys_in_inde if series: gb = gb["b"] - warn = None if series else FutureWarning - msg = "returning a DataFrame in groupby.transform will align" - if expected_values is ValueError: - with tm.assert_produces_warning(warn, match=msg): - with pytest.raises(ValueError, match="Length 
mismatch"): - gb.transform(func) - else: - with tm.assert_produces_warning(warn, match=msg): - result = gb.transform(func) - expected = DataFrame({"b": expected_values}, index=df.index) - if series: - expected = expected["b"] - tm.assert_equal(result, expected) + result = gb.transform(func) + expected = DataFrame({"b": expected_values}, index=df.index) + if series: + expected = expected["b"] + tm.assert_equal(result, expected) @pytest.mark.parametrize("keys", ["A", ["A", "B"]]) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 887766dd3fc29..04d1d8204a346 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas.errors import InvalidIndexError - import pandas as pd from pandas import ( DatetimeIndex, @@ -405,75 +403,6 @@ def test_get_loc_key_unit_mismatch_not_castable(self): assert key not in dti - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_method_exact_match(self, method): - idx = date_range("2000-01-01", periods=3) - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - if method is not None: - assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1 - - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc(self): - idx = date_range("2000-01-01", periods=3) - - assert idx.get_loc("2000-01-01", method="nearest") == 0 - assert idx.get_loc("2000-01-01T12", method="nearest") == 1 - - assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1 - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D")) - == 1 - ) - assert ( - idx.get_loc( - "2000-01-01T12", method="nearest", 
tolerance=np.timedelta64(1, "D") - ) - == 1 - ) - assert ( - idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1 - ) - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo") - with pytest.raises(KeyError, match="'2000-01-01T03'"): - idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours") - with pytest.raises( - ValueError, match="tolerance size must match target index size" - ): - idx.get_loc( - "2000-01-01", - method="nearest", - tolerance=[ - pd.Timedelta("1day").to_timedelta64(), - pd.Timedelta("1day").to_timedelta64(), - ], - ) - - assert idx.get_loc("2000", method="nearest") == slice(0, 3) - assert idx.get_loc("2000-01", method="nearest") == slice(0, 3) - - assert idx.get_loc("1999", method="nearest") == 0 - assert idx.get_loc("2001", method="nearest") == 2 - - with pytest.raises(KeyError, match="'1999'"): - idx.get_loc("1999", method="pad") - with pytest.raises(KeyError, match="'2001'"): - idx.get_loc("2001", method="backfill") - - with pytest.raises(KeyError, match="'foobar'"): - idx.get_loc("foobar") - with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"): - idx.get_loc(slice(2)) - - idx = DatetimeIndex(["2000-01-01", "2000-01-04"]) - assert idx.get_loc("2000-01-02", method="nearest") == 0 - assert idx.get_loc("2000-01-03", method="nearest") == 1 - assert idx.get_loc("2000-01", method="nearest") == slice(0, 2) - def test_get_loc_time_obj(self): # time indexing idx = date_range("2000-01-01", periods=24, freq="H") @@ -486,11 +415,6 @@ def test_get_loc_time_obj(self): expected = np.array([]) tm.assert_numpy_array_equal(result, expected, check_dtype=False) - msg = "cannot yet lookup inexact labels when key is a time object" - with pytest.raises(NotImplementedError, match=msg): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - idx.get_loc(time(12, 30), method="pad") - def test_get_loc_time_obj2(self): # 
GH#8667 @@ -525,18 +449,6 @@ def test_get_loc_time_nat(self): expected = np.array([], dtype=np.intp) tm.assert_numpy_array_equal(loc, expected) - def test_get_loc_tz_aware(self): - # https://github.com/pandas-dev/pandas/issues/32140 - dti = date_range( - Timestamp("2019-12-12 00:00:00", tz="US/Eastern"), - Timestamp("2019-12-13 00:00:00", tz="US/Eastern"), - freq="5s", - ) - key = Timestamp("2019-12-12 10:19:25", tz="US/Eastern") - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - result = dti.get_loc(key, method="nearest") - assert result == 7433 - def test_get_loc_nat(self): # GH#20464 index = DatetimeIndex(["1/3/2000", "NaT"]) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index c8d7470032e5f..98c21fad1f8c2 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -791,6 +791,13 @@ def test_is_overlapping(self, start, shift, na_value, closed): result = index.is_overlapping assert result is expected + # intervals with duplicate left values + a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85] + b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90] + index = IntervalIndex.from_arrays(a, b, closed="right") + result = index.is_overlapping + assert result is False + @pytest.mark.parametrize( "tuples", [ diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 8803862615858..fb6f56b0fcba7 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -12,6 +12,11 @@ from pandas.core.api import UInt64Index +def test_infer_objects(idx): + with pytest.raises(NotImplementedError, match="to_frame"): + idx.infer_objects() + + def test_shift(idx): # GH8083 test the base class for shift diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index 
4c879c8ff5736..31c5ab333ecfa 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -570,10 +570,6 @@ def test_get_loc(self, idx): with pytest.raises(KeyError, match=r"^'quux'$"): idx.get_loc("quux") - msg = "only the default get_loc method is currently supported for MultiIndex" - with pytest.raises(NotImplementedError, match=msg): - idx.get_loc("foo", method="nearest") - # 3 levels index = MultiIndex( levels=[Index(np.arange(4)), Index(np.arange(4)), Index(np.arange(4))], diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py index 9811cd3ac0211..5c4596b0d9503 100644 --- a/pandas/tests/indexes/numeric/test_indexing.py +++ b/pandas/tests/indexes/numeric/test_indexing.py @@ -20,99 +20,27 @@ def index_large(): class TestGetLoc: - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - def test_get_loc(self, method): + def test_get_loc(self): index = Index([0, 1, 2]) - warn = None if method is None else FutureWarning + assert index.get_loc(1) == 1 - with tm.assert_produces_warning(warn, match="deprecated"): - assert index.get_loc(1, method=method) == 1 - - if method: - with tm.assert_produces_warning(warn, match="deprecated"): - assert index.get_loc(1, method=method, tolerance=0) == 1 - - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_raises_bad_label(self, method): - index = Index([0, 1, 2]) - if method: - msg = "not supported between" - err = TypeError - else: - msg = r"\[1, 2\]" - err = InvalidIndexError - - with pytest.raises(err, match=msg): - index.get_loc([1, 2], method=method) - - @pytest.mark.parametrize( - "method,loc", [("pad", 1), ("backfill", 2), ("nearest", 1)] - ) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_tolerance(self, method, loc): - index = Index([0, 1, 2]) - assert 
index.get_loc(1.1, method) == loc - assert index.get_loc(1.1, method, tolerance=1) == loc - - @pytest.mark.parametrize("method", ["pad", "backfill", "nearest"]) - def test_get_loc_outside_tolerance_raises(self, method): - index = Index([0, 1, 2]) - with pytest.raises(KeyError, match="1.1"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc(1.1, method, tolerance=0.05) - - def test_get_loc_bad_tolerance_raises(self): - index = Index([0, 1, 2]) - with pytest.raises(ValueError, match="must be numeric"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc(1.1, "nearest", tolerance="invalid") - - def test_get_loc_tolerance_no_method_raises(self): + def test_get_loc_raises_bad_label(self): index = Index([0, 1, 2]) - with pytest.raises(ValueError, match="tolerance .* valid if"): - index.get_loc(1.1, tolerance=1) - - def test_get_loc_raises_missized_tolerance(self): - index = Index([0, 1, 2]) - with pytest.raises(ValueError, match="tolerance size must match"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc(1.1, "nearest", tolerance=[1, 1]) + with pytest.raises(InvalidIndexError, match=r"\[1, 2\]"): + index.get_loc([1, 2]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") def test_get_loc_float64(self): idx = Index([0.0, 1.0, 2.0], dtype=np.float64) - for method in [None, "pad", "backfill", "nearest"]: - assert idx.get_loc(1, method) == 1 - if method is not None: - assert idx.get_loc(1, method, tolerance=0) == 1 - - for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: - assert idx.get_loc(1.1, method) == loc - assert idx.get_loc(1.1, method, tolerance=0.9) == loc with pytest.raises(KeyError, match="^'foo'$"): idx.get_loc("foo") with pytest.raises(KeyError, match=r"^1\.5$"): idx.get_loc(1.5) - with pytest.raises(KeyError, match=r"^1\.5$"): - idx.get_loc(1.5, method="pad", tolerance=0.1) with pytest.raises(KeyError, match="^True$"): 
idx.get_loc(True) with pytest.raises(KeyError, match="^False$"): idx.get_loc(False) - with pytest.raises(ValueError, match="must be numeric"): - idx.get_loc(1.4, method="nearest", tolerance="foo") - - with pytest.raises(ValueError, match="must contain numeric elements"): - idx.get_loc(1.4, method="nearest", tolerance=np.array(["foo"])) - - with pytest.raises( - ValueError, match="tolerance size must match target index size" - ): - idx.get_loc(1.4, method="nearest", tolerance=np.array([1, 2])) - def test_get_loc_na(self): idx = Index([np.nan, 1, 2], dtype=np.float64) assert idx.get_loc(1) == 1 @@ -145,13 +73,11 @@ def test_get_loc_missing_nan(self): idx.get_loc([np.nan]) @pytest.mark.parametrize("vals", [[1], [1.0], [Timestamp("2019-12-31")], ["test"]]) - @pytest.mark.parametrize("method", ["nearest", "pad", "backfill"]) - def test_get_loc_float_index_nan_with_method(self, vals, method): + def test_get_loc_float_index_nan_with_method(self, vals): # GH#39382 idx = Index(vals) with pytest.raises(KeyError, match="nan"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - idx.get_loc(np.nan, method=method) + idx.get_loc(np.nan) @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"]) def test_get_loc_numericindex_none_raises(self, dtype): diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index 38bd96921b991..a33173dc83569 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -10,20 +10,6 @@ import pandas._testing as tm -class TestGetLoc: - def test_get_loc_raises_object_nearest(self): - index = Index(["a", "c"]) - with pytest.raises(TypeError, match="unsupported operand type"): - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc("a", method="nearest") - - def test_get_loc_raises_object_tolerance(self): - index = Index(["a", "c"]) - with pytest.raises(TypeError, match="unsupported operand type"): - with 
tm.assert_produces_warning(FutureWarning, match="deprecated"): - index.get_loc("a", method="pad", tolerance="invalid") - - class TestGetIndexer: @pytest.mark.parametrize( "method,expected", diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 58b77ce50293d..6cf942ad3d5d5 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -1,7 +1,4 @@ -from datetime import ( - datetime, - timedelta, -) +from datetime import datetime import re import numpy as np @@ -206,38 +203,42 @@ def test_getitem_seconds(self): for d in ["2013/01/01", "2013/01", "2013"]: tm.assert_series_equal(ser[d], ser) - def test_getitem_day(self): + @pytest.mark.parametrize( + "idx_range", + [ + date_range, + period_range, + ], + ) + def test_getitem_day(self, idx_range): # GH#6716 # Confirm DatetimeIndex and PeriodIndex works identically - didx = date_range(start="2013/01/01", freq="D", periods=400) - pidx = period_range(start="2013/01/01", freq="D", periods=400) - - for idx in [didx, pidx]: - # getitem against index should raise ValueError - values = [ - "2014", - "2013/02", - "2013/01/02", - "2013/02/01 9H", - "2013/02/01 09:00", - ] - for val in values: + # getitem against index should raise ValueError + idx = idx_range(start="2013/01/01", freq="D", periods=400) + values = [ + "2014", + "2013/02", + "2013/01/02", + "2013/02/01 9H", + "2013/02/01 09:00", + ] + for val in values: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - with pytest.raises(IndexError, match="only integers, slices"): - idx[val] + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + with pytest.raises(IndexError, match="only integers, slices"): + idx[val] - ser = Series(np.random.rand(len(idx)), index=idx) - tm.assert_series_equal(ser["2013/01"], ser[0:31]) - tm.assert_series_equal(ser["2013/02"], ser[31:59]) - 
tm.assert_series_equal(ser["2014"], ser[365:]) + ser = Series(np.random.rand(len(idx)), index=idx) + tm.assert_series_equal(ser["2013/01"], ser[0:31]) + tm.assert_series_equal(ser["2013/02"], ser[31:59]) + tm.assert_series_equal(ser["2014"], ser[365:]) - invalid = ["2013/02/01 9H", "2013/02/01 09:00"] - for val in invalid: - with pytest.raises(KeyError, match=val): - ser[val] + invalid = ["2013/02/01 9H", "2013/02/01 09:00"] + for val in invalid: + with pytest.raises(KeyError, match=val): + ser[val] class TestGetLoc: @@ -331,62 +332,6 @@ def test_get_loc_integer(self): with pytest.raises(KeyError, match="46"): pi2.get_loc(46) - # TODO: This method came from test_period; de-dup with version above - @pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"]) - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc_method(self, method): - idx = period_range("2000-01-01", periods=3) - - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_timestamp(), method) == 1 - assert idx.get_loc(idx[1].to_timestamp().to_pydatetime(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - key = idx[1].asfreq("H", how="start") - with pytest.raises(KeyError, match=str(key)): - idx.get_loc(key, method=method) - - # TODO: This method came from test_period; de-dup with version above - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") - def test_get_loc3(self): - - idx = period_range("2000-01-01", periods=5)[::2] - assert idx.get_loc("2000-01-02T12", method="nearest", tolerance="1 day") == 1 - assert ( - idx.get_loc("2000-01-02T12", method="nearest", tolerance=Timedelta("1D")) - == 1 - ) - assert ( - idx.get_loc( - "2000-01-02T12", method="nearest", tolerance=np.timedelta64(1, "D") - ) - == 1 - ) - assert ( - idx.get_loc("2000-01-02T12", method="nearest", tolerance=timedelta(1)) == 1 - ) - - msg = "unit abbreviation w/o a number" - with pytest.raises(ValueError, match=msg): - 
idx.get_loc("2000-01-10", method="nearest", tolerance="foo") - - msg = "Input has different freq=None from PeriodArray\\(freq=D\\)" - with pytest.raises(ValueError, match=msg): - idx.get_loc("2000-01-10", method="nearest", tolerance="1 hour") - with pytest.raises(KeyError, match=r"^'2000-01-10'$"): - idx.get_loc("2000-01-10", method="nearest", tolerance="1 day") - with pytest.raises( - ValueError, match="list-like tolerance size must match target index size" - ): - idx.get_loc( - "2000-01-10", - method="nearest", - tolerance=[ - Timedelta("1 day").to_timedelta64(), - Timedelta("1 day").to_timedelta64(), - ], - ) - def test_get_loc_invalid_string_raises_keyerror(self): # GH#34240 pi = period_range("2000", periods=3, name="A") diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ea0504c6ad400..396022488aaf5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -32,12 +32,7 @@ period_range, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, - NumericIndex, - UInt64Index, -) +from pandas.core.api import NumericIndex from pandas.core.indexes.api import ( Index, MultiIndex, @@ -194,14 +189,14 @@ def test_constructor_from_frame_series_freq(self): def test_constructor_int_dtype_nan(self): # see gh-15187 data = [np.nan] - expected = Float64Index(data) + expected = NumericIndex(data, dtype=np.float64) result = Index(data, dtype="float") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "klass,dtype,na_val", [ - (Float64Index, np.float64, np.nan), + (NumericIndex, np.float64, np.nan), (DatetimeIndex, "datetime64[ns]", pd.NaT), ], ) @@ -309,9 +304,7 @@ def test_constructor_dtypes_timedelta(self, attr, klass): "klass", [ Index, - Float64Index, - Int64Index, - UInt64Index, + NumericIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, @@ -561,7 +554,7 @@ def test_map_tseries_indices_return_index(self, attr): def 
test_map_tseries_indices_accsr_return_index(self): date_index = tm.makeDateIndex(24, freq="h", name="hourly") - expected = Int64Index(range(24), name="hourly") + expected = Index(list(range(24)), dtype=np.int64, name="hourly") tm.assert_index_equal(expected, date_index.map(lambda x: x.hour), exact=True) @pytest.mark.parametrize( @@ -875,17 +868,17 @@ def test_isin_nan_common_object(self, nulls_fixture, nulls_fixture2): def test_isin_nan_common_float64(self, nulls_fixture): if nulls_fixture is pd.NaT or nulls_fixture is pd.NA: - # Check 1) that we cannot construct a Float64Index with this value + # Check 1) that we cannot construct a float64 Index with this value # and 2) that with an NaN we do not have .isin(nulls_fixture) - msg = "data is not compatible with Float64Index" + msg = "data is not compatible with NumericIndex" with pytest.raises(ValueError, match=msg): - Float64Index([1.0, nulls_fixture]) + NumericIndex([1.0, nulls_fixture], dtype=np.float64) - idx = Float64Index([1.0, np.nan]) + idx = NumericIndex([1.0, np.nan], dtype=np.float64) assert not idx.isin([nulls_fixture]).any() return - idx = Float64Index([1.0, nulls_fixture]) + idx = NumericIndex([1.0, nulls_fixture], dtype=np.float64) res = idx.isin([np.nan]) tm.assert_numpy_array_equal(res, np.array([False, True])) @@ -898,8 +891,8 @@ def test_isin_nan_common_float64(self, nulls_fixture): "index", [ Index(["qux", "baz", "foo", "bar"]), - # Float64Index overrides isin, so must be checked separately - Float64Index([1.0, 2.0, 3.0, 4.0]), + # float64 Index overrides isin, so must be checked separately + NumericIndex([1.0, 2.0, 3.0, 4.0], dtype=np.float64), ], ) def test_isin_level_kwarg(self, level, index): @@ -1068,7 +1061,7 @@ def test_outer_join_sort(self): result = left_index.join(right_index, how="outer") # right_index in this case because DatetimeIndex has join precedence - # over Int64Index + # over int64 Index with tm.assert_produces_warning(RuntimeWarning): expected = 
right_index.astype(object).union(left_index.astype(object)) @@ -1138,8 +1131,6 @@ def test_reindex_preserves_type_if_target_is_empty_list_or_array(self, labels): @pytest.mark.parametrize( "labels,dtype", [ - (Int64Index([]), np.int64), - (Float64Index([]), np.float64), (DatetimeIndex([]), np.datetime64), ], ) @@ -1148,10 +1139,19 @@ def test_reindex_doesnt_preserve_type_if_target_is_empty_index(self, labels, dty index = Index(list("abc")) assert index.reindex(labels)[0].dtype.type == dtype + def test_reindex_doesnt_preserve_type_if_target_is_empty_index_numeric( + self, any_real_numpy_dtype + ): + # GH7774 + dtype = any_real_numpy_dtype + index = Index(list("abc")) + labels = NumericIndex([], dtype=dtype) + assert index.reindex(labels)[0].dtype == dtype + def test_reindex_no_type_preserve_target_empty_mi(self): index = Index(list("abc")) result = index.reindex( - MultiIndex([Int64Index([]), Float64Index([])], [[], []]) + MultiIndex([Index([], np.int64), Index([], np.float64)], [[], []]) )[0] assert result.levels[0].dtype.type == np.int64 assert result.levels[1].dtype.type == np.float64 @@ -1541,7 +1541,7 @@ def test_deprecated_fastpath(): Index(np.array(["a", "b"], dtype=object), name="test", fastpath=True) with pytest.raises(TypeError, match=msg): - Int64Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True) + Index(np.array([1, 2, 3], dtype="int64"), name="test", fastpath=True) with pytest.raises(TypeError, match=msg): RangeIndex(0, 5, 2, name="test", fastpath=True) @@ -1562,40 +1562,31 @@ def test_shape_of_invalid_index(): idx[:, None] -def test_validate_1d_input(): +@pytest.mark.parametrize("dtype", [None, np.int64, np.uint64, np.float64]) +def test_validate_1d_input(dtype): # GH#27125 check that we do not have >1-dimensional input msg = "Index data must be 1-dimensional" arr = np.arange(8).reshape(2, 2, 2) with pytest.raises(ValueError, match=msg): - Index(arr) - - with pytest.raises(ValueError, match=msg): - Float64Index(arr.astype(np.float64)) 
- - with pytest.raises(ValueError, match=msg): - Int64Index(arr.astype(np.int64)) - - with pytest.raises(ValueError, match=msg): - UInt64Index(arr.astype(np.uint64)) + Index(arr, dtype=dtype) df = DataFrame(arr.reshape(4, 2)) with pytest.raises(ValueError, match=msg): - Index(df) + Index(df, dtype=dtype) - # GH#13601 trying to assign a multi-dimensional array to an index is not - # allowed + # GH#13601 trying to assign a multi-dimensional array to an index is not allowed ser = Series(0, range(4)) with pytest.raises(ValueError, match=msg): - ser.index = np.array([[2, 3]] * 4) + ser.index = np.array([[2, 3]] * 4, dtype=dtype) @pytest.mark.parametrize( "klass, extra_kwargs", [ [Index, {}], - [Int64Index, {}], - [Float64Index, {}], + [lambda x: NumericIndex(x, np.int64), {}], + [lambda x: NumericIndex(x, np.float64), {}], [DatetimeIndex, {}], [TimedeltaIndex, {}], [NumericIndex, {}], diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 4a1333e2b18b4..4929fc436ab98 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -31,11 +31,7 @@ timedelta_range, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, - UInt64Index, -) +from pandas.core.api import NumericIndex class TestIndexConstructorInference: @@ -91,11 +87,11 @@ def test_construction_list_tuples_nan(self, na_value, vtype): def test_constructor_int_dtype_float(self, dtype): # GH#18400 if is_unsigned_integer_dtype(dtype): - index_type = UInt64Index + expected_dtype = np.uint64 else: - index_type = Int64Index + expected_dtype = np.int64 - expected = index_type([0, 1, 2, 3]) + expected = NumericIndex([0, 1, 2, 3], dtype=expected_dtype) result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) tm.assert_index_equal(result, expected) @@ -292,9 +288,10 @@ def test_constructor_int_dtype_nan_raises(self, dtype): np.array([1.0, 2.0, 3.0], dtype=float), ], ) - def test_constructor_dtypes_to_int64(self, 
vals): - index = Index(vals, dtype=int) - assert isinstance(index, Int64Index) + def test_constructor_dtypes_to_int(self, vals, any_int_numpy_dtype): + dtype = any_int_numpy_dtype + index = NumericIndex(vals, dtype=dtype) + assert index.dtype == dtype @pytest.mark.parametrize( "vals", @@ -306,9 +303,10 @@ def test_constructor_dtypes_to_int64(self, vals): np.array([1.0, 2.0, 3.0], dtype=float), ], ) - def test_constructor_dtypes_to_float64(self, vals): - index = Index(vals, dtype=float) - assert isinstance(index, Float64Index) + def test_constructor_dtypes_to_float(self, vals, float_numpy_dtype): + dtype = float_numpy_dtype + index = NumericIndex(vals, dtype=dtype) + assert index.dtype == dtype @pytest.mark.parametrize( "vals", diff --git a/pandas/tests/indexes/test_indexing.py b/pandas/tests/indexes/test_indexing.py index 57268c07024f7..2b9c6323113d5 100644 --- a/pandas/tests/indexes/test_indexing.py +++ b/pandas/tests/indexes/test_indexing.py @@ -19,6 +19,8 @@ from pandas.errors import InvalidIndexError +from pandas.core.dtypes.common import is_float_dtype + from pandas import ( NA, DatetimeIndex, @@ -31,11 +33,7 @@ TimedeltaIndex, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, - UInt64Index, -) +from pandas.core.api import NumericIndex class TestTake: @@ -114,12 +112,12 @@ def test_index_contains(self, index, val): (Index([0, 1, 2, np.nan]), 4), (Index([0, 1, 2, np.inf]), np.nan), (Index([0, 1, 2, np.nan]), np.inf), - # Checking if np.inf in Int64Index should not cause an OverflowError + # Checking if np.inf in int64 Index should not cause an OverflowError # Related to GH 16957 - (Int64Index([0, 1, 2]), np.inf), - (Int64Index([0, 1, 2]), np.nan), - (UInt64Index([0, 1, 2]), np.inf), - (UInt64Index([0, 1, 2]), np.nan), + (Index([0, 1, 2], dtype=np.int64), np.inf), + (Index([0, 1, 2], dtype=np.int64), np.nan), + (Index([0, 1, 2], dtype=np.uint64), np.inf), + (Index([0, 1, 2], dtype=np.uint64), np.nan), ], ) def 
test_index_not_contains(self, index, val): @@ -139,20 +137,20 @@ def test_mixed_index_not_contains(self, index, val): # GH#19860 assert val not in index - def test_contains_with_float_index(self): + def test_contains_with_float_index(self, any_real_numpy_dtype): # GH#22085 - integer_index = Int64Index([0, 1, 2, 3]) - uinteger_index = UInt64Index([0, 1, 2, 3]) - float_index = Float64Index([0.1, 1.1, 2.2, 3.3]) + dtype = any_real_numpy_dtype + data = [0, 1, 2, 3] if not is_float_dtype(dtype) else [0.1, 1.1, 2.2, 3.3] + index = NumericIndex(data, dtype=dtype) - for index in (integer_index, uinteger_index): + if not is_float_dtype(index.dtype): assert 1.1 not in index assert 1.0 in index assert 1 in index - - assert 1.1 in float_index - assert 1.0 not in float_index - assert 1 not in float_index + else: + assert 1.1 in index + assert 1.0 not in index + assert 1 not in index def test_contains_requires_hashable_raises(self, index): if isinstance(index, MultiIndex): diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 12aece23738ec..cc166f9f32a34 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -1,7 +1,4 @@ -from datetime import ( - datetime, - timedelta, -) +from datetime import datetime import re import numpy as np @@ -91,35 +88,9 @@ def test_get_loc_key_unit_mismatch_not_castable(self): assert key not in tdi - @pytest.mark.filterwarnings("ignore:Passing method:FutureWarning") def test_get_loc(self): idx = to_timedelta(["0 days", "1 days", "2 days"]) - for method in [None, "pad", "backfill", "nearest"]: - assert idx.get_loc(idx[1], method) == 1 - assert idx.get_loc(idx[1].to_pytimedelta(), method) == 1 - assert idx.get_loc(str(idx[1]), method) == 1 - - assert idx.get_loc(idx[1], "pad", tolerance=Timedelta(0)) == 1 - assert idx.get_loc(idx[1], "pad", tolerance=np.timedelta64(0, "s")) == 1 - assert idx.get_loc(idx[1], "pad", 
tolerance=timedelta(0)) == 1 - - with pytest.raises(ValueError, match="unit abbreviation w/o a number"): - idx.get_loc(idx[1], method="nearest", tolerance="foo") - - with pytest.raises(ValueError, match="tolerance size must match"): - idx.get_loc( - idx[1], - method="nearest", - tolerance=[ - Timedelta(0).to_timedelta64(), - Timedelta(0).to_timedelta64(), - ], - ) - - for method, loc in [("pad", 1), ("backfill", 2), ("nearest", 1)]: - assert idx.get_loc("1 day 1 hour", method) == loc - # GH 16909 assert idx.get_loc(idx[1].to_timedelta64()) == 1 diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index a76e5fc1c5f57..cce66355ef5a5 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -16,10 +16,7 @@ import pandas as pd import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, -) +from pandas.core.api import NumericIndex ############################################################### # Index / Series common tests which may trigger dtype coercions @@ -217,7 +214,7 @@ def test_insert_index_object(self, insert, coerced_val, coerced_dtype): ], ) def test_insert_index_int64(self, insert, coerced_val, coerced_dtype): - obj = Int64Index([1, 2, 3, 4]) + obj = NumericIndex([1, 2, 3, 4], dtype=np.int64) assert obj.dtype == np.int64 exp = pd.Index([1, coerced_val, 2, 3, 4]) @@ -233,7 +230,7 @@ def test_insert_index_int64(self, insert, coerced_val, coerced_dtype): ], ) def test_insert_index_float64(self, insert, coerced_val, coerced_dtype): - obj = Float64Index([1.0, 2.0, 3.0, 4.0]) + obj = NumericIndex([1.0, 2.0, 3.0, 4.0], dtype=np.float64) assert obj.dtype == np.float64 exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0]) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 186cba62c138f..0480278877398 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -8,10 +8,7 @@ Series, ) 
import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, -) +from pandas.core.api import NumericIndex def gen_obj(klass, index): @@ -264,9 +261,9 @@ def test_slice_integer(self): # oob indicates if we are out of bounds # of positional indexing for index, oob in [ - (Int64Index(range(5)), False), + (NumericIndex(np.arange(5, dtype=np.int64)), False), (RangeIndex(5), False), - (Int64Index(range(5)) + 10, True), + (NumericIndex(np.arange(5, dtype=np.int64) + 10), True), ]: # s is an in-range index @@ -496,7 +493,7 @@ def test_floating_misc(self, indexer_sl): # fancy floats/integers create the correct entry (as nan) # fancy tests - expected = Series([2, 0], index=Float64Index([5.0, 0.0])) + expected = Series([2, 0], index=Index([5.0, 0.0], dtype=np.float64)) for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index f7e6665aad253..db088c7a2afea 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -246,7 +246,7 @@ def check(result, expected): tm.assert_frame_equal(result, expected) dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) - check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) + check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index, columns=[])) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) diff --git a/pandas/tests/indexing/test_na_indexing.py b/pandas/tests/indexing/test_na_indexing.py index 7e54bbc326880..5364cfe852430 100644 --- a/pandas/tests/indexing/test_na_indexing.py +++ b/pandas/tests/indexing/test_na_indexing.py @@ -34,7 +34,7 @@ def test_series_mask_boolean(values, dtype, mask, indexer_class, frame): if frame: if len(values) == 0: # Otherwise obj is an empty DataFrame with shape (0, 1) - obj = pd.DataFrame(dtype=dtype) + obj = pd.DataFrame(dtype=dtype, index=index) else: obj = obj.to_frame() 
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 938056902e745..1ce507db618b9 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -100,12 +100,12 @@ def test_partial_set_empty_frame2(self): tm.assert_frame_equal(df, expected) - df = DataFrame() + df = DataFrame(index=Index([])) df["foo"] = Series(df.index) tm.assert_frame_equal(df, expected) - df = DataFrame() + df = DataFrame(index=Index([])) df["foo"] = df.index tm.assert_frame_equal(df, expected) @@ -135,7 +135,7 @@ def test_partial_set_empty_frame4(self): def test_partial_set_empty_frame5(self): df = DataFrame() - tm.assert_index_equal(df.columns, Index([], dtype=object)) + tm.assert_index_equal(df.columns, pd.RangeIndex(0)) df2 = DataFrame() df2[1] = Series([1], index=["foo"]) df.loc[:, 1] = Series([1], index=["foo"]) @@ -182,7 +182,7 @@ def test_partial_set_empty_frame_row(self): df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] result = y.reindex(columns=["A", "B", "C"]) - expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) + expected = DataFrame(columns=["A", "B", "C"]) expected["A"] = expected["A"].astype("int64") expected["B"] = expected["B"].astype("float64") expected["C"] = expected["C"].astype("float64") diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index dc7960cde4a61..570a83eb6bf88 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -591,7 +591,7 @@ def _compare(old_mgr, new_mgr): # noops mgr = create_mgr("f: i8; g: f8") - new_mgr = mgr.convert() + new_mgr = mgr.convert(copy=True) _compare(mgr, new_mgr) # convert @@ -599,7 +599,7 @@ def _compare(old_mgr, new_mgr): mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) - new_mgr = mgr.convert() + new_mgr = 
mgr.convert(copy=True) assert new_mgr.iget(0).dtype == np.object_ assert new_mgr.iget(1).dtype == np.object_ assert new_mgr.iget(2).dtype == np.object_ @@ -612,7 +612,7 @@ def _compare(old_mgr, new_mgr): mgr.iset(0, np.array(["1"] * N, dtype=np.object_)) mgr.iset(1, np.array(["2."] * N, dtype=np.object_)) mgr.iset(2, np.array(["foo."] * N, dtype=np.object_)) - new_mgr = mgr.convert() + new_mgr = mgr.convert(copy=True) assert new_mgr.iget(0).dtype == np.object_ assert new_mgr.iget(1).dtype == np.object_ assert new_mgr.iget(2).dtype == np.object_ diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 822e24b224052..a204132963c94 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -1646,7 +1646,7 @@ def test_read_datetime_multiindex(self, request, engine, read_ext): pd.to_datetime("03/01/2020").to_pydatetime(), ], ) - expected = DataFrame([], columns=expected_column_index) + expected = DataFrame([], index=[], columns=expected_column_index) tm.assert_frame_equal(expected, actual) diff --git a/pandas/tests/io/formats/test_info.py b/pandas/tests/io/formats/test_info.py index 33c78baa1eedc..e33e1476af69a 100644 --- a/pandas/tests/io/formats/test_info.py +++ b/pandas/tests/io/formats/test_info.py @@ -37,7 +37,7 @@ def test_info_empty(): expected = textwrap.dedent( """\ - Index: 0 entries + RangeIndex: 0 entries Empty DataFrame\n""" ) assert result == expected diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 11ee41ed40ce8..d6999b32e6a81 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -152,8 +152,8 @@ def test_to_latex_empty_tabular(self): \begin{tabular}{l} \toprule Empty DataFrame - Columns: Index([], dtype='object') - Index: Index([], dtype='object') \\ + Columns: RangeIndex(start=0, stop=0, step=1) + Index: RangeIndex(start=0, stop=0, step=1) \\ \bottomrule \end{tabular} """ 
@@ -207,8 +207,8 @@ def test_to_latex_empty_longtable(self): \begin{longtable}{l} \toprule Empty DataFrame - Columns: Index([], dtype='object') - Index: Index([], dtype='object') \\ + Columns: RangeIndex(start=0, stop=0, step=1) + Index: RangeIndex(start=0, stop=0, step=1) \\ \end{longtable} """ ) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 2f3fc4d0fcba8..4edd08014050e 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -207,12 +207,15 @@ def test_roundtrip_empty(self, orient, convert_axes): empty_frame = DataFrame() data = empty_frame.to_json(orient=orient) result = read_json(data, orient=orient, convert_axes=convert_axes) - expected = empty_frame.copy() - - # TODO: both conditions below are probably bugs - if convert_axes: - expected.index = expected.index.astype(float) - expected.columns = expected.columns.astype(float) + if orient == "split": + idx = pd.Index([], dtype=(float if convert_axes else object)) + expected = DataFrame(index=idx, columns=idx) + elif orient in ["index", "columns"]: + # TODO: this condition is probably a bug + idx = pd.Index([], dtype=(float if convert_axes else object)) + expected = DataFrame(columns=idx) + else: + expected = empty_frame.copy() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/dtypes/test_empty.py b/pandas/tests/io/parser/dtypes/test_empty.py index ee02af773129a..1f709a3cd8f28 100644 --- a/pandas/tests/io/parser/dtypes/test_empty.py +++ b/pandas/tests/io/parser/dtypes/test_empty.py @@ -26,7 +26,7 @@ def test_dtype_all_columns_empty(all_parsers): parser = all_parsers result = parser.read_csv(StringIO("A,B"), dtype=str) - expected = DataFrame({"A": [], "B": []}, index=[], dtype=str) + expected = DataFrame({"A": [], "B": []}, dtype=str) tm.assert_frame_equal(result, expected) @@ -38,7 +38,6 @@ def test_empty_pass_dtype(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "two": np.empty(0, 
dtype=object)}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -81,7 +80,6 @@ def test_empty_with_mangled_column_pass_dtype_by_names(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -94,7 +92,6 @@ def test_empty_with_mangled_column_pass_dtype_by_indexes(all_parsers): expected = DataFrame( {"one": np.empty(0, dtype="u1"), "one.1": np.empty(0, dtype="f")}, - index=Index([], dtype=object), ) tm.assert_frame_equal(result, expected) @@ -106,7 +103,6 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(all_parsers): [Series([], name="one", dtype="u1"), Series([], name="one.1", dtype="f")], axis=1, ) - expected.index = expected.index.astype(object) data = "one,one" result = parser.read_csv(StringIO(data), dtype={0: "u1", 1: "f"}) @@ -133,11 +129,11 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): (np.float64, DataFrame(columns=["a", "b"], dtype=np.float64)), ( "category", - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + DataFrame({"a": Categorical([]), "b": Categorical([])}), ), ( {"a": "category", "b": "category"}, - DataFrame({"a": Categorical([]), "b": Categorical([])}, index=[]), + DataFrame({"a": Categorical([]), "b": Categorical([])}), ), ("datetime64[ns]", DataFrame(columns=["a", "b"], dtype="datetime64[ns]")), ( @@ -147,28 +143,24 @@ def test_empty_with_dup_column_pass_dtype_by_indexes_raises(all_parsers): "a": Series([], dtype="timedelta64[ns]"), "b": Series([], dtype="timedelta64[ns]"), }, - index=[], ), ), ( {"a": np.int64, "b": np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], ), ), ( {0: np.int64, 1: np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": Series([], dtype=np.int32)}, - index=[], ), ), ( {"a": np.int64, 1: np.int32}, DataFrame( {"a": Series([], dtype=np.int64), "b": 
Series([], dtype=np.int32)}, - index=[], ), ), ], diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index f30aba3db917e..13c4216710f84 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -251,6 +251,7 @@ def test_index_col_multiindex_columns_no_data(all_parsers): ) expected = DataFrame( [], + index=Index([]), columns=MultiIndex.from_arrays( [["a1", "a2"], ["b1", "b2"]], names=["a0", "b0"] ), diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 1a8149ae41fcb..202e26952f590 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1249,7 +1249,7 @@ def test_parse_dates_empty_string(all_parsers): ( "a\n04.15.2016", {"parse_dates": True, "index_col": 0}, - DataFrame(index=DatetimeIndex(["2016-04-15"], name="a")), + DataFrame(index=DatetimeIndex(["2016-04-15"], name="a"), columns=[]), ), ( "a,b\n04.15.2016,09.16.2013", @@ -1264,7 +1264,8 @@ def test_parse_dates_empty_string(all_parsers): DataFrame( index=MultiIndex.from_tuples( [(datetime(2016, 4, 15), datetime(2013, 9, 16))], names=["a", "b"] - ) + ), + columns=[], ), ), ], diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 3e451239dcd40..61c493a2c368f 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -904,7 +904,7 @@ def test_skiprows_with_iterator(): expected_frames = [ DataFrame({"a": [3, 4]}), DataFrame({"a": [5, 7, 8]}, index=[2, 3, 4]), - DataFrame({"a": []}, index=[], dtype="object"), + DataFrame({"a": []}, dtype="object"), ] for i, result in enumerate(df_iter): tm.assert_frame_equal(result, expected_frames[i]) diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index bbf159845b1d6..032cb961103df 100644 --- 
a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -241,7 +241,7 @@ def test_usecols_with_integer_like_header(all_parsers, usecols, expected): def test_empty_usecols(all_parsers): data = "a,b,c\n1,2,3\n4,5,6" - expected = DataFrame() + expected = DataFrame(columns=Index([])) parser = all_parsers result = parser.read_csv(StringIO(data), usecols=set()) @@ -276,7 +276,7 @@ def test_np_array_usecols(all_parsers): } ), ), - (lambda x: False, DataFrame()), + (lambda x: False, DataFrame(columns=Index([]))), ], ) def test_callable_usecols(all_parsers, usecols, expected): diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 349fe74cb8e71..8cff9e65ce23b 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -16,12 +16,10 @@ HDFStore, Index, MultiIndex, - RangeIndex, Series, _testing as tm, concat, ) -from pandas.core.api import Int64Index from pandas.tests.io.pytables.common import ( _maybe_remove, ensure_clean_store, @@ -248,9 +246,7 @@ def test_column_multiindex(setup_path): [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"] ) df = DataFrame(np.arange(12).reshape(3, 4), columns=index) - expected = df.copy() - if isinstance(expected.index, RangeIndex): - expected.index = Int64Index(expected.index) + expected = df.set_axis(df.index.to_numpy()) with ensure_clean_store(setup_path) as store: @@ -280,9 +276,7 @@ def test_column_multiindex(setup_path): # non_index_axes name df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")) - expected = df.copy() - if isinstance(expected.index, RangeIndex): - expected.index = Int64Index(expected.index) + expected = df.set_axis(df.index.to_numpy()) with ensure_clean_store(setup_path) as store: diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index ffc5afcc70bb9..4bf79733b1957 100644 --- a/pandas/tests/io/test_html.py 
+++ b/pandas/tests/io/test_html.py @@ -1113,9 +1113,8 @@ def test_works_on_valid_markup(self, datapath): @pytest.mark.slow def test_fallback_success(self, datapath): banklist_data = datapath("io", "data", "html", "banklist.html") - self.read_html( - banklist_data, match=".*Water.*", flavor=["lxml", "html5lib"] - ) # pylint: disable=redundant-keyword-arg + + self.read_html(banklist_data, match=".*Water.*", flavor=["lxml", "html5lib"]) def test_to_html_timestamp(self): rng = date_range("2000-01-01", periods=10) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 75683a1d96bfb..ed72b5e251114 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -899,7 +899,7 @@ def test_partition_cols_pathlib(self, tmp_path, pa, df_compat, path_type): def test_empty_dataframe(self, pa): # GH #27339 - df = pd.DataFrame() + df = pd.DataFrame(index=[], columns=[]) check_round_trip(df, pa) def test_write_with_schema(self, pa): @@ -1174,7 +1174,7 @@ def test_error_on_using_partition_cols_and_partition_on( def test_empty_dataframe(self, fp): # GH #27339 - df = pd.DataFrame() + df = pd.DataFrame(index=[], columns=[]) expected = df.copy() expected.index.name = "index" check_round_trip(df, fp, expected=expected) diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index f07a4e3b58e86..3dafe6fe61b35 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -595,5 +595,5 @@ def test_pickle_frame_v124_unpickle_130(): with open(path, "rb") as fd: df = pickle.load(fd) - expected = pd.DataFrame() + expected = pd.DataFrame(index=[], columns=[]) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index d9505b4d593e6..3a9aa91002730 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -807,7 +807,7 @@ def test_style_single_ok(self): "index_name, old_label, new_label", [(None, 
"", "new"), ("old", "old", "new"), (None, "", "")], ) - @pytest.mark.parametrize("kind", ["line", "area", "bar", "barh"]) + @pytest.mark.parametrize("kind", ["line", "area", "bar", "barh", "hist"]) def test_xlabel_ylabel_series(self, kind, index_name, old_label, new_label): # GH 9093 ser = Series([1, 2, 3, 4]) @@ -818,6 +818,9 @@ def test_xlabel_ylabel_series(self, kind, index_name, old_label, new_label): if kind == "barh": assert ax.get_xlabel() == "" assert ax.get_ylabel() == old_label + elif kind == "hist": + assert ax.get_xlabel() == "" + assert ax.get_ylabel() == "Frequency" else: assert ax.get_ylabel() == "" assert ax.get_xlabel() == old_label diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 04f147ee40e62..55e8c4e818ce3 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -90,7 +90,7 @@ def test_raises_on_non_datetimelike_index(): xp = DataFrame() msg = ( "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " - "but got an instance of 'Index'" + "but got an instance of 'RangeIndex'" ) with pytest.raises(TypeError, match=msg): xp.resample("A").mean() diff --git a/pandas/tests/reshape/concat/test_empty.py b/pandas/tests/reshape/concat/test_empty.py index 18c0645df1ceb..0d95d94782ecf 100644 --- a/pandas/tests/reshape/concat/test_empty.py +++ b/pandas/tests/reshape/concat/test_empty.py @@ -4,7 +4,7 @@ import pandas as pd from pandas import ( DataFrame, - Index, + RangeIndex, Series, concat, date_range, @@ -52,7 +52,7 @@ def test_concat_empty_series(self): res = concat([s1, s2], axis=1) exp = DataFrame( {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]}, - index=Index([0, 1, 2], dtype="O"), + index=RangeIndex(3), ) tm.assert_frame_equal(res, exp) @@ -70,7 +70,7 @@ def test_concat_empty_series(self): exp = DataFrame( {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]}, columns=["x", 0], - index=Index([0, 1, 2], dtype="O"), + index=RangeIndex(3), ) tm.assert_frame_equal(res, exp) @@ 
-96,7 +96,7 @@ def test_concat_empty_series_timelike(self, tz, values): "left,right,expected", [ # booleans - (np.bool_, np.int32, np.int32), + (np.bool_, np.int32, np.object_), # changed from int32 in 2.0 GH#39817 (np.bool_, np.float32, np.object_), # datetime-like ("m8[ns]", np.bool_, np.object_), @@ -109,12 +109,8 @@ def test_concat_empty_series_timelike(self, tz, values): ], ) def test_concat_empty_series_dtypes(self, left, right, expected): - warn = None - if (left is np.bool_ or right is np.bool_) and expected is not np.object_: - warn = FutureWarning - with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): - # GH#39817 - result = concat([Series(dtype=left), Series(dtype=right)]) + # GH#39817, GH#45101 + result = concat([Series(dtype=left), Series(dtype=right)]) assert result.dtype == expected @pytest.mark.parametrize( @@ -242,7 +238,7 @@ def test_concat_inner_join_empty(self): # GH 15328 df_empty = DataFrame() df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") - df_expected = DataFrame({"a": []}, index=[], dtype="int64") + df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64") for how, expected in [("inner", df_expected), ("outer", df_a)]: result = concat([df_a, df_empty], axis=1, join=how) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 4b32022e177e8..e5927aa094193 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -951,7 +951,7 @@ def test_join_empty(left_empty, how, exp): expected = DataFrame({"B": [np.nan], "A": [1], "C": [5]}) expected = expected.set_index("A") elif exp == "empty": - expected = DataFrame(index=Index([]), columns=["B", "C"], dtype="int64") + expected = DataFrame(columns=["B", "C"], dtype="int64") if how != "cross": expected = expected.rename_axis("A") diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index ab589dc26a3ac..fc2069c5d1e42 100644 --- 
a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -154,7 +154,7 @@ def test_merge_inner_join_empty(self): df_empty = DataFrame() df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") result = merge(df_empty, df_a, left_index=True, right_index=True) - expected = DataFrame({"a": []}, index=[], dtype="int64") + expected = DataFrame({"a": []}, dtype="int64") tm.assert_frame_equal(result, expected) def test_merge_common(self, df, df2): @@ -461,11 +461,7 @@ def test_merge_left_empty_right_empty(self, join_type, kwarg): left = DataFrame(columns=["a", "b", "c"]) right = DataFrame(columns=["x", "y", "z"]) - exp_in = DataFrame( - columns=["a", "b", "c", "x", "y", "z"], - index=pd.Index([], dtype=object), - dtype=object, - ) + exp_in = DataFrame(columns=["a", "b", "c", "x", "y", "z"], dtype=object) result = merge(left, right, how=join_type, **kwarg) tm.assert_frame_equal(result, exp_in) @@ -487,8 +483,6 @@ def test_merge_left_empty_right_notempty(self): columns=["a", "b", "c", "x", "y", "z"], ) exp_in = exp_out[0:0] # make empty DataFrame keeping dtype - # result will have object dtype - exp_in.index = exp_in.index.astype(object) def check1(exp, kwarg): result = merge(left, right, how="inner", **kwarg) @@ -757,14 +751,18 @@ def test_other_datetime_unit(self, unit): def test_other_timedelta_unit(self, unit): # GH 13389 df1 = DataFrame({"entity_id": [101, 102]}) - s = Series([None, None], index=[101, 102], name="days") + ser = Series([None, None], index=[101, 102], name="days") dtype = f"m8[{unit}]" - df2 = s.astype(dtype).to_frame("days") if unit in ["D", "h", "m"]: - # We get nearest supported unit, i.e. "s" - assert df2["days"].dtype == "m8[s]" + # We cannot astype, instead do nearest supported unit, i.e. 
"s" + msg = "Supported resolutions are 's', 'ms', 'us', 'ns'" + with pytest.raises(ValueError, match=msg): + ser.astype(dtype) + + df2 = ser.astype("m8[s]").to_frame("days") else: + df2 = ser.astype(dtype).to_frame("days") assert df2["days"].dtype == dtype result = df1.merge(df2, left_on="entity_id", right_index=True) @@ -1668,7 +1666,10 @@ def test_merge_EA_dtype(self, any_numeric_ea_dtype, how, expected_data): d1 = DataFrame([(1,)], columns=["id"], dtype=any_numeric_ea_dtype) d2 = DataFrame([(2,)], columns=["id"], dtype=any_numeric_ea_dtype) result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype=any_numeric_ea_dtype) + exp_index = RangeIndex(len(expected_data)) + expected = DataFrame( + expected_data, index=exp_index, columns=["id"], dtype=any_numeric_ea_dtype + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -1685,7 +1686,10 @@ def test_merge_string_dtype(self, how, expected_data, any_string_dtype): d1 = DataFrame([("a",)], columns=["id"], dtype=any_string_dtype) d2 = DataFrame([("b",)], columns=["id"], dtype=any_string_dtype) result = merge(d1, d2, how=how) - expected = DataFrame(expected_data, columns=["id"], dtype=any_string_dtype) + exp_idx = RangeIndex(len(expected_data)) + expected = DataFrame( + expected_data, index=exp_idx, columns=["id"], dtype=any_string_dtype + ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9c1a07dd3cde4..9a72a8dadf8d0 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1934,7 +1934,7 @@ def test_pivot_margins_name_unicode(self): frame, index=["foo"], aggfunc=len, margins=True, margins_name=greek ) index = Index([1, 2, 3, greek], dtype="object", name="foo") - expected = DataFrame(index=index) + expected = DataFrame(index=index, columns=[]) tm.assert_frame_equal(table, expected) def test_pivot_string_as_func(self): @@ -2107,8 
+2107,8 @@ def test_pivot_table_empty_aggfunc(self, margins): result = df.pivot_table( index="A", columns="D", values="id", aggfunc=np.size, margins=margins ) - expected = DataFrame(index=Index([], dtype="int64", name="A")) - expected.columns.name = "D" + exp_cols = Index([], name="D") + expected = DataFrame(index=Index([], dtype="int64", name="A"), columns=exp_cols) tm.assert_frame_equal(result, expected) def test_pivot_table_no_column_raises(self): @@ -2342,7 +2342,7 @@ def test_pivot_duplicates(self): def test_pivot_empty(self): df = DataFrame(columns=["a", "b", "c"]) result = df.pivot(index="a", columns="b", values="c") - expected = DataFrame() + expected = DataFrame(index=[], columns=[]) tm.assert_frame_equal(result, expected, check_names=False) def test_pivot_integer_bug(self): diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 60ada18410415..698d66ebe7c29 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -97,7 +97,7 @@ def test_reindex_with_datetimes(): def test_reindex_corner(datetime_series): # (don't forget to fix this) I think it's fixed - empty = Series(dtype=object) + empty = Series(index=[]) empty.reindex(datetime_series.index, method="pad") # it works # corner case: pad empty series diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 8b18550dce746..054be774c2308 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -92,15 +92,15 @@ def test_unparseable_strings_with_dt64_dtype(self): # passed. 
(lambda idx: Series(index=idx), True), (lambda idx: Series(None, index=idx), True), - (lambda idx: Series({}, index=idx), True), - (lambda idx: Series((), index=idx), False), # creates a RangeIndex - (lambda idx: Series([], index=idx), False), # creates a RangeIndex - (lambda idx: Series((_ for _ in []), index=idx), False), # RangeIndex + (lambda idx: Series({}, index=idx), False), # creates an Index[object] + (lambda idx: Series((), index=idx), True), + (lambda idx: Series([], index=idx), True), + (lambda idx: Series((_ for _ in []), index=idx), True), (lambda idx: Series(data=None, index=idx), True), - (lambda idx: Series(data={}, index=idx), True), - (lambda idx: Series(data=(), index=idx), False), # creates a RangeIndex - (lambda idx: Series(data=[], index=idx), False), # creates a RangeIndex - (lambda idx: Series(data=(_ for _ in []), index=idx), False), # RangeIndex + (lambda idx: Series(data={}, index=idx), False), # creates an Index[object] + (lambda idx: Series(data=(), index=idx), True), + (lambda idx: Series(data=[], index=idx), True), + (lambda idx: Series(data=(_ for _ in []), index=idx), True), ], ) @pytest.mark.parametrize("empty_index", [None, []]) diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 6483ad37a2886..dae06a58f0e49 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -286,7 +286,7 @@ def test_multiply(self, values_for_np_reduce, box_with_array, request): expected = obj.prod(numeric_only=False) tm.assert_series_equal(result, expected) elif box is pd.Index: - # Int64Index, Index has no 'prod' + # Index has no 'prod' expected = obj._values.prod() assert result == expected else: @@ -317,7 +317,7 @@ def test_add(self, values_for_np_reduce, box_with_array): expected = obj.sum(numeric_only=False) tm.assert_series_equal(result, expected) elif box is pd.Index: - # Int64Index, Index has no 'sum' + # Index has no 'sum' expected = obj._values.sum() assert result == expected 
else: diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index beda123facb26..4385f71dc653f 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -128,7 +128,7 @@ def test_empty_str_methods(any_string_dtype): DataFrame(columns=[0, 1], dtype=any_string_dtype), empty.str.extract("()()", expand=False), ) - tm.assert_frame_equal(empty_df, empty.str.get_dummies()) + tm.assert_frame_equal(empty_df.set_axis([], axis=1), empty.str.get_dummies()) tm.assert_series_equal(empty_str, empty_str.str.join("")) tm.assert_series_equal(empty_int, empty.str.len()) tm.assert_series_equal(empty_object, empty_str.str.findall("a")) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index ab001d0b5a881..e5f40aa07d9e8 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -39,7 +39,6 @@ def df(): return DataFrame({"A": [1, 2, 3]}) -@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") def test_dask(df): # dask sets "compute.use_numexpr" to False, so catch the current value @@ -59,7 +58,6 @@ def test_dask(df): pd.set_option("compute.use_numexpr", olduse) -@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") @pytest.mark.filterwarnings("ignore:The __array_wrap__:DeprecationWarning") def test_dask_ufunc(): # At the time of dask 2022.01.0, dask is still directly using __array_wrap__ @@ -118,7 +116,7 @@ def test_xarray(df): @td.skip_if_no("cftime") -@td.skip_if_no("xarray", "0.10.4") +@td.skip_if_no("xarray", "0.21.0") def test_xarray_cftimeindex_nearest(): # https://github.com/pydata/xarray/issues/3751 import cftime @@ -126,10 +124,7 @@ def test_xarray_cftimeindex_nearest(): times = xarray.cftime_range("0001", periods=2) key = cftime.DatetimeGregorian(2000, 1, 1) - with tm.assert_produces_warning( - FutureWarning, match="deprecated", check_stacklevel=False - ): - result = times.get_loc(key, method="nearest") + 
result = times.get_indexer([key], method="nearest") expected = 1 assert result == expected @@ -158,7 +153,6 @@ def test_oo_optimized_datetime_index_unpickle(): @tm.network # Cython import warning @pytest.mark.filterwarnings("ignore:can't:ImportWarning") -@pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") @pytest.mark.filterwarnings( # patsy needs to update their imports "ignore:Using or importing the ABCs from 'collections:DeprecationWarning" diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 8417c6dd8419c..d30e3d7afcf19 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -453,9 +453,7 @@ def test_moment_functions_zero_length_pairwise(f): df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) df2["a"] = df2["a"].astype("float64") - df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) - ) + df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns])) df2_expected = DataFrame( index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), columns=Index(["a"], name="foo"), diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 04132ced044fc..315b3003f716b 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -197,9 +197,7 @@ def test_moment_functions_zero_length_pairwise(f): df2 = DataFrame(columns=Index(["a"], name="foo"), index=Index([], name="bar")) df2["a"] = df2["a"].astype("float64") - df1_expected = DataFrame( - index=MultiIndex.from_product([df1.index, df1.columns]), columns=Index([]) - ) + df1_expected = DataFrame(index=MultiIndex.from_product([df1.index, df1.columns])) df2_expected = DataFrame( index=MultiIndex.from_product([df2.index, df2.columns], names=["bar", "foo"]), columns=Index(["a"], name="foo"), diff --git a/pyproject.toml b/pyproject.toml 
index b649dc0c339f4..1f9ae620db22c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,7 +58,7 @@ matplotlib = "pandas:plotting._matplotlib" test = ['hypothesis>=5.5.3', 'pytest>=6.0', 'pytest-xdist>=1.31', 'pytest-asyncio>=0.17.0'] performance = ['bottleneck>=1.3.2', 'numba>=0.53.1', 'numexpr>=2.7.1'] timezone = ['tzdata>=2022.1'] -computation = ['scipy>=1.7.1', 'xarray>=0.19.0'] +computation = ['scipy>=1.7.1', 'xarray>=0.21.0'] fss = ['fsspec>=2021.07.0'] aws = ['s3fs>=2021.08.0'] gcp = ['gcsfs>=2021.07.0', 'pandas-gbq>=0.15.0'] @@ -113,7 +113,7 @@ all = ['beautifulsoup4>=4.9.3', 'tables>=3.6.1', 'tabulate>=0.8.9', 'tzdata>=2022.1', - 'xarray>=0.19.0', + 'xarray>=0.21.0', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3', 'zstandard>=0.15.2'] @@ -230,7 +230,6 @@ disable = [ # misc "abstract-class-instantiated", - "redundant-keyword-arg", "no-value-for-parameter", "undefined-variable", "unpacking-non-sequence",