diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 13fff4d4c9f2b..4af02839d831b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -3060,6 +3060,9 @@ def dtypes_all_equal(list types not None) -> bool: """ first = types[0] for t in types[1:]: + if t is first: + # Fastpath can provide a nice boost for EADtypes + continue try: if not t == first: return False diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 9917af9da7665..10b2b6e57c3bf 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -3,10 +3,16 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Sequence, + cast, +) import numpy as np +from pandas._libs import lib + from pandas.core.dtypes.astype import astype_array from pandas.core.dtypes.cast import ( common_dtype_categorical_compat, @@ -26,7 +32,10 @@ if TYPE_CHECKING: from pandas._typing import AxisInt - from pandas.core.arrays import Categorical + from pandas.core.arrays import ( + Categorical, + ExtensionArray, + ) def concat_compat(to_concat, axis: AxisInt = 0, ea_compat_axis: bool = False): @@ -38,7 +47,7 @@ def concat_compat(to_concat, axis: AxisInt = 0, ea_compat_axis: bool = False): Parameters ---------- - to_concat : array of arrays + to_concat : sequence of arrays axis : axis to provide concatenation ea_compat_axis : bool, default False For ExtensionArray compat, behave as if axis == 1 when determining @@ -48,6 +57,24 @@ def concat_compat(to_concat, axis: AxisInt = 0, ea_compat_axis: bool = False): ------- a single array, preserving the combined dtypes """ + if len(to_concat) and lib.dtypes_all_equal([obj.dtype for obj in to_concat]): + # fastpath! 
+ obj = to_concat[0] + if isinstance(obj, np.ndarray): + to_concat_arrs = cast("Sequence[np.ndarray]", to_concat) + return np.concatenate(to_concat_arrs, axis=axis) + + to_concat_eas = cast("Sequence[ExtensionArray]", to_concat) + if ea_compat_axis: + # We have 1D objects, that don't support axis keyword + return obj._concat_same_type(to_concat_eas) + elif axis == 0: + return obj._concat_same_type(to_concat_eas) + else: + # e.g. DatetimeArray + # NB: We are assuming here that ensure_wrapped_if_datetimelike has + # been called where relevant. + return obj._concat_same_type(to_concat_eas, axis=axis) # filter empty arrays # 1-d dtypes always are included here diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index cfa8b1de594a5..bfdb56147a807 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -230,10 +230,12 @@ def concatenate_managers( # we can use np.concatenate, which is more performant # than concat_compat values = np.concatenate(vals, axis=1) - else: + elif is_1d_only_ea_dtype(blk.dtype): # TODO(EA2D): special-casing not needed with 2D EAs - values = concat_compat(vals, axis=1) + values = concat_compat(vals, axis=1, ea_compat_axis=True) values = ensure_block_shape(values, ndim=2) + else: + values = concat_compat(vals, axis=1) values = ensure_wrapped_if_datetimelike(values) @@ -541,6 +543,9 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike: # if we did, the missing_arr.fill would cast to gibberish missing_arr = np.empty(self.shape, dtype=empty_dtype) missing_arr.fill(fill_value) + + if empty_dtype.kind in "mM": + missing_arr = ensure_wrapped_if_datetimelike(missing_arr) return missing_arr if (not self.indexers) and (not self.block._can_consolidate):