diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py
index 8a21d99124ec6..1a09157fabd09 100644
--- a/pandas/io/feather_format.py
+++ b/pandas/io/feather_format.py
@@ -18,10 +18,8 @@
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import doc
 
-from pandas import (
-    arrays,
-    get_option,
-)
+import pandas as pd
+from pandas import get_option
 from pandas.core.api import (
     DataFrame,
     RangeIndex,
@@ -173,11 +171,4 @@ def read_feather(
             return pa_table.to_pandas(types_mapper=_arrow_dtype_mapping().get)
 
         elif dtype_backend == "pyarrow":
-            return DataFrame(
-                {
-                    col_name: arrays.ArrowExtensionArray(pa_col)
-                    for col_name, pa_col in zip(
-                        pa_table.column_names, pa_table.itercolumns()
-                    )
-                }
-            )
+            return pa_table.to_pandas(types_mapper=pd.ArrowDtype)
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index bdc070d04bd69..b8f2645b788ea 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -54,6 +54,7 @@
 from pandas.core.dtypes.generic import ABCIndex
 
 from pandas import (
+    ArrowDtype,
     DataFrame,
     MultiIndex,
     Series,
@@ -960,16 +961,8 @@ def read(self) -> DataFrame | Series:
             pa_table = pyarrow_json.read_json(self.data)
             if self.use_nullable_dtypes:
                 if get_option("mode.dtype_backend") == "pyarrow":
-                    from pandas.arrays import ArrowExtensionArray
-
-                    return DataFrame(
-                        {
-                            col_name: ArrowExtensionArray(pa_col)
-                            for col_name, pa_col in zip(
-                                pa_table.column_names, pa_table.itercolumns()
-                            )
-                        }
-                    )
+                    return pa_table.to_pandas(types_mapper=ArrowDtype)
+
                 elif get_option("mode.dtype_backend") == "pandas":
                     from pandas.io._util import _arrow_dtype_mapping
 
diff --git a/pandas/io/orc.py b/pandas/io/orc.py
index 5336e2a14f66d..28526ec249d9d 100644
--- a/pandas/io/orc.py
+++ b/pandas/io/orc.py
@@ -28,7 +28,7 @@
     is_unsigned_integer_dtype,
 )
 
-from pandas.core.arrays import ArrowExtensionArray
+import pandas as pd
 from pandas.core.frame import DataFrame
 
 from pandas.io.common import get_handle
@@ -99,14 +99,7 @@ def read_orc(
     if use_nullable_dtypes:
         dtype_backend = get_option("mode.dtype_backend")
         if dtype_backend == "pyarrow":
-            df = DataFrame(
-                {
-                    col_name: ArrowExtensionArray(pa_col)
-                    for col_name, pa_col in zip(
-                        pa_table.column_names, pa_table.itercolumns()
-                    )
-                }
-            )
+            df = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
         else:
             from pandas.io._util import _arrow_dtype_mapping
 
diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
index aec31f40f8570..7dc839f47b186 100644
--- a/pandas/io/parquet.py
+++ b/pandas/io/parquet.py
@@ -22,10 +22,10 @@
 from pandas.errors import AbstractMethodError
 from pandas.util._decorators import doc
 
+import pandas as pd
 from pandas import (
     DataFrame,
     MultiIndex,
-    arrays,
     get_option,
 )
 from pandas.core.shared_docs import _shared_docs
@@ -250,14 +250,11 @@ def read(
         if dtype_backend == "pandas":
             result = pa_table.to_pandas(**to_pandas_kwargs)
         elif dtype_backend == "pyarrow":
-            result = DataFrame(
-                {
-                    col_name: arrays.ArrowExtensionArray(pa_col)
-                    for col_name, pa_col in zip(
-                        pa_table.column_names, pa_table.itercolumns()
-                    )
-                }
-            )
+            # Incompatible types in assignment (expression has type
+            # "Type[ArrowDtype]", target has type overloaded function
+            to_pandas_kwargs["types_mapper"] = pd.ArrowDtype  # type: ignore[assignment]  # noqa
+            result = pa_table.to_pandas(**to_pandas_kwargs)
+
         if manager == "array":
             result = result._as_manager("array", copy=False)
         return result
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 420b6212f857a..58dfc95c1e5b6 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -5,9 +5,9 @@
 
 from pandas.core.dtypes.inference import is_integer
 
+import pandas as pd
 from pandas import (
     DataFrame,
-    arrays,
     get_option,
 )
 
@@ -153,12 +153,7 @@ def read(self) -> DataFrame:
             self.kwds["use_nullable_dtypes"]
             and get_option("mode.dtype_backend") == "pyarrow"
         ):
-            frame = DataFrame(
-                {
-                    col_name: arrays.ArrowExtensionArray(pa_col)
-                    for col_name, pa_col in zip(table.column_names, table.itercolumns())
-                }
-            )
+            frame = table.to_pandas(types_mapper=pd.ArrowDtype)
         else:
             frame = table.to_pandas()
         return self._finalize_pandas_output(frame)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 353dc4f1cbd8a..2124787e8a80e 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -1034,14 +1034,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
         df["bool_with_none"] = [True, None, True]
 
         pa_table = pyarrow.Table.from_pandas(df)
-        expected = pd.DataFrame(
-            {
-                col_name: pd.arrays.ArrowExtensionArray(pa_column)
-                for col_name, pa_column in zip(
-                    pa_table.column_names, pa_table.itercolumns()
-                )
-            }
-        )
+        expected = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
         # pyarrow infers datetimes as us instead of ns
         expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]")
         expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
@@ -1059,6 +1052,20 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
             expected=expected,
         )
 
+    def test_read_use_nullable_types_pyarrow_config_index(self, pa):
+        df = pd.DataFrame(
+            {"a": [1, 2]}, index=pd.Index([3, 4], name="test"), dtype="int64[pyarrow]"
+        )
+        expected = df.copy()
+
+        with pd.option_context("mode.dtype_backend", "pyarrow"):
+            check_round_trip(
+                df,
+                engine=pa,
+                read_kwargs={"use_nullable_dtypes": True},
+                expected=expected,
+            )
+
 
 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full):
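
For reference, the conversion pattern every call site above converges on is pyarrow's Table.to_pandas(types_mapper=...) hook: the mapper is invoked once per column type, and returning pd.ArrowDtype keeps each column Arrow-backed instead of building ArrowExtensionArray objects column by column. The snippet below is a minimal standalone sketch of that behavior, not part of the diff; the example table, its column names, and its values are made up for illustration.

import pandas as pd
import pyarrow as pa

# Hypothetical input table; in the diff this comes from feather/orc/parquet/json/csv readers.
table = pa.table({"a": [1, 2, None], "b": ["x", "y", "z"]})

# types_mapper receives each column's pyarrow type; returning pd.ArrowDtype(<type>)
# keeps the column backed by Arrow memory rather than converting it to a NumPy dtype.
df = table.to_pandas(types_mapper=pd.ArrowDtype)
print(df.dtypes)  # expected along the lines of: int64[pyarrow], string[pyarrow]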