diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py index 5f1f85d8179cd..6c4b913574d9e 100644 --- a/pandas/tests/groupby/methods/test_describe.py +++ b/pandas/tests/groupby/methods/test_describe.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -73,7 +71,6 @@ def test_series_describe_as_index(as_index, keys): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_frame_describe_multikey(tsframe): grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() @@ -82,7 +79,7 @@ def test_frame_describe_multikey(tsframe): group = grouped[col].describe() # GH 17464 - Remove duplicate MultiIndex levels group_col = MultiIndex( - levels=[[col], group.columns], + levels=[Index([col], dtype=tsframe.columns.dtype), group.columns], codes=[[0] * len(group.columns), range(len(group.columns))], ) group = DataFrame(group.values, columns=group_col, index=group.index) @@ -249,7 +246,6 @@ def test_describe_non_cython_paths(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("dtype", [int, float, object]) @pytest.mark.parametrize( "kwargs", @@ -271,5 +267,5 @@ def test_groupby_empty_dataset(dtype, kwargs): result = df.iloc[:0].groupby("A").B.describe(**kwargs) expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0] - expected.index = Index([]) + expected.index = Index([], dtype=df.columns.dtype) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index d20b30834dea2..d9c8706ec9202 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -679,14 +677,14 @@ def test_first_multi_key_groupby_categorical(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("method", ["first", "last", "nth"]) def test_groupby_last_first_nth_with_none(method, nulls_fixture): # GH29645 - expected = Series(["y"]) + expected = Series(["y"], dtype=object) data = Series( [nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture], index=[0, 0, 0, 0, 0], + dtype=object, ).groupby(level=0) if method == "nth": diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index bb54cbd69bd42..d42aa06d6bbfe 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -3,7 +3,6 @@ from pandas._config import using_string_dtype -from pandas.compat import HAS_PYARROW from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.missing import na_value_for_dtype @@ -13,9 +12,6 @@ from pandas.tests.groupby import get_groupby_method_args -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False -) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -59,9 +55,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group( tm.assert_frame_equal(grouped, expected) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False -) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -138,9 +131,6 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs): tm.assert_frame_equal(grouped, expected) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False -) @pytest.mark.parametrize( "dropna, idx, expected", [ @@ -215,9 +205,6 @@ def test_groupby_dataframe_slice_then_transform(dropna, index): tm.assert_series_equal(result, expected) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False -) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -299,9 +286,6 @@ def test_groupby_dropna_datetime_like_data( tm.assert_frame_equal(grouped, expected) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False -) @pytest.mark.parametrize( "dropna, data, selected_data, levels", [ diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index a65dda1570944..053dda0629571 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -6,6 +6,7 @@ from pandas._config import using_string_dtype from pandas._libs import lib +from pandas.compat import HAS_PYARROW from pandas.core.dtypes.common import ensure_platform_int @@ -372,8 +373,7 @@ def test_transform_select_columns(df): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_transform_nuisance_raises(df): +def test_transform_nuisance_raises(df, using_infer_string): # case that goes through _transform_item_by_item df.columns = ["A", "B", "B", "D"] @@ -383,10 +383,16 @@ def test_transform_nuisance_raises(df): grouped = df.groupby("A") gbc = grouped["B"] - with pytest.raises(TypeError, match="Could not convert"): + msg = "Could not convert" + if using_infer_string: + if df.columns.dtype.storage == "pyarrow": + msg = "with dtype str does not support operation 'mean'" + else: + msg = "Cannot perform reduction 'mean' with string dtype" + with pytest.raises(TypeError, match=msg): gbc.transform(lambda x: np.mean(x)) - with pytest.raises(TypeError, match="Could not convert"): + with pytest.raises(TypeError, match=msg): df.groupby("A").transform(lambda x: np.mean(x)) @@ -445,8 +451,7 @@ def test_transform_coercion(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_groupby_transform_with_int(): +def test_groupby_transform_with_int(using_infer_string): # GH 3740, make sure that we might upcast on item-by-item transform # floats @@ -476,8 +481,14 @@ def test_groupby_transform_with_int(): "D": "foo", } ) + msg = "Could not convert" + if using_infer_string: + if HAS_PYARROW: + msg = "with dtype str does not support operation 'mean'" + else: + msg = "Cannot perform reduction 'mean' with string dtype" with np.errstate(all="ignore"): - with pytest.raises(TypeError, match="Could not convert"): + with pytest.raises(TypeError, match=msg): df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) result = df.groupby("A")[["B", "C"]].transform( lambda x: (x - x.mean()) / x.std() @@ -489,7 +500,7 @@ def test_groupby_transform_with_int(): s = Series([2, 3, 4, 10, 5, -1]) df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"}) with np.errstate(all="ignore"): - with pytest.raises(TypeError, match="Could not convert"): + with pytest.raises(TypeError, match=msg): df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) result = df.groupby("A")[["B", "C"]].transform( lambda x: (x - x.mean()) / x.std() @@ -705,7 +716,6 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.slow @pytest.mark.parametrize( "op, args, targop", @@ -757,6 +767,7 @@ def test_cython_transform_frame_column( "does not support operation", ".* is not supported for object dtype", "is not implemented for this dtype", + ".* is not supported for str dtype", ] ) with pytest.raises(TypeError, match=msg):