From 87bba19c1b380fa7709e1f56de8c82186dcb54d0 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Feb 2021 19:24:44 -0800 Subject: [PATCH 1/6] TST: collect Index tests by method --- .../indexes/datetimes/methods/test_repeat.py | 78 +++++++++++++++++++ pandas/tests/indexes/datetimes/test_ops.py | 68 ---------------- .../indexes/period/methods/test_repeat.py | 26 +++++++ pandas/tests/indexes/period/test_period.py | 72 ----------------- .../indexes/timedeltas/methods/test_repeat.py | 34 ++++++++ pandas/tests/indexes/timedeltas/test_ops.py | 35 ++------- pandas/tests/indexing/test_loc.py | 38 +++++++++ pandas/tests/reshape/concat/test_concat.py | 17 ++++ 8 files changed, 198 insertions(+), 170 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/methods/test_repeat.py create mode 100644 pandas/tests/indexes/period/methods/test_repeat.py create mode 100644 pandas/tests/indexes/timedeltas/methods/test_repeat.py diff --git a/pandas/tests/indexes/datetimes/methods/test_repeat.py b/pandas/tests/indexes/datetimes/methods/test_repeat.py new file mode 100644 index 0000000000000..81768622fd3d5 --- /dev/null +++ b/pandas/tests/indexes/datetimes/methods/test_repeat.py @@ -0,0 +1,78 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Timestamp, + date_range, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat_range(self, tz_naive_fixture): + tz = tz_naive_fixture + rng = date_range("1/1/2000", "1/1/2001") + + result = rng.repeat(5) + assert result.freq is None + assert len(result) == 5 * len(rng) + + index = date_range("2001-01-01", periods=2, freq="D", tz=tz) + exp = DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz + ) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = date_range("2001-01-01", periods=2, freq="2D", tz=tz) + exp = DatetimeIndex( + ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz + ) + 
for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) + exp = DatetimeIndex( + [ + "2001-01-01", + "2001-01-01", + "2001-01-01", + "NaT", + "NaT", + "NaT", + "2003-01-01", + "2003-01-01", + "2003-01-01", + ], + tz=tz, + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + def test_repeat(self, tz_naive_fixture): + tz = tz_naive_fixture + reps = 2 + msg = "the 'axis' parameter is not supported" + + rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) + + expected_rng = DatetimeIndex( + [ + Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), + Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), + ] + ) + + res = rng.repeat(reps) + tm.assert_index_equal(res, expected_rng) + assert res.freq is None + + tm.assert_index_equal(np.repeat(rng, reps), expected_rng) + with pytest.raises(ValueError, match=msg): + np.repeat(rng, reps, axis=1) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 676c0ee99ef7c..77812e49f1fc0 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -12,7 +12,6 @@ DatetimeIndex, Index, Series, - Timestamp, bdate_range, date_range, ) @@ -46,73 +45,6 @@ def test_ops_properties_basic(self, datetime_series): with pytest.raises(AttributeError, match=msg): s.weekday - def test_repeat_range(self, tz_naive_fixture): - tz = tz_naive_fixture - rng = date_range("1/1/2000", "1/1/2001") - - result = rng.repeat(5) - assert result.freq is None - assert len(result) == 5 * len(rng) - - index = date_range("2001-01-01", periods=2, freq="D", tz=tz) - exp = DatetimeIndex( - ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz - ) - for res 
in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - index = date_range("2001-01-01", periods=2, freq="2D", tz=tz) - exp = DatetimeIndex( - ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz - ) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) - exp = DatetimeIndex( - [ - "2001-01-01", - "2001-01-01", - "2001-01-01", - "NaT", - "NaT", - "NaT", - "2003-01-01", - "2003-01-01", - "2003-01-01", - ], - tz=tz, - ) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - def test_repeat(self, tz_naive_fixture): - tz = tz_naive_fixture - reps = 2 - msg = "the 'axis' parameter is not supported" - - rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) - - expected_rng = DatetimeIndex( - [ - Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), - Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), - Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), - Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), - ] - ) - - res = rng.repeat(reps) - tm.assert_index_equal(res, expected_rng) - assert res.freq is None - - tm.assert_index_equal(np.repeat(rng, reps), expected_rng) - with pytest.raises(ValueError, match=msg): - np.repeat(rng, reps, axis=1) - @pytest.mark.parametrize( "freq,expected", [ diff --git a/pandas/tests/indexes/period/methods/test_repeat.py b/pandas/tests/indexes/period/methods/test_repeat.py new file mode 100644 index 0000000000000..fc344b06420d1 --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_repeat.py @@ -0,0 +1,26 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + period_range, +) +import pandas._testing as tm + + +class TestRepeat: + @pytest.mark.parametrize("use_numpy", [True, False]) + @pytest.mark.parametrize( + "index", + [ + 
period_range("2000-01-01", periods=3, freq="D"), + period_range("2001-01-01", periods=3, freq="2D"), + PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), + ], + ) + def test_repeat_freqstr(self, index, use_numpy): + # GH#10183 + expected = PeriodIndex([per for per in index for _ in range(3)]) + result = np.repeat(index, 3) if use_numpy else index.repeat(3) + tm.assert_index_equal(result, expected) + assert result.freqstr == index.freqstr diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index aabc837e25b4b..2690006279208 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -3,9 +3,7 @@ from pandas._libs.tslibs.period import IncompatibleFrequency -import pandas as pd from pandas import ( - DataFrame, DatetimeIndex, Index, NaT, @@ -49,22 +47,6 @@ def test_where(self): # This is handled in test_indexing pass - @pytest.mark.parametrize("use_numpy", [True, False]) - @pytest.mark.parametrize( - "index", - [ - period_range("2000-01-01", periods=3, freq="D"), - period_range("2001-01-01", periods=3, freq="2D"), - PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), - ], - ) - def test_repeat_freqstr(self, index, use_numpy): - # GH10183 - expected = PeriodIndex([p for p in index for _ in range(3)]) - result = np.repeat(index, 3) if use_numpy else index.repeat(3) - tm.assert_index_equal(result, expected) - assert result.freqstr == index.freqstr - def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): @@ -392,22 +374,6 @@ def test_convert_array_of_periods(self): result = Index(periods) assert isinstance(result, PeriodIndex) - def test_append_concat(self): # TODO: pd.concat test - # #1815 - d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") - d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") - - s1 = Series(np.random.randn(10), d1) - s2 = 
Series(np.random.randn(10), d2) - - s1 = s1.to_period() - s2 = s2.to_period() - - # drops index - result = pd.concat([s1, s2]) - assert isinstance(result.index, PeriodIndex) - assert result.index[0] == s1.index[0] - def test_pickle_freq(self): # GH2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") @@ -423,44 +389,6 @@ def test_map(self): exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) - @pytest.mark.parametrize( - "msg, key", - [ - (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), - (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), - (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), - ( - r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", - (Period(2018), Period(2016), "bar"), - ), - (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), - ( - r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", - (Period(2017), "foo", Period(2015)), - ), - (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), - ], - ) - def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): - # issue 20684 - """ - parse_time_string return parameter if type not matched. - PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. 
- If first argument is Period and a tuple has 3 items, - process go on not raise exception - """ - df = DataFrame( - { - "A": [Period(2019), "x1", "x2"], - "B": [Period(2018), Period(2016), "y1"], - "C": [Period(2017), "z1", Period(2015)], - "V1": [1, 2, 3], - "V2": [10, 20, 30], - } - ).set_index(["A", "B", "C"]) - with pytest.raises(KeyError, match=msg): - df.loc[key] - def test_format_empty(self): # GH35712 empty_idx = self._holder([], freq="A") diff --git a/pandas/tests/indexes/timedeltas/methods/test_repeat.py b/pandas/tests/indexes/timedeltas/methods/test_repeat.py new file mode 100644 index 0000000000000..2a9b58d1bf322 --- /dev/null +++ b/pandas/tests/indexes/timedeltas/methods/test_repeat.py @@ -0,0 +1,34 @@ +import numpy as np + +from pandas import ( + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +class TestRepeat: + def test_repeat(self): + index = timedelta_range("1 days", periods=2, freq="D") + exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"]) + for res in [index.repeat(2), np.repeat(index, 2)]: + tm.assert_index_equal(res, exp) + assert res.freq is None + + index = TimedeltaIndex(["1 days", "NaT", "3 days"]) + exp = TimedeltaIndex( + [ + "1 days", + "1 days", + "1 days", + "NaT", + "NaT", + "NaT", + "3 days", + "3 days", + "3 days", + ] + ) + for res in [index.repeat(3), np.repeat(index, 3)]: + tm.assert_index_equal(res, exp) + assert res.freq is None diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 4e6d69913900d..665afe5137a52 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -1,8 +1,8 @@ import numpy as np import pytest -import pandas as pd from pandas import ( + NaT, Series, TimedeltaIndex, timedelta_range, @@ -43,7 +43,7 @@ def test_value_counts_unique(self): "1 days 09:00:00", "1 days 08:00:00", "1 days 08:00:00", - pd.NaT, + NaT, ] ) @@ -53,7 +53,7 @@ def test_value_counts_unique(self): 
for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) - exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT]) + exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", NaT]) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: @@ -191,34 +191,9 @@ def test_infer_freq(self, freq_sample): tm.assert_index_equal(idx, result) assert result.freq == freq_sample - def test_repeat(self): - index = timedelta_range("1 days", periods=2, freq="D") - exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"]) - for res in [index.repeat(2), np.repeat(index, 2)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - - index = TimedeltaIndex(["1 days", "NaT", "3 days"]) - exp = TimedeltaIndex( - [ - "1 days", - "1 days", - "1 days", - "NaT", - "NaT", - "NaT", - "3 days", - "3 days", - "3 days", - ] - ) - for res in [index.repeat(3), np.repeat(index, 3)]: - tm.assert_index_equal(res, exp) - assert res.freq is None - def test_nat(self): - assert TimedeltaIndex._na_value is pd.NaT - assert TimedeltaIndex([])._na_value is pd.NaT + assert TimedeltaIndex._na_value is NaT + assert TimedeltaIndex([])._na_value is NaT idx = TimedeltaIndex(["1 days", "2 days"]) assert idx._can_hold_na diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 829bba5f2930d..5ce50b6e7e4c7 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -22,6 +22,7 @@ Index, IndexSlice, MultiIndex, + Period, Series, SparseDtype, Timedelta, @@ -143,6 +144,43 @@ def test_setitem_from_duplicate_axis(self): class TestLoc2: # TODO: better name, just separating out things that rely on base class + @pytest.mark.parametrize( + "msg, key", + [ + (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), + (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), + (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), + ( + 
r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", + (Period(2018), Period(2016), "bar"), + ), + (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), + ( + r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", + (Period(2017), "foo", Period(2015)), + ), + (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), + ], + ) + def test_contains_raise_error_if_period_index_is_in_multi_index(self, msg, key): + # GH#20684 + """ + parse_time_string return parameter if type not matched. + PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. + If first argument is Period and a tuple has 3 items, + process go on not raise exception + """ + df = DataFrame( + { + "A": [Period(2019), "x1", "x2"], + "B": [Period(2018), Period(2016), "y1"], + "C": [Period(2017), "z1", Period(2015)], + "V1": [1, 2, 3], + "V2": [10, 20, 30], + } + ).set_index(["A", "B", "C"]) + with pytest.raises(KeyError, match=msg): + df.loc[key] def test_loc_getitem_missing_unicode_key(self): df = DataFrame({"a": [1]}) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index a125f85efc8d3..47d9bddcf50ad 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -13,6 +13,7 @@ DataFrame, Index, MultiIndex, + PeriodIndex, Series, concat, date_range, @@ -24,6 +25,22 @@ class TestConcatenate: + def test_append_concat(self): + # GH#1815 + d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") + d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") + + s1 = Series(np.random.randn(10), d1) + s2 = Series(np.random.randn(10), d2) + + s1 = s1.to_period() + s2 = s2.to_period() + + # drops index + result = concat([s1, s2]) + assert isinstance(result.index, PeriodIndex) + assert result.index[0] == s1.index[0] + def test_concat_copy(self): df = DataFrame(np.random.randn(4, 3)) df2 = DataFrame(np.random.randint(0, 10, size=4).reshape(4, 1)) 
From 0d1253630c3d85d56e0c49396b223f809059daae Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 24 Feb 2021 21:30:56 -0800 Subject: [PATCH 2/6] TST: split big test --- pandas/tests/reshape/test_crosstab.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 1ecb408d49813..e467dbb7d49b6 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -259,6 +259,8 @@ def test_margin_dropna(self): expected.columns = Index([3, 4, "All"], name="b") tm.assert_frame_equal(actual, expected) + def test_margin_dropna2(self): + df = DataFrame( {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} ) @@ -268,6 +270,8 @@ def test_margin_dropna(self): expected.columns = Index([3.0, 4.0, "All"], name="b") tm.assert_frame_equal(actual, expected) + def test_margin_dropna3(self): + df = DataFrame( {"a": [1, np.nan, np.nan, np.nan, np.nan, 2], "b": [3, 3, 4, 4, 4, 4]} ) @@ -277,6 +281,7 @@ def test_margin_dropna(self): expected.columns = Index([3, 4, "All"], name="b") tm.assert_frame_equal(actual, expected) + def test_margin_dropna4(self): # GH 12642 # _add_margins raises KeyError: Level None not found # when margins=True and dropna=False @@ -287,6 +292,7 @@ def test_margin_dropna(self): expected.columns = Index([3, 4, "All"], name="b") tm.assert_frame_equal(actual, expected) + def test_margin_dropna5(self): df = DataFrame( {"a": [1, np.nan, np.nan, np.nan, 2, np.nan], "b": [3, np.nan, 4, 4, 4, 4]} ) @@ -296,6 +302,7 @@ def test_margin_dropna(self): expected.columns = Index([3.0, 4.0, "All"], name="b") tm.assert_frame_equal(actual, expected) + def test_margin_dropna6(self): a = np.array(["foo", "foo", "foo", "bar", "bar", "foo", "foo"], dtype=object) b = np.array(["one", "one", "two", "one", "two", np.nan, "two"], dtype=object) c = np.array( @@ -395,6 +402,12 @@ def test_crosstab_normalize(self): crosstab(df.a, 
df.b, normalize=True, margins=True), all_normal_margins ) + def test_crosstab_normalize_arrays(self): + # GH#12578 + df = DataFrame( + {"a": [1, 2, 2, 2, 2], "b": [3, 3, 4, 4, 4], "c": [1, 1, np.nan, 1, 1]} + ) + # Test arrays crosstab( [np.array([1, 1, 2, 2]), np.array([1, 2, 1, 2])], np.array([1, 2, 1, 2]) @@ -798,7 +811,7 @@ def test_categoricals(a_dtype, b_dtype): if not a_is_cat: expected = expected.loc[[0, 2, "All"]] expected["All"] = expected["All"].astype("int64") - print(result) - print(expected) - print(expected.loc[[0, 2, "All"]]) + repr(result) + repr(expected) + repr(expected.loc[[0, 2, "All"]]) tm.assert_frame_equal(result, expected) From cdf8b17c34e7b03b9010ffd6d2535dd20d4c0272 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Feb 2021 15:15:56 -0800 Subject: [PATCH 3/6] TST/REF: share drop_duplicates tests --- pandas/core/groupby/ops.py | 2 +- pandas/core/indexes/base.py | 2 +- pandas/tests/dtypes/test_inference.py | 16 ++-- .../datetimelike_/test_drop_duplicates.py | 80 +++++++++++++++++++ pandas/tests/indexes/datetimes/test_ops.py | 40 ---------- .../indexes/period/methods/test_is_full.py | 23 ++++++ pandas/tests/indexes/period/test_ops.py | 40 ---------- pandas/tests/indexes/period/test_period.py | 19 ----- pandas/tests/indexes/timedeltas/test_ops.py | 40 ---------- 9 files changed, 116 insertions(+), 146 deletions(-) create mode 100644 pandas/tests/indexes/datetimelike_/test_drop_duplicates.py create mode 100644 pandas/tests/indexes/period/methods/test_is_full.py diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5004d1fe08a5b..629de3ef905c2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -774,7 +774,7 @@ def _aggregate_series_pure_python(self, obj: Series, func: F): counts[label] = group.shape[0] result[label] = res - result = lib.maybe_convert_objects(result, try_float=0) + result = lib.maybe_convert_objects(result, try_float=False) result = maybe_cast_result(result, obj, numeric_only=True) 
return result, counts diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e633d6b28a8c5..57da8e9639a28 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1152,7 +1152,7 @@ def _format_with_header( values = self._values if is_object_dtype(values.dtype): - values = lib.maybe_convert_objects(values, safe=1) + values = lib.maybe_convert_objects(values, safe=True) result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values] diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 046256535df57..9315473257747 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -564,17 +564,23 @@ def test_maybe_convert_objects_datetime(self): [np.datetime64("2000-01-01"), np.timedelta64(1, "s")], dtype=object ) exp = arr.copy() - out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) tm.assert_numpy_array_equal(out, exp) arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object) exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]") - out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) tm.assert_numpy_array_equal(out, exp) arr = np.array([np.timedelta64(1, "s"), np.nan], dtype=object) exp = arr.copy() - out = lib.maybe_convert_objects(arr, convert_datetime=1, convert_timedelta=1) + out = lib.maybe_convert_objects( + arr, convert_datetime=True, convert_timedelta=True + ) tm.assert_numpy_array_equal(out, exp) @pytest.mark.parametrize( @@ -587,7 +593,7 @@ def test_maybe_convert_objects_datetime(self): def test_maybe_convert_objects_nullable_integer(self, exp): # GH27335 arr = np.array([2, np.NaN], dtype=object) - result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) 
+ result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=True) tm.assert_extension_array_equal(result, exp) @@ -601,7 +607,7 @@ def test_maybe_convert_objects_bool_nan(self): def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) - result = lib.maybe_convert_objects(array, convert_datetime=1) + result = lib.maybe_convert_objects(array, convert_datetime=True) tm.assert_numpy_array_equal(result, array) diff --git a/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py new file mode 100644 index 0000000000000..c56fc84b540c0 --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest + +from pandas import ( + PeriodIndex, + Series, + date_range, + period_range, + timedelta_range, +) +import pandas._testing as tm + + +class DropDuplicates: + def test_drop_duplicates_metadata(self, idx): + # GH#10115 + result = idx.drop_duplicates() + tm.assert_index_equal(idx, result) + assert idx.freq == result.freq + + idx_dup = idx.append(idx) + result = idx_dup.drop_duplicates() + + expected = idx + if not isinstance(idx, PeriodIndex): + # freq is reset except for PeriodIndex + assert idx_dup.freq is None + assert result.freq is None + expected = idx._with_freq(None) + else: + assert result.freq == expected.freq + + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "keep, expected, index", + [ + ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), + ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + False, + np.concatenate(([True] * 5, [False] * 5, [True] * 5)), + np.arange(5, 10), + ), + ], + ) + def test_drop_duplicates(self, keep, expected, index, idx): + # to check Index/Series compat + idx = idx.append(idx[:5]) + + tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) + 
expected = idx[~expected] + + result = idx.drop_duplicates(keep=keep) + tm.assert_index_equal(result, expected) + + result = Series(idx).drop_duplicates(keep=keep) + tm.assert_series_equal(result, Series(expected, index=index)) + + +class TestDropDuplicatesPeriodIndex(DropDuplicates): + @pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) + def freq(self, request): + return request.param + + @pytest.fixture + def idx(self, freq): + return period_range("2011-01-01", periods=10, freq=freq, name="idx") + + +class TestDropDuplicatesDatetimeIndex(DropDuplicates): + @pytest.fixture + def idx(self, freq_sample): + return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") + + +class TestDropDuplicatesTimedeltaIndex(DropDuplicates): + @pytest.fixture + def idx(self, freq_sample): + return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 77812e49f1fc0..29af5cb456179 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -197,46 +197,6 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - def test_drop_duplicates_metadata(self, freq_sample): - # GH 10115 - idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") - result = idx.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - idx_dup = idx.append(idx) - assert idx_dup.freq is None # freq is reset - result = idx_dup.drop_duplicates() - expected = idx._with_freq(None) - tm.assert_index_equal(result, expected) - assert result.freq is None - - @pytest.mark.parametrize( - "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - 
( - False, - np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10), - ), - ], - ) - def test_drop_duplicates(self, freq_sample, keep, expected, index): - # to check Index/Series compat - idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") - idx = idx.append(idx[:5]) - - tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) - expected = idx[~expected] - - result = idx.drop_duplicates(keep=keep) - tm.assert_index_equal(result, expected) - - result = Series(idx).drop_duplicates(keep=keep) - tm.assert_series_equal(result, Series(expected, index=index)) - def test_infer_freq(self, freq_sample): # GH 11018 idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) diff --git a/pandas/tests/indexes/period/methods/test_is_full.py b/pandas/tests/indexes/period/methods/test_is_full.py new file mode 100644 index 0000000000000..490f199a59ed7 --- /dev/null +++ b/pandas/tests/indexes/period/methods/test_is_full.py @@ -0,0 +1,23 @@ +import pytest + +from pandas import PeriodIndex + + +def test_is_full(): + index = PeriodIndex([2005, 2007, 2009], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2006, 2007], freq="A") + assert index.is_full + + index = PeriodIndex([2005, 2005, 2007], freq="A") + assert not index.is_full + + index = PeriodIndex([2005, 2005, 2006], freq="A") + assert index.is_full + + index = PeriodIndex([2006, 2005, 2005], freq="A") + with pytest.raises(ValueError, match="Index is not monotonic"): + index.is_full + + assert index[:0].is_full diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 52f8de27cb6c6..4205571102ffe 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -85,46 +85,6 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), exp_idx) - @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) - def test_drop_duplicates_metadata(self, 
freq): - # GH 10115 - idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") - result = idx.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - idx_dup = idx.append(idx) # freq will not be reset - result = idx_dup.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - @pytest.mark.parametrize("freq", ["D", "3D", "H", "2H", "T", "2T", "S", "3S"]) - @pytest.mark.parametrize( - "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - ( - False, - np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10), - ), - ], - ) - def test_drop_duplicates(self, freq, keep, expected, index): - # to check Index/Series compat - idx = pd.period_range("2011-01-01", periods=10, freq=freq, name="idx") - idx = idx.append(idx[:5]) - - tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) - expected = idx[~expected] - - result = idx.drop_duplicates(keep=keep) - tm.assert_index_equal(result, expected) - - result = Series(idx).drop_duplicates(keep=keep) - tm.assert_series_equal(result, Series(expected, index=index)) - def test_order_compat(self): def _check_freq(index, expected_index): if isinstance(index, PeriodIndex): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 2690006279208..03e78af0b2bdd 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -337,25 +337,6 @@ def test_iteration(self): assert isinstance(result[0], Period) assert result[0].freq == index.freq - def test_is_full(self): - index = PeriodIndex([2005, 2007, 2009], freq="A") - assert not index.is_full - - index = PeriodIndex([2005, 2006, 2007], freq="A") - assert index.is_full - - index = PeriodIndex([2005, 2005, 2007], freq="A") - assert not index.is_full - - index = 
PeriodIndex([2005, 2005, 2006], freq="A") - assert index.is_full - - index = PeriodIndex([2006, 2005, 2005], freq="A") - with pytest.raises(ValueError, match="Index is not monotonic"): - index.is_full - - assert index[:0].is_full - def test_with_multi_index(self): # #1705 index = date_range("1/1/2012", periods=4, freq="12H") diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 665afe5137a52..b52560edb5606 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -144,46 +144,6 @@ def test_order(self): tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None - def test_drop_duplicates_metadata(self, freq_sample): - # GH 10115 - idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") - result = idx.drop_duplicates() - tm.assert_index_equal(idx, result) - assert idx.freq == result.freq - - idx_dup = idx.append(idx) - assert idx_dup.freq is None # freq is reset - result = idx_dup.drop_duplicates() - expected = idx._with_freq(None) - tm.assert_index_equal(expected, result) - assert result.freq is None - - @pytest.mark.parametrize( - "keep, expected, index", - [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), - ( - False, - np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10), - ), - ], - ) - def test_drop_duplicates(self, freq_sample, keep, expected, index): - # to check Index/Series compat - idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx") - idx = idx.append(idx[:5]) - - tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) - expected = idx[~expected] - - result = idx.drop_duplicates(keep=keep) - tm.assert_index_equal(result, expected) - - result = Series(idx).drop_duplicates(keep=keep) - tm.assert_series_equal(result, Series(expected, index=index)) - def 
test_infer_freq(self, freq_sample): # GH#11018 idx = timedelta_range("1", freq=freq_sample, periods=10) From 9a629c0cb21e001068fabc8302064621470b0a33 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Feb 2021 15:39:58 -0800 Subject: [PATCH 4/6] share test_nat --- .../tests/indexes/datetimelike_/test_nat.py | 54 +++++++++++++++++++ pandas/tests/indexes/datetimes/test_ops.py | 16 ------ pandas/tests/indexes/period/test_ops.py | 16 ------ pandas/tests/indexes/timedeltas/test_ops.py | 16 ------ 4 files changed, 54 insertions(+), 48 deletions(-) create mode 100644 pandas/tests/indexes/datetimelike_/test_nat.py diff --git a/pandas/tests/indexes/datetimelike_/test_nat.py b/pandas/tests/indexes/datetimelike_/test_nat.py new file mode 100644 index 0000000000000..b4a72ec65bd91 --- /dev/null +++ b/pandas/tests/indexes/datetimelike_/test_nat.py @@ -0,0 +1,54 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + NaT, + PeriodIndex, + TimedeltaIndex, +) +import pandas._testing as tm + + +class NATests: + def test_nat(self, index_without_na): + empty_index = index_without_na[:0] + + index_with_na = index_without_na.copy(deep=True) + index_with_na._data[1] = NaT + + assert type(index_without_na)._na_value is NaT + assert empty_index._na_value is NaT + assert index_with_na._na_value is NaT + assert index_without_na._na_value is NaT + + idx = index_without_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) + assert idx.hasnans is False + + idx = index_with_na + assert idx._can_hold_na + + tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) + assert idx.hasnans is True + + +class TestDatetimeIndexNA(NATests): + @pytest.fixture + def index_without_na(self, tz_naive_fixture): + tz = tz_naive_fixture + return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) + + +class TestTimedeltaIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return TimedeltaIndex(["1 days", "2 days"]) + + 
+class TestPeriodIndexNA(NATests): + @pytest.fixture + def index_without_na(self): + return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 29af5cb456179..21f142dcaa5b1 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -204,22 +204,6 @@ def test_infer_freq(self, freq_sample): tm.assert_index_equal(idx, result) assert result.freq == freq_sample - def test_nat(self, tz_naive_fixture): - tz = tz_naive_fixture - assert DatetimeIndex._na_value is pd.NaT - assert DatetimeIndex([])._na_value is pd.NaT - - idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) - assert idx._can_hold_na - - assert idx.hasnans is False - - idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz) - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert idx.hasnans is True - @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 4205571102ffe..26cf6895af02c 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -234,22 +234,6 @@ def test_order(self): tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq == "D" - def test_nat(self): - assert PeriodIndex._na_value is NaT - assert PeriodIndex([], freq="M")._na_value is NaT - - idx = PeriodIndex(["2011-01-01", "2011-01-02"], freq="D") - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - assert idx.hasnans is False - - idx = PeriodIndex(["2011-01-01", "NaT"], freq="D") - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert 
idx.hasnans is True - def test_freq_setter_deprecated(self): # GH 20678 idx = pd.period_range("2018Q1", periods=4, freq="Q") diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index b52560edb5606..b7dbd19450b28 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -151,22 +151,6 @@ def test_infer_freq(self, freq_sample): tm.assert_index_equal(idx, result) assert result.freq == freq_sample - def test_nat(self): - assert TimedeltaIndex._na_value is NaT - assert TimedeltaIndex([])._na_value is NaT - - idx = TimedeltaIndex(["1 days", "2 days"]) - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) - assert idx.hasnans is False - - idx = TimedeltaIndex(["1 days", "NaT"]) - assert idx._can_hold_na - - tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) - assert idx.hasnans is True - @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)]) def test_freq_setter(self, values, freq): From 0b5e6a8cdc229adf14125f493135cb0978f75998 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Feb 2021 17:49:07 -0800 Subject: [PATCH 5/6] TST: share sort_values tests --- .../indexes/datetimelike_/test_sort_values.py | 322 ++++++++++++++++++ .../tests/indexes/datetimes/test_datetime.py | 22 -- pandas/tests/indexes/datetimes/test_ops.py | 83 ----- pandas/tests/indexes/period/test_ops.py | 159 --------- pandas/tests/indexes/timedeltas/test_ops.py | 60 ---- .../indexes/timedeltas/test_timedelta.py | 25 -- 6 files changed, 322 insertions(+), 349 deletions(-) create mode 100644 pandas/tests/indexes/datetimelike_/test_sort_values.py diff --git a/pandas/tests/indexes/datetimelike_/test_sort_values.py b/pandas/tests/indexes/datetimelike_/test_sort_values.py new file mode 100644 index 0000000000000..c76155d11ca0e --- /dev/null +++ 
b/pandas/tests/indexes/datetimelike_/test_sort_values.py @@ -0,0 +1,322 @@ +import numpy as np +import pytest + +from pandas import ( + DatetimeIndex, + Index, + NaT, + PeriodIndex, + TimedeltaIndex, + timedelta_range, +) +import pandas._testing as tm + + +def _check_freq(index, expected_index): + if isinstance(index, PeriodIndex): + assert index.freq == expected_index.freq + + +def check_freq_ascending(ordered, orig, ascending): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is generated (or generate-able) with + period_range/date_range/timedelta_range. + """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + if ascending: + assert ordered.freq.n == orig.freq.n + else: + assert ordered.freq.n == -1 * orig.freq.n + + +def check_freq_nonmonotonic(ordered, orig): + """ + Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex + when the original index is _not_ generated (or generate-able) with + period_range/date_range/timedelta_range.
+ """ + if isinstance(ordered, PeriodIndex): + assert ordered.freq == orig.freq + elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)): + assert ordered.freq is None + + +class TestSortValues: + @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex]) + def non_monotonic_idx(self, request): + if request.param is DatetimeIndex: + return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + elif request.param is PeriodIndex: + dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) + return dti.to_period("D") + else: + return TimedeltaIndex( + ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"] + ) + + def test_argmin_argmax(self, non_monotonic_idx): + assert non_monotonic_idx.argmin() == 1 + assert non_monotonic_idx.argmax() == 0 + + def test_sort_values(self, non_monotonic_idx): + idx = non_monotonic_idx + ordered = idx.sort_values() + assert ordered.is_monotonic + + ordered = idx.sort_values(ascending=False) + assert ordered[::-1].is_monotonic + + ordered, dexer = idx.sort_values(return_indexer=True) + assert ordered.is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) + + ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) + assert ordered[::-1].is_monotonic + tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) + + def check_sort_values_with_freq(self, idx): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, idx) + check_freq_ascending(ordered, idx, True) + + ordered = idx.sort_values(ascending=False) + expected = idx[::-1] + tm.assert_index_equal(ordered, expected) + check_freq_ascending(ordered, idx, False) + + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, idx) + tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp)) + check_freq_ascending(ordered, idx, True) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + expected = idx[::-1] + 
tm.assert_index_equal(ordered, expected) + tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp)) + check_freq_ascending(ordered, idx, False) + + @pytest.mark.parametrize("freq", ["D", "H"]) + def test_sort_values_with_freq_timedeltaindex(self, freq): + # GH#10295 + idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx") + + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + DatetimeIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" + ), + DatetimeIndex( + ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], + freq="H", + name="tzidx", + tz="Asia/Tokyo", + ), + ], + ) + def test_sort_values_with_freq_datetimeindex(self, idx): + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize("freq", ["D", "2D", "4D"]) + def test_sort_values_with_freq_periodindex(self, freq): + # here with_freq refers to being period_range-like + idx = PeriodIndex( + ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" + ) + self.check_sort_values_with_freq(idx) + + @pytest.mark.parametrize( + "idx", + [ + PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A"), + Index([2011, 2012, 2013], name="idx"), # for compatibility check + ], + ) + def test_sort_values_with_freq_periodindex2(self, idx): + # here with_freq indicates this is period_range-like + self.check_sort_values_with_freq(idx) + + def check_sort_values_without_freq(self, idx, expected): + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered = idx.sort_values() + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") + tm.assert_index_equal(ordered, 
expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + if not idx.isna().any(): + ordered, indexer = idx.sort_values(return_indexer=True) + tm.assert_index_equal(ordered, expected) + + exp = np.array([0, 4, 3, 1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + + exp = np.array([2, 1, 3, 0, 4], dtype=np.intp) + tm.assert_numpy_array_equal(indexer, exp) + check_freq_nonmonotonic(ordered, idx) + + def test_sort_values_without_freq_timedeltaindex(self): + # GH#10295 + + idx = TimedeltaIndex( + ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" + ) + expected = TimedeltaIndex( + ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" + ) + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "index_dates,expected_dates", + [ + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], + ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + ), + ], + ) + def test_sort_values_without_freq_datetimeindex( + self, index_dates, expected_dates, tz_naive_fixture + ): + tz = tz_naive_fixture + + # without freq + idx = DatetimeIndex(index_dates, tz=tz, name="idx") + expected = DatetimeIndex(expected_dates, tz=tz, name="idx") + + self.check_sort_values_without_freq(idx, expected) + + @pytest.mark.parametrize( + "idx,expected", + [ + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", 
+ name="idx1", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx1", + ), + ), + ( + PeriodIndex( + [ + "2011-01-01", + "2011-01-03", + "2011-01-05", + "2011-01-02", + "2011-01-01", + ], + freq="D", + name="idx2", + ), + PeriodIndex( + [ + "2011-01-01", + "2011-01-01", + "2011-01-02", + "2011-01-03", + "2011-01-05", + ], + freq="D", + name="idx2", + ), + ), + ( + PeriodIndex( + [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], + freq="D", + name="idx3", + ), + PeriodIndex( + [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], + freq="D", + name="idx3", + ), + ), + ( + PeriodIndex( + ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" + ), + PeriodIndex( + ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" + ), + ), + ( + # For compatibility check + Index([2011, 2013, 2015, 2012, 2011], name="idx"), + Index([2011, 2011, 2012, 2013, 2015], name="idx"), + ), + ], + ) + def test_sort_values_without_freq_periodindex(self, idx, expected): + # here without_freq means not generate-able by period_range + self.check_sort_values_without_freq(idx, expected) + + def test_sort_values_without_freq_periodindex_nat(self): + # doesn't quite fit into check_sort_values_without_freq + idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") + expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") + + ordered = idx.sort_values(na_position="first") + tm.assert_index_equal(ordered, expected) + check_freq_nonmonotonic(ordered, idx) + + ordered = idx.sort_values(ascending=False) + tm.assert_index_equal(ordered, expected[::-1]) + check_freq_nonmonotonic(ordered, idx) + + +def test_order_stability_compat(): + # GH#35922.
sort_values is stable both for normal and datetime-like Index + pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") + iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") + ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) + ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) + tm.assert_numpy_array_equal(indexer1, indexer2) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e03de3c75704a..17b80fbc0afc2 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -147,28 +147,6 @@ def test_string_index_series_name_converted(self): result = df.T["1/3/2000"] assert result.name == df.index[2] - def test_argmin_argmax(self): - idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) - assert idx.argmin() == 1 - assert idx.argmax() == 0 - - def test_sort_values(self): - idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"]) - - ordered = idx.sort_values() - assert ordered.is_monotonic - - ordered = idx.sort_values(ascending=False) - assert ordered[::-1].is_monotonic - - ordered, dexer = idx.sort_values(return_indexer=True) - assert ordered.is_monotonic - tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp)) - - ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - assert ordered[::-1].is_monotonic - tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp)) - def test_groupby_function_tuple_1677(self): df = DataFrame(np.random.rand(100), index=date_range("1/1/2000", periods=100)) monthly_group = df.groupby(lambda x: (x.year, x.month)) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 21f142dcaa5b1..49288af89ee22 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -114,89 +114,6 @@ def 
test_value_counts_unique(self, tz_naive_fixture): tm.assert_index_equal(idx.unique(), exp_idx) - @pytest.mark.parametrize( - "idx", - [ - DatetimeIndex( - ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" - ), - DatetimeIndex( - ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], - freq="H", - name="tzidx", - tz="Asia/Tokyo", - ), - ], - ) - def test_order_with_freq(self, idx): - ordered = idx.sort_values() - tm.assert_index_equal(ordered, idx) - assert ordered.freq == idx.freq - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) - tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - assert ordered.freq == idx.freq - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - @pytest.mark.parametrize( - "index_dates,expected_dates", - [ - ( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - ), - ( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - ), - ( - [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], - [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], - ), - ], - ) - def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture): - tz = tz_naive_fixture - - # without freq - index = DatetimeIndex(index_dates, tz=tz, name="idx") - expected = DatetimeIndex(expected_dates, tz=tz, name="idx") - - ordered = 
index.sort_values(na_position="first") - tm.assert_index_equal(ordered, expected) - assert ordered.freq is None - - ordered = index.sort_values(ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - assert ordered.freq is None - - ordered, indexer = index.sort_values(return_indexer=True, na_position="first") - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - - ordered, indexer = index.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 0, 4]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - def test_infer_freq(self, freq_sample): # GH 11018 idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 26cf6895af02c..4ca98f6bbcb75 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -3,7 +3,6 @@ import pandas as pd from pandas import ( - Index, NaT, PeriodIndex, Series, @@ -85,155 +84,6 @@ def test_value_counts_unique(self): tm.assert_index_equal(idx.unique(), exp_idx) - def test_order_compat(self): - def _check_freq(index, expected_index): - if isinstance(index, PeriodIndex): - assert index.freq == expected_index.freq - - pidx = PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="A") - # for compatibility check - iidx = Index([2011, 2012, 2013], name="idx") - for idx in [pidx, iidx]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, idx) - _check_freq(ordered, idx) - - ordered = idx.sort_values(ascending=False) - tm.assert_index_equal(ordered, idx[::-1]) - _check_freq(ordered, idx[::-1]) - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) - tm.assert_numpy_array_equal(indexer, np.array([0, 1, 
2]), check_dtype=False) - _check_freq(ordered, idx) - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, idx[::-1]) - tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) - _check_freq(ordered, idx[::-1]) - - pidx = PeriodIndex( - ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A" - ) - pexpected = PeriodIndex( - ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="A" - ) - # for compatibility check - iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") - iexpected = Index([2011, 2011, 2012, 2013, 2015], name="idx") - for idx, expected in [(pidx, pexpected), (iidx, iexpected)]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, expected) - _check_freq(ordered, idx) - - ordered = idx.sort_values(ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - _check_freq(ordered, idx) - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - _check_freq(ordered, idx) - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - _check_freq(ordered, idx) - - pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") - - result = pidx.sort_values(na_position="first") - expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") - tm.assert_index_equal(result, expected) - assert result.freq == "D" - - result = pidx.sort_values(ascending=False) - expected = PeriodIndex(["2013", "2011", "2011", "NaT"], name="pidx", freq="D") - tm.assert_index_equal(result, expected) - assert result.freq == "D" - - def test_order(self): - for freq in ["D", "2D", "4D"]: - idx = PeriodIndex( - ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx" - ) - - ordered = idx.sort_values() - tm.assert_index_equal(ordered, 
idx) - assert ordered.freq == idx.freq - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - assert ordered.freq == expected.freq - assert ordered.freq == freq - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) - tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - assert ordered.freq == idx.freq - assert ordered.freq == freq - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) - assert ordered.freq == expected.freq - assert ordered.freq == freq - - idx1 = PeriodIndex( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - freq="D", - name="idx1", - ) - exp1 = PeriodIndex( - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - freq="D", - name="idx1", - ) - - idx2 = PeriodIndex( - ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], - freq="D", - name="idx2", - ) - exp2 = PeriodIndex( - ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], - freq="D", - name="idx2", - ) - - idx3 = PeriodIndex( - [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT], freq="D", name="idx3" - ) - exp3 = PeriodIndex( - [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"], freq="D", name="idx3" - ) - - for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: - ordered = idx.sort_values(na_position="first") - tm.assert_index_equal(ordered, expected) - assert ordered.freq == "D" - - ordered = idx.sort_values(ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - assert ordered.freq == "D" - - ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, 
check_dtype=False) - assert ordered.freq == "D" - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 0, 4]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq == "D" - def test_freq_setter_deprecated(self): # GH 20678 idx = pd.period_range("2018Q1", periods=4, freq="Q") @@ -245,12 +95,3 @@ def test_freq_setter_deprecated(self): # warning for setter with pytest.raises(AttributeError, match="can't set attribute"): idx.freq = pd.offsets.Day() - - -def test_order_stability_compat(): - # GH 35922. sort_values is stable both for normal and datetime-like Index - pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") - iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") - ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) - ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) - tm.assert_numpy_array_equal(indexer1, indexer2) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index b7dbd19450b28..8bb86057e7084 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -84,66 +84,6 @@ def test_unknown_attribute(self): with pytest.raises(AttributeError, match=msg): ts.foo - def test_order(self): - # GH 10295 - idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx") - idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx") - - for idx in [idx1, idx2]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, idx) - assert ordered.freq == idx.freq - - ordered = idx.sort_values(ascending=False) - expected = idx[::-1] - tm.assert_index_equal(ordered, expected) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, idx) 
- tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) - assert ordered.freq == idx.freq - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, idx[::-1]) - assert ordered.freq == expected.freq - assert ordered.freq.n == -1 - - idx1 = TimedeltaIndex( - ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1" - ) - exp1 = TimedeltaIndex( - ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1" - ) - - idx2 = TimedeltaIndex( - ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2" - ) - - for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: - ordered = idx.sort_values() - tm.assert_index_equal(ordered, expected) - assert ordered.freq is None - - ordered = idx.sort_values(ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - assert ordered.freq is None - - ordered, indexer = idx.sort_values(return_indexer=True) - tm.assert_index_equal(ordered, expected) - - exp = np.array([0, 4, 3, 1, 2]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - - ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) - tm.assert_index_equal(ordered, expected[::-1]) - - exp = np.array([2, 1, 3, 0, 4]) - tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) - assert ordered.freq is None - def test_infer_freq(self, freq_sample): # GH#11018 idx = timedelta_range("1", freq=freq_sample, periods=10) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index d16a32247b917..d0f4828e8c7bd 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -64,31 +64,6 @@ def test_isin(self): index.isin([index[2], 5]), np.array([False, False, True, False]) ) - def test_sort_values(self): - - idx = TimedeltaIndex(["4d", "1d", "2d"]) - - ordered = idx.sort_values() - assert ordered.is_monotonic - - 
ordered = idx.sort_values(ascending=False) - assert ordered[::-1].is_monotonic - - ordered, dexer = idx.sort_values(return_indexer=True) - assert ordered.is_monotonic - - tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0]), check_dtype=False) - - ordered, dexer = idx.sort_values(return_indexer=True, ascending=False) - assert ordered[::-1].is_monotonic - - tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1]), check_dtype=False) - - def test_argmin_argmax(self): - idx = TimedeltaIndex(["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]) - assert idx.argmin() == 1 - assert idx.argmax() == 0 - def test_misc_coverage(self): rng = timedelta_range("1 day", periods=5) From f7db2eba4a28227ccdebee55033fed368dc08d31 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 25 Feb 2021 17:50:01 -0800 Subject: [PATCH 6/6] remove unused helper --- pandas/tests/indexes/datetimelike_/test_sort_values.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/indexes/datetimelike_/test_sort_values.py b/pandas/tests/indexes/datetimelike_/test_sort_values.py index c76155d11ca0e..ad9c5ca848615 100644 --- a/pandas/tests/indexes/datetimelike_/test_sort_values.py +++ b/pandas/tests/indexes/datetimelike_/test_sort_values.py @@ -12,11 +12,6 @@ import pandas._testing as tm -def _check_freq(index, expected_index): - if isinstance(index, PeriodIndex): - assert index.freq == expected_index.freq - - def check_freq_ascending(ordered, orig, ascending): """ Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex