From 8778812b88126f911eb849ce2b8eada998c16455 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 12 Aug 2019 21:48:21 -0500 Subject: [PATCH 1/5] ENH: MultiIndex.from_product infers level names (GH27292) --- pandas/core/indexes/multi.py | 23 ++++++++++- .../tests/indexes/multi/test_constructor.py | 38 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index b614952ba1e04..ffeb7c91f74b2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -508,7 +508,9 @@ def from_product(cls, iterables, sortorder=None, names=None): Level of sortedness (must be lexicographically sorted by that level). names : list / sequence of str, optional - Names for the levels in the index. + Names for the levels in the index. If not provided, these + will be inferred from iterables if the iterable has a + name attribute. Returns ------- @@ -526,6 +528,18 @@ def from_product(cls, iterables, sortorder=None, names=None): >>> colors = ['green', 'purple'] >>> pd.MultiIndex.from_product([numbers, colors], ... names=['number', 'color']) + MultiIndex([(0, 'green'), + (0, 'purple'), + (1, 'green'), + (1, 'purple'), + (2, 'green'), + (2, 'purple')], + names=['number', 'color']) + + >>> numbers = pd.Series([0, 1, 2], name='number') + >>> colors = pd.Series(['green', 'purple'], name='color') + >>> pd.MultiIndex.from_product([numbers, colors]) + ... MultiIndex([(0, 'green'), (0, 'purple'), (1, 'green'), @@ -541,6 +555,13 @@ def from_product(cls, iterables, sortorder=None, names=None): elif is_iterator(iterables): iterables = list(iterables) + # Infer names from iterable if attribute is available + if names is None: + names = [idx.name if hasattr(idx, "name") else None for idx in iterables] + + if all(name is None for name in names): + names = None + codes, levels = _factorize_from_iterables(iterables) codes = cartesian_product(codes) return MultiIndex(levels, codes, sortorder=sortorder, names=names) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 86c9ee3455d0b..91d9dd39b4925 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -473,6 +473,44 @@ def test_from_product_datetimeindex(): tm.assert_numpy_array_equal(mi.values, etalon) +def test_from_product_infer_names(): + a = pd.Series([1, 2, 3], name="foo") + b = pd.Series(["a", "b"], name="bar") + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=["foo", "bar"], + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_infers_partial_names(): + a = pd.Series([1, 2, 3], name="foo") + b = ["a", "b"] + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=["foo", None], + ) + tm.assert_index_equal(result, expected) + + +def test_from_product_infers_all_none_passed(): + # This ensures that if no names can be inferred, names + # is set to None, instead of a list of None + a = [1, 2, 3] + b = ["a", "b"] + result = MultiIndex.from_product([a, b]) + expected = MultiIndex( + levels=[[1, 2, 3], ["a", "b"]], + codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], + names=None, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("ordered", [False, True]) @pytest.mark.parametrize("f", [lambda x: x, lambda x: pd.Series(x), lambda x: x.values]) def test_from_product_index_series_categorical(ordered, f): From bfeea161d49e53f120d58ff9468f5c7b3466a249 Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 12 Aug 2019 21:59:29 -0500 Subject: [PATCH 2/5] ENH: MultiIndex.from_product infers level names -- updated whatsnew (GH27292) --- doc/source/whatsnew/v0.25.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 21f1fa7ddec1f..25a459e81c903 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -12,7 +12,7 @@ Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- Changed :meth:`MultiIndex.from_product` to infer level names from inputs if possible - - From 6a979cefba1c9a02a56379e0fdec3a2d13cf823a Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 14 Aug 2019 18:24:20 -0500 Subject: [PATCH 3/5] fixed whatsnew merge --- doc/source/whatsnew/v0.25.1.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/indexes/multi.py | 2 +- .../tests/indexes/multi/test_constructor.py | 44 ++++++------------- 4 files changed, 17 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 25a459e81c903..21f1fa7ddec1f 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -12,7 +12,7 @@ Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ -- Changed :meth:`MultiIndex.from_product` to infer level names from inputs if possible +- - - diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index aeed3668fe774..abf29b62f425a 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -29,7 +29,7 @@ Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- :meth:`MultiIndex.from_product` infers level names from inputs when not explicitly provided (:issue:`27292`) - .. _whatsnew_1000.api_breaking: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ffeb7c91f74b2..f1c4ec95b8383 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -557,7 +557,7 @@ def from_product(cls, iterables, sortorder=None, names=None): # Infer names from iterable if attribute is available if names is None: - names = [idx.name if hasattr(idx, "name") else None for idx in iterables] + names = [getattr(idx, "name", None) for idx in iterables] if all(name is None for name in names): names = None diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 91d9dd39b4925..6d88db898ab4d 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -473,40 +473,24 @@ def test_from_product_datetimeindex(): tm.assert_numpy_array_equal(mi.values, etalon) -def test_from_product_infer_names(): - a = pd.Series([1, 2, 3], name="foo") - b = pd.Series(["a", "b"], name="bar") - result = MultiIndex.from_product([a, b]) - expected = MultiIndex( - levels=[[1, 2, 3], ["a", "b"]], - codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], - names=["foo", "bar"], - ) - tm.assert_index_equal(result, expected) - - -def test_from_product_infers_partial_names(): - a = pd.Series([1, 2, 3], name="foo") - b = ["a", "b"] - result = MultiIndex.from_product([a, b]) - expected = MultiIndex( - levels=[[1, 2, 3], ["a", "b"]], - codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], - names=["foo", None], - ) - tm.assert_index_equal(result, expected) - - -def test_from_product_infers_all_none_passed(): - # This ensures that if no names can be inferred, names - # is set to None, instead of a list of None - a = [1, 2, 3] - b = ["a", "b"] +@pytest.mark.parametrize( + "a, b, expected_name", + [ + ( + pd.Series([1, 2, 3], name="foo"), + pd.Series(["a", "b"], name="bar"), + ["foo", "bar"], + ), + (pd.Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), + ([1, 2, 3], ["a", "b"], None), + ], +) +def test_from_product_infers_partial_names(a, b, expected_names): result = MultiIndex.from_product([a, b]) expected = MultiIndex( levels=[[1, 2, 3], ["a", "b"]], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], - names=None, + names=expected_names, ) tm.assert_index_equal(result, expected) From 022b970dfa0abb01c32ec1ef22fae0bb22ea76f2 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 13 Aug 2019 19:28:53 -0500 Subject: [PATCH 4/5] typo in parameterized name --- pandas/tests/indexes/multi/test_constructor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 6d88db898ab4d..2e28d396ab313 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -474,7 +474,7 @@ def test_from_product_datetimeindex(): @pytest.mark.parametrize( - "a, b, expected_name", + "a, b, expected_names", [ ( pd.Series([1, 2, 3], name="foo"), From f040ee4e99d396c5a26f3d9fa52fe033c100e9bf Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 14 Aug 2019 17:43:39 -0500 Subject: [PATCH 5/5] Added test, changes made to docstrings and whatsnew --- pandas/core/indexes/multi.py | 6 +++--- pandas/tests/indexes/multi/test_constructor.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f1c4ec95b8383..612acdef17044 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -508,9 +508,9 @@ def from_product(cls, iterables, sortorder=None, names=None): Level of sortedness (must be lexicographically sorted by that level). names : list / sequence of str, optional - Names for the levels in the index. If not provided, these - will be inferred from iterables if the iterable has a - name attribute. + Names for the levels in the index. If not provided, names + will be inferred from iterables if the elements of iterables + have a name attribute. Returns ------- diff --git a/pandas/tests/indexes/multi/test_constructor.py b/pandas/tests/indexes/multi/test_constructor.py index 2e28d396ab313..bff325b727344 100644 --- a/pandas/tests/indexes/multi/test_constructor.py +++ b/pandas/tests/indexes/multi/test_constructor.py @@ -481,6 +481,7 @@ def test_from_product_datetimeindex(): pd.Series(["a", "b"], name="bar"), ["foo", "bar"], ), + (pd.Series([1, 2, 3]), pd.Series(["a", "b"]), None), (pd.Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]), ([1, 2, 3], ["a", "b"], None), ],