From 982a839408a4f8b7bac5c6df7d664455d215af34 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Thu, 19 May 2022 17:08:16 +0200 Subject: [PATCH 01/17] DOC: Improve reshape\concat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Matěj Štágl --- pandas/core/reshape/concat.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index c2b36dab4a67e..bde90f819f359 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -344,6 +344,20 @@ def concat( Traceback (most recent call last): ... ValueError: Indexes have overlapping values: ['a'] + + Append a single row to the end of a ``DataFrame`` object. + + >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0]) + >>> df7 + a b + 0 1 2 + >>> new_row = pd.Series([3]) + >>> new_row + 0 3 + >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) + a b 0 + 0 1.0 2.0 NaN + 1 NaN NaN 3.0 """ op = _Concatenator( objs, From 108d96e0009f89466e6107169d55a2bf5b6121a7 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Fri, 20 May 2022 19:03:06 +0200 Subject: [PATCH 02/17] Update concat.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Matěj Štágl --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index bde90f819f359..1ac4b6c1f8897 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -354,7 +354,7 @@ def concat( >>> new_row = pd.Series([3]) >>> new_row 0 3 - >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) + >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) a b 0 0 1.0 2.0 NaN 1 NaN NaN 3.0 From f4e394d4fedfbfeb5ea562c8868e5520bc99c2b7 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Fri, 20 May 2022 19:04:53 +0200 Subject: [PATCH 03/17] Revert "Merge branch 'pandas-dev:main' into doc-concat" This reverts commit 824b9bd4eec93b75cc4d0fa64cc858612ecba341, reversing changes made to 982a839408a4f8b7bac5c6df7d664455d215af34. --- asv_bench/benchmarks/gil.py | 82 ++++++------------- .../06_calculate_statistics.rst | 4 +- doc/source/user_guide/10min.rst | 2 +- doc/source/user_guide/groupby.rst | 26 +++--- doc/source/user_guide/indexing.rst | 2 +- doc/source/user_guide/reshaping.rst | 15 ++-- doc/source/user_guide/timeseries.rst | 4 +- doc/source/whatsnew/v0.18.1.rst | 2 +- doc/source/whatsnew/v0.19.0.rst | 4 +- doc/source/whatsnew/v1.4.3.rst | 1 - pandas/_testing/__init__.py | 50 +++++++++++ pandas/core/groupby/groupby.py | 11 +-- pandas/tests/groupby/aggregate/test_numba.py | 27 ------ pandas/tests/groupby/transform/test_numba.py | 27 ------ 14 files changed, 101 insertions(+), 156 deletions(-) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 31654a5c75617..af2efe56c2530 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,6 +1,3 @@ -from functools import wraps -import threading - import numpy as np from pandas import ( @@ -33,57 +30,21 @@ from pandas._libs import algos except ImportError: from pandas import algos +try: + from pandas._testing import test_parallel # noqa: PDF014 + have_real_test_parallel = True +except ImportError: + have_real_test_parallel = False -from .pandas_vb_common import BaseIO # isort:skip - - -def test_parallel(num_threads=2, kwargs_list=None): - """ - Decorator to run the same function multiple times in parallel. 
- - Parameters - ---------- - num_threads : int, optional - The number of times the function is run in parallel. - kwargs_list : list of dicts, optional - The list of kwargs to update original - function kwargs on different threads. - - Notes - ----- - This decorator does not pass the return value of the decorated function. - - Original from scikit-image: - - https://github.com/scikit-image/scikit-image/pull/1519 - - """ - assert num_threads > 0 - has_kwargs_list = kwargs_list is not None - if has_kwargs_list: - assert len(kwargs_list) == num_threads + def test_parallel(num_threads=1): + def wrapper(fname): + return fname - def wrapper(func): - @wraps(func) - def inner(*args, **kwargs): - if has_kwargs_list: - update_kwargs = lambda i: dict(kwargs, **kwargs_list[i]) - else: - update_kwargs = lambda i: kwargs - threads = [] - for i in range(num_threads): - updated_kwargs = update_kwargs(i) - thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs) - threads.append(thread) - for thread in threads: - thread.start() - for thread in threads: - thread.join() + return wrapper - return inner - return wrapper +from .pandas_vb_common import BaseIO # isort:skip class ParallelGroupbyMethods: @@ -92,7 +53,8 @@ class ParallelGroupbyMethods: param_names = ["threads", "method"] def setup(self, threads, method): - + if not have_real_test_parallel: + raise NotImplementedError N = 10**6 ngroups = 10**3 df = DataFrame( @@ -124,7 +86,8 @@ class ParallelGroups: param_names = ["threads"] def setup(self, threads): - + if not have_real_test_parallel: + raise NotImplementedError size = 2**22 ngroups = 10**3 data = Series(np.random.randint(0, ngroups, size=size)) @@ -145,7 +108,8 @@ class ParallelTake1D: param_names = ["dtype"] def setup(self, dtype): - + if not have_real_test_parallel: + raise NotImplementedError N = 10**6 df = DataFrame({"col": np.arange(N, dtype=dtype)}) indexer = np.arange(100, len(df) - 100) @@ -167,7 +131,8 @@ class ParallelKth: repeat = 5 def setup(self): - + if not have_real_test_parallel: + raise NotImplementedError N = 10**7 k = 5 * 10**5 kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}] @@ -184,7 +149,8 @@ def time_kth_smallest(self): class ParallelDatetimeFields: def setup(self): - + if not have_real_test_parallel: + raise NotImplementedError N = 10**6 self.dti = date_range("1900-01-01", periods=N, freq="T") self.period = self.dti.to_period("D") @@ -238,7 +204,8 @@ class ParallelRolling: param_names = ["method"] def setup(self, method): - + if not have_real_test_parallel: + raise NotImplementedError win = 100 arr = np.random.rand(100000) if hasattr(DataFrame, "rolling"): @@ -281,7 +248,8 @@ class ParallelReadCSV(BaseIO): param_names = ["dtype"] def setup(self, dtype): - + if not have_real_test_parallel: + raise NotImplementedError rows = 10000 cols = 50 data = { @@ -316,6 +284,8 @@ class ParallelFactorize: param_names = ["threads"] def setup(self, threads): + if not have_real_test_parallel: + raise NotImplementedError strings = tm.makeStringIndex(100000) diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst index 346a5cecf601d..298d0c4e0111c 100644 --- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -154,11 +154,11 @@ The apply and combine steps are typically done together in pandas. 
In the previous example, we explicitly selected the 2 columns first. If not, the ``mean`` method is applied to each column containing numerical -columns by passing ``numeric_only=True``: +columns: .. ipython:: python - titanic.groupby("Sex").mean(numeric_only=True) + titanic.groupby("Sex").mean() It does not make much sense to get the average value of the ``Pclass``. If we are only interested in the average age for each gender, the diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 9916f13e015dd..9ccf191194e19 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -532,7 +532,7 @@ groups: .. ipython:: python - df.groupby("A")[["C", "D"]].sum() + df.groupby("A").sum() Grouping by multiple columns forms a hierarchical index, and again we can apply the :meth:`~pandas.core.groupby.GroupBy.sum` function: diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index f2d83885df2d0..f381d72069775 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -477,7 +477,7 @@ An obvious one is aggregation via the .. ipython:: python grouped = df.groupby("A") - grouped[["C", "D"]].aggregate(np.sum) + grouped.aggregate(np.sum) grouped = df.groupby(["A", "B"]) grouped.aggregate(np.sum) @@ -492,7 +492,7 @@ changed by using the ``as_index`` option: grouped = df.groupby(["A", "B"], as_index=False) grouped.aggregate(np.sum) - df.groupby("A", as_index=False)[["C", "D"]].sum() + df.groupby("A", as_index=False).sum() Note that you could use the ``reset_index`` DataFrame function to achieve the same result as the column names are stored in the resulting ``MultiIndex``: @@ -730,7 +730,7 @@ optimized Cython implementations: .. ipython:: python - df.groupby("A")[["C", "D"]].sum() + df.groupby("A").sum() df.groupby(["A", "B"]).mean() Of course ``sum`` and ``mean`` are implemented on pandas objects, so the above @@ -1159,12 +1159,13 @@ Again consider the example DataFrame we've been looking at: Suppose we wish to compute the standard deviation grouped by the ``A`` column. There is a slight problem, namely that we don't care about the data in -column ``B``. We refer to this as a "nuisance" column. You can avoid nuisance -columns by specifying ``numeric_only=True``: +column ``B``. We refer to this as a "nuisance" column. If the passed +aggregation function can't be applied to some columns, the troublesome columns +will be (silently) dropped. Thus, this does not pose any problems: .. ipython:: python - df.groupby("A").std(numeric_only=True) + df.groupby("A").std() Note that ``df.groupby('A').colname.std().`` is more efficient than ``df.groupby('A').std().colname``, so if the result of an aggregation function @@ -1179,14 +1180,7 @@ is only interesting over one column (here ``colname``), it may be filtered If you do wish to include decimal or object columns in an aggregation with other non-nuisance data types, you must do so explicitly. -.. warning:: - The automatic dropping of nuisance columns has been deprecated and will be removed - in a future version of pandas. If columns are included that cannot be operated - on, pandas will instead raise an error. In order to avoid this, either select - the columns you wish to operate on or specify ``numeric_only=True``. - .. ipython:: python - :okwarning: from decimal import Decimal @@ -1310,7 +1304,7 @@ Groupby a specific column with the desired frequency. This is like resampling. .. 
ipython:: python - df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"])[["Quantity"]].sum() + df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum() You have an ambiguous specification in that you have a named index and a column that could be potential groupers. @@ -1319,9 +1313,9 @@ that could be potential groupers. df = df.set_index("Date") df["Date"] = df.index + pd.offsets.MonthEnd(2) - df.groupby([pd.Grouper(freq="6M", key="Date"), "Buyer"])[["Quantity"]].sum() + df.groupby([pd.Grouper(freq="6M", key="Date"), "Buyer"]).sum() - df.groupby([pd.Grouper(freq="6M", level="Date"), "Buyer"])[["Quantity"]].sum() + df.groupby([pd.Grouper(freq="6M", level="Date"), "Buyer"]).sum() Taking the first rows of each group diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 3c08b5a498eea..a94681924d211 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -583,7 +583,7 @@ without using a temporary variable. .. ipython:: python bb = pd.read_csv('data/baseball.csv', index_col='id') - (bb.groupby(['year', 'team']).sum(numeric_only=True) + (bb.groupby(['year', 'team']).sum() .loc[lambda df: df['r'] > 100]) diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index b24890564d1bf..f9e68b1b39ddc 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -414,11 +414,12 @@ We can produce pivot tables from this data very easily: The result object is a :class:`DataFrame` having potentially hierarchical indexes on the rows and columns. If the ``values`` column name is not given, the pivot table -will include all of the data in an additional level of hierarchy in the columns: +will include all of the data that can be aggregated in an additional level of +hierarchy in the columns: .. ipython:: python - pd.pivot_table(df[["A", "B", "C", "D", "E"]], index=["A", "B"], columns=["C"]) + pd.pivot_table(df, index=["A", "B"], columns=["C"]) Also, you can use :class:`Grouper` for ``index`` and ``columns`` keywords. For detail of :class:`Grouper`, see :ref:`Grouping with a Grouper specification `. @@ -431,7 +432,7 @@ calling :meth:`~DataFrame.to_string` if you wish: .. ipython:: python - table = pd.pivot_table(df, index=["A", "B"], columns=["C"], values=["D", "E"]) + table = pd.pivot_table(df, index=["A", "B"], columns=["C"]) print(table.to_string(na_rep="")) Note that :meth:`~DataFrame.pivot_table` is also available as an instance method on DataFrame, @@ -448,13 +449,7 @@ rows and columns: .. ipython:: python - table = df.pivot_table( - index=["A", "B"], - columns="C", - values=["D", "E"], - margins=True, - aggfunc=np.std - ) + table = df.pivot_table(index=["A", "B"], columns="C", margins=True, aggfunc=np.std) table Additionally, you can call :meth:`DataFrame.stack` to display a pivoted DataFrame diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index c67d028b65b3e..582620d8b6479 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1821,7 +1821,7 @@ to resample based on datetimelike column in the frame, it can passed to the ), ) df - df.resample("M", on="date")[["a"]].sum() + df.resample("M", on="date").sum() Similarly, if you instead want to resample by a datetimelike level of ``MultiIndex``, its name or location can be passed to the @@ -1829,7 +1829,7 @@ level of ``MultiIndex``, its name or location can be passed to the .. 
ipython:: python - df.resample("M", level="d")[["a"]].sum() + df.resample("M", level="d").sum() .. _timeseries.iterating-label: diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst index 7d9008fdbdecd..f873d320822ae 100644 --- a/doc/source/whatsnew/v0.18.1.rst +++ b/doc/source/whatsnew/v0.18.1.rst @@ -166,7 +166,7 @@ without using temporary variable. .. ipython:: python bb = pd.read_csv("data/baseball.csv", index_col="id") - (bb.groupby(["year", "team"]).sum(numeric_only=True).loc[lambda df: df.r > 100]) + (bb.groupby(["year", "team"]).sum().loc[lambda df: df.r > 100]) .. _whatsnew_0181.partial_string_indexing: diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 113bbcf0a05bc..a2bb935c708bc 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -497,8 +497,8 @@ Other enhancements ), ) df - df.resample("M", on="date")[["a"]].sum() - df.resample("M", level="d")[["a"]].sum() + df.resample("M", on="date").sum() + df.resample("M", level="d").sum() - The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raising a ``NonExistentTimeError`` (:issue:`13057`) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index 7c09eec212d69..23c8ad63bf7bb 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -16,7 +16,6 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) -- Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`) - Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`) .. --------------------------------------------------------------------------- diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 53e003e2ed7dd..603c2f081a31a 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -3,6 +3,7 @@ import collections from datetime import datetime from decimal import Decimal +from functools import wraps import operator import os import re @@ -748,6 +749,55 @@ def makeMissingDataframe(density=0.9, random_state=None): return df +def test_parallel(num_threads=2, kwargs_list=None): + """ + Decorator to run the same function multiple times in parallel. + + Parameters + ---------- + num_threads : int, optional + The number of times the function is run in parallel. + kwargs_list : list of dicts, optional + The list of kwargs to update original + function kwargs on different threads. + + Notes + ----- + This decorator does not pass the return value of the decorated function. 
+ + Original from scikit-image: + + https://github.com/scikit-image/scikit-image/pull/1519 + + """ + assert num_threads > 0 + has_kwargs_list = kwargs_list is not None + if has_kwargs_list: + assert len(kwargs_list) == num_threads + import threading + + def wrapper(func): + @wraps(func) + def inner(*args, **kwargs): + if has_kwargs_list: + update_kwargs = lambda i: dict(kwargs, **kwargs_list[i]) + else: + update_kwargs = lambda i: kwargs + threads = [] + for i in range(num_threads): + updated_kwargs = update_kwargs(i) + thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs) + threads.append(thread) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + return inner + + return wrapper + + class SubclassedSeries(Series): _metadata = ["testattr", "name"] diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f7c89b6e7dc49..0203d54e0de86 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1310,16 +1310,7 @@ def _numba_prep(self, data): sorted_ids = algorithms.take_nd(ids, sorted_index, allow_fill=False) sorted_data = data.take(sorted_index, axis=self.axis).to_numpy() - if len(self.grouper.groupings) > 1: - raise NotImplementedError( - "More than 1 grouping labels are not supported with engine='numba'" - ) - # GH 46867 - index_data = data.index - if isinstance(index_data, MultiIndex): - group_key = self.grouper.groupings[0].name - index_data = index_data.get_level_values(group_key) - sorted_index_data = index_data.take(sorted_index).to_numpy() + sorted_index_data = data.index.take(sorted_index).to_numpy() starts, ends = lib.generate_slices(sorted_ids, ngroups) return ( diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index 9f71c2c2fa0b6..ba58ac27284b8 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -211,30 +211,3 @@ def func_kwargs(values, index): ) expected = DataFrame({"value": [1.0, 1.0, 1.0]}) tm.assert_frame_equal(result, expected) - - -@td.skip_if_no("numba") -def test_multiindex_one_key(nogil, parallel, nopython): - def numba_func(values, index): - return 1 - - df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) - engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} - result = df.groupby("A").agg( - numba_func, engine="numba", engine_kwargs=engine_kwargs - ) - expected = DataFrame([1.0], index=Index([1], name="A"), columns=["C"]) - tm.assert_frame_equal(result, expected) - - -@td.skip_if_no("numba") -def test_multiindex_multi_key_not_supported(nogil, parallel, nopython): - def numba_func(values, index): - return 1 - - df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) - engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} - with pytest.raises(NotImplementedError, match="More than 1 grouping labels"): - df.groupby(["A", "B"]).agg( - numba_func, engine="numba", engine_kwargs=engine_kwargs - ) diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index 1b8570dbdc21d..a404e0b9304cc 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -199,30 +199,3 @@ def func_kwargs(values, index): ) expected = DataFrame({"value": [1.0, 1.0, 1.0]}) tm.assert_frame_equal(result, expected) - - -@td.skip_if_no("numba") -def test_multiindex_one_key(nogil, parallel, nopython): - def numba_func(values, 
index): - return 1 - - df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) - engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} - result = df.groupby("A").transform( - numba_func, engine="numba", engine_kwargs=engine_kwargs - ) - expected = DataFrame([{"A": 1, "B": 2, "C": 1.0}]).set_index(["A", "B"]) - tm.assert_frame_equal(result, expected) - - -@td.skip_if_no("numba") -def test_multiindex_multi_key_not_supported(nogil, parallel, nopython): - def numba_func(values, index): - return 1 - - df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) - engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} - with pytest.raises(NotImplementedError, match="More than 1 grouping labels"): - df.groupby(["A", "B"]).transform( - numba_func, engine="numba", engine_kwargs=engine_kwargs - ) From 7d4a81fd3c10eeab9f38309bc508373329f0ce4b Mon Sep 17 00:00:00 2001 From: anetakahle Date: Fri, 20 May 2022 19:14:12 +0200 Subject: [PATCH 04/17] Update concat.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Matěj Štágl --- pandas/core/reshape/concat.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 1ac4b6c1f8897..8f20f554c295e 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -351,13 +351,16 @@ def concat( >>> df7 a b 0 1 2 - >>> new_row = pd.Series([3]) - >>> new_row - 0 3 - >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) - a b 0 - 0 1.0 2.0 NaN - 1 NaN NaN 3.0 + >>> new_rows = pd.Series({'a': 3, 'b': 4}) + >>> new_rows + 0 3 4 + >>> pd.concat([df7, new_rows.to_frame().T], ignore_index=True) + a b + 0 1 2 + 1 3 4 + + (It is not recomended to build DataFrames by adding single rows in a +not loop. Build a list of rows and make a DataFrame in a single concat.) """ op = _Concatenator( objs, From 873a59f4f9d8745d91220e361f55938f775b01c0 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Fri, 20 May 2022 19:21:13 +0200 Subject: [PATCH 05/17] Update concat.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Matěj Štágl --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 8f20f554c295e..1e5741f024392 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -359,7 +359,7 @@ def concat( 0 1 2 1 3 4 - (It is not recomended to build DataFrames by adding single rows in a + (It is not recommended to build DataFrames by adding single rows in a not loop. Build a list of rows and make a DataFrame in a single concat.) """ op = _Concatenator( From 95139122722c3827cf34a6db7862ac0eeffaf727 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Fri, 20 May 2022 19:29:33 +0200 Subject: [PATCH 06/17] Update concat.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Matěj Štágl --- pandas/core/reshape/concat.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 1e5741f024392..78730db572e4d 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -226,6 +226,10 @@ def concat( pandas objects can be found `here `__. + + It is not recommended to build DataFrames by adding single rows in a +not loop. 
Build a list of rows and make a DataFrame in a single concat. + Examples -------- Combine two ``Series``. @@ -351,16 +355,13 @@ def concat( >>> df7 a b 0 1 2 - >>> new_rows = pd.Series({'a': 3, 'b': 4}) - >>> new_rows + >>> new_row = pd.Series({'a': 3, 'b': 4}) + >>> new_row 0 3 4 - >>> pd.concat([df7, new_rows.to_frame().T], ignore_index=True) + >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) a b 0 1 2 1 3 4 - - (It is not recommended to build DataFrames by adding single rows in a -not loop. Build a list of rows and make a DataFrame in a single concat.) """ op = _Concatenator( objs, From 0b2926527ec2b60ed3bfb3ddf433981113d74da0 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Fri, 20 May 2022 19:44:54 +0200 Subject: [PATCH 07/17] Update pandas/core/reshape/concat.py Co-authored-by: Marco Edward Gorelli --- pandas/core/reshape/concat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 78730db572e4d..e920a49ecdde3 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -228,7 +228,7 @@ def concat( It is not recommended to build DataFrames by adding single rows in a -not loop. Build a list of rows and make a DataFrame in a single concat. +for loop. Build a list of rows and make a DataFrame in a single concat. Examples -------- From eff3bad23f818be3e58c356d39685e460892b315 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Fri, 20 May 2022 19:45:20 +0200 Subject: [PATCH 08/17] Revert "Revert "Merge branch 'pandas-dev:main' into doc-concat"" This reverts commit f4e394d4fedfbfeb5ea562c8868e5520bc99c2b7. --- asv_bench/benchmarks/gil.py | 82 +++++++++++++------ .../06_calculate_statistics.rst | 4 +- doc/source/user_guide/10min.rst | 2 +- doc/source/user_guide/groupby.rst | 26 +++--- doc/source/user_guide/indexing.rst | 2 +- doc/source/user_guide/reshaping.rst | 15 ++-- doc/source/user_guide/timeseries.rst | 4 +- doc/source/whatsnew/v0.18.1.rst | 2 +- doc/source/whatsnew/v0.19.0.rst | 4 +- doc/source/whatsnew/v1.4.3.rst | 1 + pandas/_testing/__init__.py | 50 ----------- pandas/core/groupby/groupby.py | 11 ++- pandas/tests/groupby/aggregate/test_numba.py | 27 ++++++ pandas/tests/groupby/transform/test_numba.py | 27 ++++++ 14 files changed, 156 insertions(+), 101 deletions(-) diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index af2efe56c2530..31654a5c75617 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,3 +1,6 @@ +from functools import wraps +import threading + import numpy as np from pandas import ( @@ -30,21 +33,57 @@ from pandas._libs import algos except ImportError: from pandas import algos -try: - from pandas._testing import test_parallel # noqa: PDF014 - have_real_test_parallel = True -except ImportError: - have_real_test_parallel = False - def test_parallel(num_threads=1): - def wrapper(fname): - return fname +from .pandas_vb_common import BaseIO # isort:skip - return wrapper +def test_parallel(num_threads=2, kwargs_list=None): + """ + Decorator to run the same function multiple times in parallel. -from .pandas_vb_common import BaseIO # isort:skip + Parameters + ---------- + num_threads : int, optional + The number of times the function is run in parallel. + kwargs_list : list of dicts, optional + The list of kwargs to update original + function kwargs on different threads. + + Notes + ----- + This decorator does not pass the return value of the decorated function. 
+ + Original from scikit-image: + + https://github.com/scikit-image/scikit-image/pull/1519 + + """ + assert num_threads > 0 + has_kwargs_list = kwargs_list is not None + if has_kwargs_list: + assert len(kwargs_list) == num_threads + + def wrapper(func): + @wraps(func) + def inner(*args, **kwargs): + if has_kwargs_list: + update_kwargs = lambda i: dict(kwargs, **kwargs_list[i]) + else: + update_kwargs = lambda i: kwargs + threads = [] + for i in range(num_threads): + updated_kwargs = update_kwargs(i) + thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs) + threads.append(thread) + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + return inner + + return wrapper class ParallelGroupbyMethods: @@ -53,8 +92,7 @@ class ParallelGroupbyMethods: param_names = ["threads", "method"] def setup(self, threads, method): - if not have_real_test_parallel: - raise NotImplementedError + N = 10**6 ngroups = 10**3 df = DataFrame( @@ -86,8 +124,7 @@ class ParallelGroups: param_names = ["threads"] def setup(self, threads): - if not have_real_test_parallel: - raise NotImplementedError + size = 2**22 ngroups = 10**3 data = Series(np.random.randint(0, ngroups, size=size)) @@ -108,8 +145,7 @@ class ParallelTake1D: param_names = ["dtype"] def setup(self, dtype): - if not have_real_test_parallel: - raise NotImplementedError + N = 10**6 df = DataFrame({"col": np.arange(N, dtype=dtype)}) indexer = np.arange(100, len(df) - 100) @@ -131,8 +167,7 @@ class ParallelKth: repeat = 5 def setup(self): - if not have_real_test_parallel: - raise NotImplementedError + N = 10**7 k = 5 * 10**5 kwargs_list = [{"arr": np.random.randn(N)}, {"arr": np.random.randn(N)}] @@ -149,8 +184,7 @@ def time_kth_smallest(self): class ParallelDatetimeFields: def setup(self): - if not have_real_test_parallel: - raise NotImplementedError + N = 10**6 self.dti = date_range("1900-01-01", periods=N, freq="T") self.period = self.dti.to_period("D") @@ -204,8 +238,7 @@ class ParallelRolling: param_names = ["method"] def setup(self, method): - if not have_real_test_parallel: - raise NotImplementedError + win = 100 arr = np.random.rand(100000) if hasattr(DataFrame, "rolling"): @@ -248,8 +281,7 @@ class ParallelReadCSV(BaseIO): param_names = ["dtype"] def setup(self, dtype): - if not have_real_test_parallel: - raise NotImplementedError + rows = 10000 cols = 50 data = { @@ -284,8 +316,6 @@ class ParallelFactorize: param_names = ["threads"] def setup(self, threads): - if not have_real_test_parallel: - raise NotImplementedError strings = tm.makeStringIndex(100000) diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst index 298d0c4e0111c..346a5cecf601d 100644 --- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -154,11 +154,11 @@ The apply and combine steps are typically done together in pandas. In the previous example, we explicitly selected the 2 columns first. If not, the ``mean`` method is applied to each column containing numerical -columns: +columns by passing ``numeric_only=True``: .. ipython:: python - titanic.groupby("Sex").mean() + titanic.groupby("Sex").mean(numeric_only=True) It does not make much sense to get the average value of the ``Pclass``. 
If we are only interested in the average age for each gender, the diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 9ccf191194e19..9916f13e015dd 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -532,7 +532,7 @@ groups: .. ipython:: python - df.groupby("A").sum() + df.groupby("A")[["C", "D"]].sum() Grouping by multiple columns forms a hierarchical index, and again we can apply the :meth:`~pandas.core.groupby.GroupBy.sum` function: diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index f381d72069775..f2d83885df2d0 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -477,7 +477,7 @@ An obvious one is aggregation via the .. ipython:: python grouped = df.groupby("A") - grouped.aggregate(np.sum) + grouped[["C", "D"]].aggregate(np.sum) grouped = df.groupby(["A", "B"]) grouped.aggregate(np.sum) @@ -492,7 +492,7 @@ changed by using the ``as_index`` option: grouped = df.groupby(["A", "B"], as_index=False) grouped.aggregate(np.sum) - df.groupby("A", as_index=False).sum() + df.groupby("A", as_index=False)[["C", "D"]].sum() Note that you could use the ``reset_index`` DataFrame function to achieve the same result as the column names are stored in the resulting ``MultiIndex``: @@ -730,7 +730,7 @@ optimized Cython implementations: .. ipython:: python - df.groupby("A").sum() + df.groupby("A")[["C", "D"]].sum() df.groupby(["A", "B"]).mean() Of course ``sum`` and ``mean`` are implemented on pandas objects, so the above @@ -1159,13 +1159,12 @@ Again consider the example DataFrame we've been looking at: Suppose we wish to compute the standard deviation grouped by the ``A`` column. There is a slight problem, namely that we don't care about the data in -column ``B``. We refer to this as a "nuisance" column. If the passed -aggregation function can't be applied to some columns, the troublesome columns -will be (silently) dropped. Thus, this does not pose any problems: +column ``B``. We refer to this as a "nuisance" column. You can avoid nuisance +columns by specifying ``numeric_only=True``: .. ipython:: python - df.groupby("A").std() + df.groupby("A").std(numeric_only=True) Note that ``df.groupby('A').colname.std().`` is more efficient than ``df.groupby('A').std().colname``, so if the result of an aggregation function @@ -1180,7 +1179,14 @@ is only interesting over one column (here ``colname``), it may be filtered If you do wish to include decimal or object columns in an aggregation with other non-nuisance data types, you must do so explicitly. +.. warning:: + The automatic dropping of nuisance columns has been deprecated and will be removed + in a future version of pandas. If columns are included that cannot be operated + on, pandas will instead raise an error. In order to avoid this, either select + the columns you wish to operate on or specify ``numeric_only=True``. + .. ipython:: python + :okwarning: from decimal import Decimal @@ -1304,7 +1310,7 @@ Groupby a specific column with the desired frequency. This is like resampling. .. ipython:: python - df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"]).sum() + df.groupby([pd.Grouper(freq="1M", key="Date"), "Buyer"])[["Quantity"]].sum() You have an ambiguous specification in that you have a named index and a column that could be potential groupers. @@ -1313,9 +1319,9 @@ that could be potential groupers. 
df = df.set_index("Date") df["Date"] = df.index + pd.offsets.MonthEnd(2) - df.groupby([pd.Grouper(freq="6M", key="Date"), "Buyer"]).sum() + df.groupby([pd.Grouper(freq="6M", key="Date"), "Buyer"])[["Quantity"]].sum() - df.groupby([pd.Grouper(freq="6M", level="Date"), "Buyer"]).sum() + df.groupby([pd.Grouper(freq="6M", level="Date"), "Buyer"])[["Quantity"]].sum() Taking the first rows of each group diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index a94681924d211..3c08b5a498eea 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -583,7 +583,7 @@ without using a temporary variable. .. ipython:: python bb = pd.read_csv('data/baseball.csv', index_col='id') - (bb.groupby(['year', 'team']).sum() + (bb.groupby(['year', 'team']).sum(numeric_only=True) .loc[lambda df: df['r'] > 100]) diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index f9e68b1b39ddc..b24890564d1bf 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -414,12 +414,11 @@ We can produce pivot tables from this data very easily: The result object is a :class:`DataFrame` having potentially hierarchical indexes on the rows and columns. If the ``values`` column name is not given, the pivot table -will include all of the data that can be aggregated in an additional level of -hierarchy in the columns: +will include all of the data in an additional level of hierarchy in the columns: .. ipython:: python - pd.pivot_table(df, index=["A", "B"], columns=["C"]) + pd.pivot_table(df[["A", "B", "C", "D", "E"]], index=["A", "B"], columns=["C"]) Also, you can use :class:`Grouper` for ``index`` and ``columns`` keywords. For detail of :class:`Grouper`, see :ref:`Grouping with a Grouper specification `. @@ -432,7 +431,7 @@ calling :meth:`~DataFrame.to_string` if you wish: .. ipython:: python - table = pd.pivot_table(df, index=["A", "B"], columns=["C"]) + table = pd.pivot_table(df, index=["A", "B"], columns=["C"], values=["D", "E"]) print(table.to_string(na_rep="")) Note that :meth:`~DataFrame.pivot_table` is also available as an instance method on DataFrame, @@ -449,7 +448,13 @@ rows and columns: .. ipython:: python - table = df.pivot_table(index=["A", "B"], columns="C", margins=True, aggfunc=np.std) + table = df.pivot_table( + index=["A", "B"], + columns="C", + values=["D", "E"], + margins=True, + aggfunc=np.std + ) table Additionally, you can call :meth:`DataFrame.stack` to display a pivoted DataFrame diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 582620d8b6479..c67d028b65b3e 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -1821,7 +1821,7 @@ to resample based on datetimelike column in the frame, it can passed to the ), ) df - df.resample("M", on="date").sum() + df.resample("M", on="date")[["a"]].sum() Similarly, if you instead want to resample by a datetimelike level of ``MultiIndex``, its name or location can be passed to the @@ -1829,7 +1829,7 @@ level of ``MultiIndex``, its name or location can be passed to the .. ipython:: python - df.resample("M", level="d").sum() + df.resample("M", level="d")[["a"]].sum() .. _timeseries.iterating-label: diff --git a/doc/source/whatsnew/v0.18.1.rst b/doc/source/whatsnew/v0.18.1.rst index f873d320822ae..7d9008fdbdecd 100644 --- a/doc/source/whatsnew/v0.18.1.rst +++ b/doc/source/whatsnew/v0.18.1.rst @@ -166,7 +166,7 @@ without using temporary variable. .. 
ipython:: python bb = pd.read_csv("data/baseball.csv", index_col="id") - (bb.groupby(["year", "team"]).sum().loc[lambda df: df.r > 100]) + (bb.groupby(["year", "team"]).sum(numeric_only=True).loc[lambda df: df.r > 100]) .. _whatsnew_0181.partial_string_indexing: diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index a2bb935c708bc..113bbcf0a05bc 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -497,8 +497,8 @@ Other enhancements ), ) df - df.resample("M", on="date").sum() - df.resample("M", level="d").sum() + df.resample("M", on="date")[["a"]].sum() + df.resample("M", level="d")[["a"]].sum() - The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raising a ``NonExistentTimeError`` (:issue:`13057`) diff --git a/doc/source/whatsnew/v1.4.3.rst b/doc/source/whatsnew/v1.4.3.rst index 23c8ad63bf7bb..7c09eec212d69 100644 --- a/doc/source/whatsnew/v1.4.3.rst +++ b/doc/source/whatsnew/v1.4.3.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.nsmallest` led to wrong results when ``np.nan`` in the sorting column (:issue:`46589`) - Fixed regression in :func:`read_fwf` raising ``ValueError`` when ``widths`` was specified with ``usecols`` (:issue:`46580`) +- Fixed regression in :meth:`.Groupby.transform` and :meth:`.Groupby.agg` failing with ``engine="numba"`` when the index was a :class:`MultiIndex` (:issue:`46867`) - Fixed regression is :meth:`.Styler.to_latex` and :meth:`.Styler.to_html` where ``buf`` failed in combination with ``encoding`` (:issue:`47053`) .. --------------------------------------------------------------------------- diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 603c2f081a31a..53e003e2ed7dd 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -3,7 +3,6 @@ import collections from datetime import datetime from decimal import Decimal -from functools import wraps import operator import os import re @@ -749,55 +748,6 @@ def makeMissingDataframe(density=0.9, random_state=None): return df -def test_parallel(num_threads=2, kwargs_list=None): - """ - Decorator to run the same function multiple times in parallel. - - Parameters - ---------- - num_threads : int, optional - The number of times the function is run in parallel. - kwargs_list : list of dicts, optional - The list of kwargs to update original - function kwargs on different threads. - - Notes - ----- - This decorator does not pass the return value of the decorated function. 
- - Original from scikit-image: - - https://github.com/scikit-image/scikit-image/pull/1519 - - """ - assert num_threads > 0 - has_kwargs_list = kwargs_list is not None - if has_kwargs_list: - assert len(kwargs_list) == num_threads - import threading - - def wrapper(func): - @wraps(func) - def inner(*args, **kwargs): - if has_kwargs_list: - update_kwargs = lambda i: dict(kwargs, **kwargs_list[i]) - else: - update_kwargs = lambda i: kwargs - threads = [] - for i in range(num_threads): - updated_kwargs = update_kwargs(i) - thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs) - threads.append(thread) - for thread in threads: - thread.start() - for thread in threads: - thread.join() - - return inner - - return wrapper - - class SubclassedSeries(Series): _metadata = ["testattr", "name"] diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 0203d54e0de86..f7c89b6e7dc49 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1310,7 +1310,16 @@ def _numba_prep(self, data): sorted_ids = algorithms.take_nd(ids, sorted_index, allow_fill=False) sorted_data = data.take(sorted_index, axis=self.axis).to_numpy() - sorted_index_data = data.index.take(sorted_index).to_numpy() + if len(self.grouper.groupings) > 1: + raise NotImplementedError( + "More than 1 grouping labels are not supported with engine='numba'" + ) + # GH 46867 + index_data = data.index + if isinstance(index_data, MultiIndex): + group_key = self.grouper.groupings[0].name + index_data = index_data.get_level_values(group_key) + sorted_index_data = index_data.take(sorted_index).to_numpy() starts, ends = lib.generate_slices(sorted_ids, ngroups) return ( diff --git a/pandas/tests/groupby/aggregate/test_numba.py b/pandas/tests/groupby/aggregate/test_numba.py index ba58ac27284b8..9f71c2c2fa0b6 100644 --- a/pandas/tests/groupby/aggregate/test_numba.py +++ b/pandas/tests/groupby/aggregate/test_numba.py @@ -211,3 +211,30 @@ def func_kwargs(values, index): ) expected = DataFrame({"value": [1.0, 1.0, 1.0]}) tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_multiindex_one_key(nogil, parallel, nopython): + def numba_func(values, index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + result = df.groupby("A").agg( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame([1.0], index=Index([1], name="A"), columns=["C"]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_multiindex_multi_key_not_supported(nogil, parallel, nopython): + def numba_func(values, index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + with pytest.raises(NotImplementedError, match="More than 1 grouping labels"): + df.groupby(["A", "B"]).agg( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) diff --git a/pandas/tests/groupby/transform/test_numba.py b/pandas/tests/groupby/transform/test_numba.py index a404e0b9304cc..1b8570dbdc21d 100644 --- a/pandas/tests/groupby/transform/test_numba.py +++ b/pandas/tests/groupby/transform/test_numba.py @@ -199,3 +199,30 @@ def func_kwargs(values, index): ) expected = DataFrame({"value": [1.0, 1.0, 1.0]}) tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_multiindex_one_key(nogil, parallel, nopython): + def numba_func(values, 
index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + result = df.groupby("A").transform( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) + expected = DataFrame([{"A": 1, "B": 2, "C": 1.0}]).set_index(["A", "B"]) + tm.assert_frame_equal(result, expected) + + +@td.skip_if_no("numba") +def test_multiindex_multi_key_not_supported(nogil, parallel, nopython): + def numba_func(values, index): + return 1 + + df = DataFrame([{"A": 1, "B": 2, "C": 3}]).set_index(["A", "B"]) + engine_kwargs = {"nopython": nopython, "nogil": nogil, "parallel": parallel} + with pytest.raises(NotImplementedError, match="More than 1 grouping labels"): + df.groupby(["A", "B"]).transform( + numba_func, engine="numba", engine_kwargs=engine_kwargs + ) From 6b553919964bd8ea93a7e75676ba8f6271cba1d2 Mon Sep 17 00:00:00 2001 From: anetakahle Date: Sat, 21 May 2022 19:47:17 +0200 Subject: [PATCH 09/17] lint file --- pandas/core/reshape/concat.py | 406 +++++++++++++++++----------------- 1 file changed, 203 insertions(+), 203 deletions(-) diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index e920a49ecdde3..a48f8d3588c3a 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -157,211 +157,211 @@ def concat( copy: bool = True, ) -> DataFrame | Series: """ - Concatenate pandas objects along a particular axis with optional set logic - along the other axes. - - Can also add a layer of hierarchical indexing on the concatenation axis, - which may be useful if the labels are the same (or overlapping) on - the passed axis number. - - Parameters - ---------- - objs : a sequence or mapping of Series or DataFrame objects - If a mapping is passed, the sorted keys will be used as the `keys` - argument, unless it is passed, in which case the values will be - selected (see below). Any None objects will be dropped silently unless - they are all None in which case a ValueError will be raised. - axis : {0/'index', 1/'columns'}, default 0 - The axis to concatenate along. - join : {'inner', 'outer'}, default 'outer' - How to handle indexes on other axis (or axes). - ignore_index : bool, default False - If True, do not use the index values along the concatenation axis. The - resulting axis will be labeled 0, ..., n - 1. This is useful if you are - concatenating objects where the concatenation axis does not have - meaningful indexing information. Note the index values on the other - axes are still respected in the join. - keys : sequence, default None - If multiple levels passed, should contain tuples. Construct - hierarchical index using the passed keys as the outermost level. - levels : list of sequences, default None - Specific levels (unique values) to use for constructing a - MultiIndex. Otherwise they will be inferred from the keys. - names : list, default None - Names for the levels in the resulting hierarchical index. - verify_integrity : bool, default False - Check whether the new concatenated axis contains duplicates. This can - be very expensive relative to the actual data concatenation. - sort : bool, default False - Sort non-concatenation axis if it is not already aligned when `join` - is 'outer'. - This has no effect when ``join='inner'``, which already preserves - the order of the non-concatenation axis. - - .. versionchanged:: 1.0.0 - - Changed to not sort by default. - - copy : bool, default True - If False, do not copy data unnecessarily. 
- - Returns - ------- - object, type of objs - When concatenating all ``Series`` along the index (axis=0), a - ``Series`` is returned. When ``objs`` contains at least one - ``DataFrame``, a ``DataFrame`` is returned. When concatenating along - the columns (axis=1), a ``DataFrame`` is returned. - - See Also - -------- - DataFrame.join : Join DataFrames using indexes. - DataFrame.merge : Merge DataFrames by indexes or columns. - - Notes - ----- - The keys, levels, and names arguments are all optional. - - A walkthrough of how this method fits in with other tools for combining - pandas objects can be found `here - `__. - - - It is not recommended to build DataFrames by adding single rows in a -for loop. Build a list of rows and make a DataFrame in a single concat. - - Examples - -------- - Combine two ``Series``. - - >>> s1 = pd.Series(['a', 'b']) - >>> s2 = pd.Series(['c', 'd']) - >>> pd.concat([s1, s2]) - 0 a - 1 b - 0 c - 1 d - dtype: object - - Clear the existing index and reset it in the result - by setting the ``ignore_index`` option to ``True``. - - >>> pd.concat([s1, s2], ignore_index=True) - 0 a - 1 b - 2 c - 3 d - dtype: object - - Add a hierarchical index at the outermost level of - the data with the ``keys`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2']) - s1 0 a + Concatenate pandas objects along a particular axis with optional set logic + along the other axes. + + Can also add a layer of hierarchical indexing on the concatenation axis, + which may be useful if the labels are the same (or overlapping) on + the passed axis number. + + Parameters + ---------- + objs : a sequence or mapping of Series or DataFrame objects + If a mapping is passed, the sorted keys will be used as the `keys` + argument, unless it is passed, in which case the values will be + selected (see below). Any None objects will be dropped silently unless + they are all None in which case a ValueError will be raised. + axis : {0/'index', 1/'columns'}, default 0 + The axis to concatenate along. + join : {'inner', 'outer'}, default 'outer' + How to handle indexes on other axis (or axes). + ignore_index : bool, default False + If True, do not use the index values along the concatenation axis. The + resulting axis will be labeled 0, ..., n - 1. This is useful if you are + concatenating objects where the concatenation axis does not have + meaningful indexing information. Note the index values on the other + axes are still respected in the join. + keys : sequence, default None + If multiple levels passed, should contain tuples. Construct + hierarchical index using the passed keys as the outermost level. + levels : list of sequences, default None + Specific levels (unique values) to use for constructing a + MultiIndex. Otherwise they will be inferred from the keys. + names : list, default None + Names for the levels in the resulting hierarchical index. + verify_integrity : bool, default False + Check whether the new concatenated axis contains duplicates. This can + be very expensive relative to the actual data concatenation. + sort : bool, default False + Sort non-concatenation axis if it is not already aligned when `join` + is 'outer'. + This has no effect when ``join='inner'``, which already preserves + the order of the non-concatenation axis. + + .. versionchanged:: 1.0.0 + + Changed to not sort by default. + + copy : bool, default True + If False, do not copy data unnecessarily. 
+ + Returns + ------- + object, type of objs + When concatenating all ``Series`` along the index (axis=0), a + ``Series`` is returned. When ``objs`` contains at least one + ``DataFrame``, a ``DataFrame`` is returned. When concatenating along + the columns (axis=1), a ``DataFrame`` is returned. + + See Also + -------- + DataFrame.join : Join DataFrames using indexes. + DataFrame.merge : Merge DataFrames by indexes or columns. + + Notes + ----- + The keys, levels, and names arguments are all optional. + + A walkthrough of how this method fits in with other tools for combining + pandas objects can be found `here + `__. + + + It is not recommended to build DataFrames by adding single rows in a + for loop. Build a list of rows and make a DataFrame in a single concat. + + Examples + -------- + Combine two ``Series``. + + >>> s1 = pd.Series(['a', 'b']) + >>> s2 = pd.Series(['c', 'd']) + >>> pd.concat([s1, s2]) + 0 a 1 b - s2 0 c + 0 c 1 d - dtype: object - - Label the index keys you create with the ``names`` option. - - >>> pd.concat([s1, s2], keys=['s1', 's2'], - ... names=['Series name', 'Row ID']) - Series name Row ID - s1 0 a - 1 b - s2 0 c - 1 d - dtype: object - - Combine two ``DataFrame`` objects with identical columns. - - >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], - ... columns=['letter', 'number']) - >>> df1 - letter number - 0 a 1 - 1 b 2 - >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], - ... columns=['letter', 'number']) - >>> df2 - letter number - 0 c 3 - 1 d 4 - >>> pd.concat([df1, df2]) - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects with overlapping columns - and return everything. Columns outside the intersection will - be filled with ``NaN`` values. - - >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], - ... columns=['letter', 'number', 'animal']) - >>> df3 - letter number animal - 0 c 3 cat - 1 d 4 dog - >>> pd.concat([df1, df3], sort=False) - letter number animal - 0 a 1 NaN - 1 b 2 NaN - 0 c 3 cat - 1 d 4 dog - - Combine ``DataFrame`` objects with overlapping columns - and return only those that are shared by passing ``inner`` to - the ``join`` keyword argument. - - >>> pd.concat([df1, df3], join="inner") - letter number - 0 a 1 - 1 b 2 - 0 c 3 - 1 d 4 - - Combine ``DataFrame`` objects horizontally along the x axis by - passing in ``axis=1``. - - >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], - ... columns=['animal', 'name']) - >>> pd.concat([df1, df4], axis=1) - letter number animal name - 0 a 1 bird polly - 1 b 2 monkey george - - Prevent the result from including duplicate index values with the - ``verify_integrity`` option. - - >>> df5 = pd.DataFrame([1], index=['a']) - >>> df5 - 0 - a 1 - >>> df6 = pd.DataFrame([2], index=['a']) - >>> df6 - 0 - a 2 - >>> pd.concat([df5, df6], verify_integrity=True) - Traceback (most recent call last): - ... - ValueError: Indexes have overlapping values: ['a'] - - Append a single row to the end of a ``DataFrame`` object. - - >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0]) - >>> df7 - a b - 0 1 2 - >>> new_row = pd.Series({'a': 3, 'b': 4}) - >>> new_row - 0 3 4 - >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True) - a b - 0 1 2 - 1 3 4 + dtype: object + + Clear the existing index and reset it in the result + by setting the ``ignore_index`` option to ``True``. + + >>> pd.concat([s1, s2], ignore_index=True) + 0 a + 1 b + 2 c + 3 d + dtype: object + + Add a hierarchical index at the outermost level of + the data with the ``keys`` option. 
+
+    >>> pd.concat([s1, s2], keys=['s1', 's2'])
+    s1  0    a
+        1    b
+    s2  0    c
+        1    d
+    dtype: object
+
+    Label the index keys you create with the ``names`` option.
+
+    >>> pd.concat([s1, s2], keys=['s1', 's2'],
+    ...           names=['Series name', 'Row ID'])
+    Series name  Row ID
+    s1           0         a
+                 1         b
+    s2           0         c
+                 1         d
+    dtype: object
+
+    Combine two ``DataFrame`` objects with identical columns.
+
+    >>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
+    ...                    columns=['letter', 'number'])
+    >>> df1
+      letter  number
+    0      a       1
+    1      b       2
+    >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
+    ...                    columns=['letter', 'number'])
+    >>> df2
+      letter  number
+    0      c       3
+    1      d       4
+    >>> pd.concat([df1, df2])
+      letter  number
+    0      a       1
+    1      b       2
+    0      c       3
+    1      d       4
+
+    Combine ``DataFrame`` objects with overlapping columns
+    and return everything. Columns outside the intersection will
+    be filled with ``NaN`` values.
+
+    >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
+    ...                    columns=['letter', 'number', 'animal'])
+    >>> df3
+      letter  number animal
+    0      c       3    cat
+    1      d       4    dog
+    >>> pd.concat([df1, df3], sort=False)
+      letter  number animal
+    0      a       1    NaN
+    1      b       2    NaN
+    0      c       3    cat
+    1      d       4    dog
+
+    Combine ``DataFrame`` objects with overlapping columns
+    and return only those that are shared by passing ``inner`` to
+    the ``join`` keyword argument.
+
+    >>> pd.concat([df1, df3], join="inner")
+      letter  number
+    0      a       1
+    1      b       2
+    0      c       3
+    1      d       4
+
+    Combine ``DataFrame`` objects horizontally along the x axis by
+    passing in ``axis=1``.
+
+    >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
+    ...                    columns=['animal', 'name'])
+    >>> pd.concat([df1, df4], axis=1)
+      letter  number  animal    name
+    0      a       1    bird   polly
+    1      b       2  monkey  george
+
+    Prevent the result from including duplicate index values with the
+    ``verify_integrity`` option.
+
+    >>> df5 = pd.DataFrame([1], index=['a'])
+    >>> df5
+       0
+    a  1
+    >>> df6 = pd.DataFrame([2], index=['a'])
+    >>> df6
+       0
+    a  2
+    >>> pd.concat([df5, df6], verify_integrity=True)
+    Traceback (most recent call last):
+        ...
+    ValueError: Indexes have overlapping values: ['a']
+
+    Append a single row to the end of a ``DataFrame`` object.
+
+    >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
+    >>> df7
+    a b
+    0 1 2
+    >>> new_row = pd.Series({'a': 3, 'b': 4})
+    >>> new_row
+    0 3 4
+    >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
+    a b
+    0 1 2
+    1 3 4
     """
     op = _Concatenator(
         objs,

From 28c9edeb1f0a8d64620dbea64a3b34abb8cef92a Mon Sep 17 00:00:00 2001
From: anetakahle
Date: Sat, 21 May 2022 20:23:28 +0200
Subject: [PATCH 10/17] indentation fix

---
 pandas/core/reshape/concat.py | 402 +++++++++++++++++-----------------
 1 file changed, 201 insertions(+), 201 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index a48f8d3588c3a..5d33af1bd8701 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -157,211 +157,211 @@ def concat(
     copy: bool = True,
 ) -> DataFrame | Series:
     """
-        Concatenate pandas objects along a particular axis with optional set logic
-        along the other axes.
-
-        Can also add a layer of hierarchical indexing on the concatenation axis,
-        which may be useful if the labels are the same (or overlapping) on
-        the passed axis number.
-
-        Parameters
-        ----------
-        objs : a sequence or mapping of Series or DataFrame objects
-            If a mapping is passed, the sorted keys will be used as the `keys`
-            argument, unless it is passed, in which case the values will be
-            selected (see below). Any None objects will be dropped silently unless
-            they are all None in which case a ValueError will be raised.
-        axis : {0/'index', 1/'columns'}, default 0
-            The axis to concatenate along.
-        join : {'inner', 'outer'}, default 'outer'
-            How to handle indexes on other axis (or axes).
-        ignore_index : bool, default False
-            If True, do not use the index values along the concatenation axis. The
-            resulting axis will be labeled 0, ..., n - 1. This is useful if you are
-            concatenating objects where the concatenation axis does not have
-            meaningful indexing information. Note the index values on the other
-            axes are still respected in the join.
-        keys : sequence, default None
-            If multiple levels passed, should contain tuples. Construct
-            hierarchical index using the passed keys as the outermost level.
-        levels : list of sequences, default None
-            Specific levels (unique values) to use for constructing a
-            MultiIndex. Otherwise they will be inferred from the keys.
-        names : list, default None
-            Names for the levels in the resulting hierarchical index.
-        verify_integrity : bool, default False
-            Check whether the new concatenated axis contains duplicates. This can
-            be very expensive relative to the actual data concatenation.
-        sort : bool, default False
-            Sort non-concatenation axis if it is not already aligned when `join`
-            is 'outer'.
-            This has no effect when ``join='inner'``, which already preserves
-            the order of the non-concatenation axis.
-
-        .. versionchanged:: 1.0.0
-
-            Changed to not sort by default.
-
-        copy : bool, default True
-            If False, do not copy data unnecessarily.
-
-        Returns
-        -------
-        object, type of objs
-            When concatenating all ``Series`` along the index (axis=0), a
-            ``Series`` is returned. When ``objs`` contains at least one
-            ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
-            the columns (axis=1), a ``DataFrame`` is returned.
-
-        See Also
-        --------
-        DataFrame.join : Join DataFrames using indexes.
-        DataFrame.merge : Merge DataFrames by indexes or columns.
-
-        Notes
-        -----
-        The keys, levels, and names arguments are all optional.
-
-        A walkthrough of how this method fits in with other tools for combining
-        pandas objects can be found `here
-        <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.
-
-
-        It is not recommended to build DataFrames by adding single rows in a
+    Concatenate pandas objects along a particular axis with optional set logic
+    along the other axes.
+
+    Can also add a layer of hierarchical indexing on the concatenation axis,
+    which may be useful if the labels are the same (or overlapping) on
+    the passed axis number.
+
+    Parameters
+    ----------
+    objs : a sequence or mapping of Series or DataFrame objects
+        If a mapping is passed, the sorted keys will be used as the `keys`
+        argument, unless it is passed, in which case the values will be
+        selected (see below). Any None objects will be dropped silently unless
+        they are all None in which case a ValueError will be raised.
+    axis : {0/'index', 1/'columns'}, default 0
+        The axis to concatenate along.
+    join : {'inner', 'outer'}, default 'outer'
+        How to handle indexes on other axis (or axes).
+    ignore_index : bool, default False
+        If True, do not use the index values along the concatenation axis. The
+        resulting axis will be labeled 0, ..., n - 1. This is useful if you are
+        concatenating objects where the concatenation axis does not have
+        meaningful indexing information. Note the index values on the other
+        axes are still respected in the join.
+    keys : sequence, default None
+        If multiple levels passed, should contain tuples. Construct
+        hierarchical index using the passed keys as the outermost level.
+    levels : list of sequences, default None
+        Specific levels (unique values) to use for constructing a
+        MultiIndex. Otherwise they will be inferred from the keys.
+    names : list, default None
+        Names for the levels in the resulting hierarchical index.
+    verify_integrity : bool, default False
+        Check whether the new concatenated axis contains duplicates. This can
+        be very expensive relative to the actual data concatenation.
+    sort : bool, default False
+        Sort non-concatenation axis if it is not already aligned when `join`
+        is 'outer'.
+        This has no effect when ``join='inner'``, which already preserves
+        the order of the non-concatenation axis.
+
+    .. versionchanged:: 1.0.0
+
+        Changed to not sort by default.
+
+    copy : bool, default True
+        If False, do not copy data unnecessarily.
+
+    Returns
+    -------
+    object, type of objs
+        When concatenating all ``Series`` along the index (axis=0), a
+        ``Series`` is returned. When ``objs`` contains at least one
+        ``DataFrame``, a ``DataFrame`` is returned. When concatenating along
+        the columns (axis=1), a ``DataFrame`` is returned.
+
+    See Also
+    --------
+    DataFrame.join : Join DataFrames using indexes.
+    DataFrame.merge : Merge DataFrames by indexes or columns.
+
+    Notes
+    -----
+    The keys, levels, and names arguments are all optional.
+
+    A walkthrough of how this method fits in with other tools for combining
+    pandas objects can be found `here
+    <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.
+
+
+    It is not recommended to build DataFrames by adding single rows in a
     for loop. Build a list of rows and make a DataFrame in a single concat.
 
-        Examples
-        --------
-        Combine two ``Series``.
-
-        >>> s1 = pd.Series(['a', 'b'])
-        >>> s2 = pd.Series(['c', 'd'])
-        >>> pd.concat([s1, s2])
-        0    a
+    Examples
+    --------
+    Combine two ``Series``.
+
+    >>> s1 = pd.Series(['a', 'b'])
+    >>> s2 = pd.Series(['c', 'd'])
+    >>> pd.concat([s1, s2])
+    0    a
+    1    b
+    0    c
+    1    d
+    dtype: object
+
+    Clear the existing index and reset it in the result
+    by setting the ``ignore_index`` option to ``True``.
+
+    >>> pd.concat([s1, s2], ignore_index=True)
+    0    a
+    1    b
+    2    c
+    3    d
+    dtype: object
+
+    Add a hierarchical index at the outermost level of
+    the data with the ``keys`` option.
+
+    >>> pd.concat([s1, s2], keys=['s1', 's2'])
+    s1  0    a
     1    b
-        0    c
+    s2  0    c
     1    d
-        dtype: object
+    dtype: object
+
+    Label the index keys you create with the ``names`` option.
+
+    >>> pd.concat([s1, s2], keys=['s1', 's2'],
+    ...           names=['Series name', 'Row ID'])
+    Series name  Row ID
+    s1           0         a
+                 1         b
+    s2           0         c
+                 1         d
+    dtype: object
+
+    Combine two ``DataFrame`` objects with identical columns.
+
+    >>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
+    ...                    columns=['letter', 'number'])
+    >>> df1
+      letter  number
+    0      a       1
+    1      b       2
+    >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
+    ...                    columns=['letter', 'number'])
+    >>> df2
+      letter  number
+    0      c       3
+    1      d       4
+    >>> pd.concat([df1, df2])
+      letter  number
+    0      a       1
+    1      b       2
+    0      c       3
+    1      d       4
+
+    Combine ``DataFrame`` objects with overlapping columns
+    and return everything. Columns outside the intersection will
+    be filled with ``NaN`` values.
+
+    >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
+    ...                    columns=['letter', 'number', 'animal'])
+    >>> df3
+      letter  number animal
+    0      c       3    cat
+    1      d       4    dog
+    >>> pd.concat([df1, df3], sort=False)
+      letter  number animal
+    0      a       1    NaN
+    1      b       2    NaN
+    0      c       3    cat
+    1      d       4    dog
+
+    Combine ``DataFrame`` objects with overlapping columns
+    and return only those that are shared by passing ``inner`` to
+    the ``join`` keyword argument.
+
+    >>> pd.concat([df1, df3], join="inner")
+      letter  number
+    0      a       1
+    1      b       2
+    0      c       3
+    1      d       4
+
+    Combine ``DataFrame`` objects horizontally along the x axis by
+    passing in ``axis=1``.
+
+    >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
+    ...                    columns=['animal', 'name'])
+    >>> pd.concat([df1, df4], axis=1)
+      letter  number  animal    name
+    0      a       1    bird   polly
+    1      b       2  monkey  george
+
+    Prevent the result from including duplicate index values with the
+    ``verify_integrity`` option.
+
+    >>> df5 = pd.DataFrame([1], index=['a'])
+    >>> df5
+       0
+    a  1
+    >>> df6 = pd.DataFrame([2], index=['a'])
+    >>> df6
+       0
+    a  2
+    >>> pd.concat([df5, df6], verify_integrity=True)
+    Traceback (most recent call last):
+        ...
+    ValueError: Indexes have overlapping values: ['a']
+
+    Append a single row to the end of a ``DataFrame`` object.
+
+    >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0])
+    >>> df7
+       a  b
+    0  1  2
+    >>> new_row = pd.Series({'a': 3, 'b': 4})
+    >>> new_row
+    0 3 4
+    >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
+    a b
+    0 1 2
+    1 3 4
     """
     op = _Concatenator(
         objs,

From 721f63de38a4bdcafb27dcf49ffd1749ae2394e6 Mon Sep 17 00:00:00 2001
From: anetakahle
Date: Sat, 21 May 2022 21:40:53 +0200
Subject: [PATCH 11/17] spaces fix

---
 pandas/core/reshape/concat.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 5d33af1bd8701..5d600e344759d 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -359,9 +359,9 @@ def concat(
     >>> new_row
     0 3 4
     >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
-    a b
-    0 1 2
-    1 3 4
+       a  b
+    0  1  2
+    1  3  4
     """
     op = _Concatenator(
         objs,

From 83ed246535cd8ead9d46343a3424e6491382560b Mon Sep 17 00:00:00 2001
From: anetakahle
Date: Sat, 21 May 2022 22:33:54 +0200
Subject: [PATCH 12/17] small fix

---
 pandas/core/reshape/concat.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 5d600e344759d..5b3dedb57d6c6 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -357,7 +357,8 @@ def concat(
     0  1  2
     >>> new_row = pd.Series({'a': 3, 'b': 4})
     >>> new_row
-    0 3 4
+    a 3
+    b 4
     >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
        a  b
     0  1  2

From 6a1f1719b4f05a50398044516335cbe3735517fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aneta=20Kahleov=C3=A1?=
Date: Sat, 21 May 2022 23:07:35 +0200
Subject: [PATCH 13/17] removed unrelated white spaces

---
 pandas/core/reshape/concat.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 5b3dedb57d6c6..05ba07f184bfc 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -200,7 +200,7 @@ def concat(
 
     .. versionchanged:: 1.0.0
 
-        Changed to not sort by default.
+        Changed to not sort by default.
 
     copy : bool, default True
         If False, do not copy data unnecessarily.
@@ -269,9 +269,9 @@ def concat(
     ...           names=['Series name', 'Row ID'])
     Series name  Row ID
     s1           0         a
-                 1         b
+                 1         b
     s2           0         c
-                 1         d
+                 1         d
     dtype: object
 
     Combine two ``DataFrame`` objects with identical columns.
@@ -279,7 +279,7 @@ def concat(
     >>> df1 = pd.DataFrame([['a', 1], ['b', 2]],
     ...                    columns=['letter', 'number'])
     >>> df1
-      letter  number
+      letter  number
     0      a       1
     1      b       2
     >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
     ...                    columns=['letter', 'number'])
     >>> df2
       letter  number
     0      c       3
     1      d       4
     >>> pd.concat([df1, df2])
       letter  number
     0      a       1
     1      b       2
     0      c       3
     1      d       4
@@ -302,11 +302,11 @@ def concat(
     >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']],
     ...                    columns=['letter', 'number', 'animal'])
     >>> df3
-      letter  number animal
+      letter  number animal
     0      c       3    cat
     1      d       4    dog
     >>> pd.concat([df1, df3], sort=False)
-      letter  number animal
+      letter  number animal
     0      a       1    NaN
     1      b       2    NaN
     0      c       3    cat
     1      d       4    dog
@@ -317,7 +317,7 @@ def concat(
     the ``join`` keyword argument.
 
     >>> pd.concat([df1, df3], join="inner")
-      letter  number
+      letter  number
     0      a       1
     1      b       2
     0      c       3
     1      d       4
@@ -329,7 +329,7 @@ def concat(
 
     >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']],
     ...                    columns=['animal', 'name'])
     >>> pd.concat([df1, df4], axis=1)
-      letter  number  animal    name
+      letter  number  animal    name
     0      a       1    bird   polly
     1      b       2  monkey  george
@@ -338,11 +338,11 @@ def concat(
 
     >>> df5 = pd.DataFrame([1], index=['a'])
     >>> df5
-       0
+       0
     a  1
     >>> df6 = pd.DataFrame([2], index=['a'])
     >>> df6
-       0
+       0
     a  2
     >>> pd.concat([df5, df6], verify_integrity=True)
     Traceback (most recent call last):
         ...

From 16c4cfde08492d426f66caabd1c159af42e1ce22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aneta=20Kahleov=C3=A1?=
Date: Sat, 21 May 2022 23:08:40 +0200
Subject: [PATCH 14/17] Update concat.py

---
 pandas/core/reshape/concat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 05ba07f184bfc..670f20b185d9d 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -285,11 +285,11 @@ def concat(
     >>> df2 = pd.DataFrame([['c', 3], ['d', 4]],
     ...                    columns=['letter', 'number'])
     >>> df2
-      letter  number
+      letter  number
     0      c       3
     1      d       4
     >>> pd.concat([df1, df2])
-      letter  number
+      letter  number
     0      a       1
     1      b       2
     0      c       3
     1      d       4

From d0c8af37c95879fcb2ed893ea05693319d8a11b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aneta=20Kahleov=C3=A1?=
Date: Sun, 22 May 2022 14:25:18 +0200
Subject: [PATCH 15/17] Update concat.py

---
 pandas/core/reshape/concat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 670f20b185d9d..cd82d14b37c86 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -357,8 +357,8 @@ def concat(
     0  1  2
     >>> new_row = pd.Series({'a': 3, 'b': 4})
     >>> new_row
-    a 3
-    b 4
+    a    3
+    b    4
     >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
        a  b
     0  1  2

From 9aa4b28876cb4f4f85c85e7f2302d40b763d800d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aneta=20Kahleov=C3=A1?=
Date: Sun, 22 May 2022 14:49:02 +0200
Subject: [PATCH 16/17] Update concat.py

---
 pandas/core/reshape/concat.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index cd82d14b37c86..1e9afec765a23 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -359,6 +359,7 @@ def concat(
     >>> new_row
     a    3
     b    4
+    dtype: int64
     >>> pd.concat([df7, new_row.to_frame().T], ignore_index=True)
        a  b
     0  1  2

From 81b98096677a294b700be7eb7134be4a9d41a22a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aneta=20Kahleov=C3=A1?=
Date: Fri, 27 May 2022 12:25:36 +0200
Subject: [PATCH 17/17] Update concat.py

---
 pandas/core/reshape/concat.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 1e9afec765a23..523cd56db3e0a 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -226,7 +226,6 @@ def concat(
     pandas objects can be found `here
     <https://pandas.pydata.org/pandas-docs/stable/user_guide/merging.html>`__.
 
-
     It is not recommended to build DataFrames by adding single rows in a
     for loop. Build a list of rows and make a DataFrame in a single concat.
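
A short illustration of the note these patches keep reworking ("It is not
recommended to build DataFrames by adding single rows in a for loop"): each
``pd.concat`` call copies all of its inputs into a new object, so appending one
row per iteration does quadratic work overall, while collecting the pieces in a
list and concatenating once copies each row a single time. A minimal sketch of
the two patterns (the helper ``make_row`` and the sizes are illustrative, not
part of the patch):

    import pandas as pd

    def make_row(i):
        # One single-row DataFrame per iteration (hypothetical helper).
        return pd.DataFrame({'a': [i], 'b': [i * 2]})

    # Discouraged: re-concatenating inside the loop copies the accumulated
    # frame on every iteration.
    df = make_row(0)
    for i in range(1, 1000):
        df = pd.concat([df, make_row(i)], ignore_index=True)

    # Recommended: build a list of rows, then concatenate once.
    df = pd.concat([make_row(i) for i in range(1000)], ignore_index=True)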