From 53ae9d6d09ecdb40e3f4dc3ef00a9b6015100ab4 Mon Sep 17 00:00:00 2001
From: Richard
Date: Thu, 21 May 2020 16:56:04 -0400
Subject: [PATCH 1/6] CLN: Unify signatures in _libs.groupby

---
 pandas/_libs/groupby.pyx       | 91 ++++++++++++++++++----------------
 pandas/core/groupby/groupby.py | 36 +++++++++++++-
 2 files changed, 84 insertions(+), 43 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index d5d706650bb34..69ec9b697a847 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -714,10 +714,12 @@ group_ohlc_float64 = _group_ohlc['double']

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_quantile(ndarray[float64_t] out,
-                   ndarray[int64_t] labels,
-                   numeric[:] values,
-                   ndarray[uint8_t] mask,
+def group_quantile(floating[:, :] out,
+                   int64_t[:] counts,
+                   floating[:, :] values,
+                   const int64_t[:] labels,
+                   Py_ssize_t min_count,
+                   const uint8_t[:, :] mask,
                    float64_t q,
                    object interpolation):
     """
@@ -740,12 +742,12 @@ def group_quantile(ndarray[float64_t] out,
     provided `out` parameter.
     """
     cdef:
-        Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz
+        Py_ssize_t i, N=len(labels), K, ngroups, grp_sz=0, non_na_sz
         Py_ssize_t grp_start=0, idx=0
         int64_t lab
         uint8_t interp
         float64_t q_idx, frac, val, next_val
-        ndarray[int64_t] counts, non_na_counts, sort_arr
+        int64_t[:, :] non_na_counts, sort_arrs

     assert values.shape[0] == N

@@ -761,59 +763,64 @@ def group_quantile(ndarray[float64_t] out,
     }
     interp = inter_methods[interpolation]

-    counts = np.zeros_like(out, dtype=np.int64)
     non_na_counts = np.zeros_like(out, dtype=np.int64)
+    sort_arrs = np.empty_like(values, dtype=np.int64)
     ngroups = len(counts)

+    N, K = (<object>values).shape
+
     # First figure out the size of every group
     with nogil:
         for i in range(N):
             lab = labels[i]
             if lab == -1:  # NA group label
                 continue

-            counts[lab] += 1
-            if not mask[i]:
-                non_na_counts[lab] += 1
+            for j in range(K):
+                if not mask[i, j]:
+                    non_na_counts[lab, j] += 1

-    # Get an index of values sorted by labels and then values
-    order = (values, labels)
-    sort_arr = np.lexsort(order).astype(np.int64, copy=False)
+    for j in range(K):
+        order = (values[:, j], labels)
+        r = np.lexsort(order).astype(np.int64, copy=False)
+        # TODO: Need better way to assign r to column j
+        for i in range(N):
+            sort_arrs[i, j] = r[i]

     with nogil:
         for i in range(ngroups):
             # Figure out how many group elements there are
             grp_sz = counts[i]
-            non_na_sz = non_na_counts[i]
-
-            if non_na_sz == 0:
-                out[i] = NaN
-            else:
-                # Calculate where to retrieve the desired value
-                # Casting to int will intentionally truncate result
-                idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
-
-                val = values[sort_arr[idx]]
-                # If requested quantile falls evenly on a particular index
-                # then write that index's value out. Otherwise interpolate
-                q_idx = q * (non_na_sz - 1)
-                frac = q_idx % 1
-
-                if frac == 0.0 or interp == INTERPOLATION_LOWER:
-                    out[i] = val
+            for j in range(K):
+                non_na_sz = non_na_counts[i, j]
+                if non_na_sz == 0:
+                    out[i, j] = NaN
                 else:
-                    next_val = values[sort_arr[idx + 1]]
-                    if interp == INTERPOLATION_LINEAR:
-                        out[i] = val + (next_val - val) * frac
-                    elif interp == INTERPOLATION_HIGHER:
-                        out[i] = next_val
-                    elif interp == INTERPOLATION_MIDPOINT:
-                        out[i] = (val + next_val) / 2.0
-                    elif interp == INTERPOLATION_NEAREST:
-                        if frac > .5 or (frac == .5 and q > .5):  # Always OK?
-                            out[i] = next_val
-                        else:
-                            out[i] = val
+                    # Calculate where to retrieve the desired value
+                    # Casting to int will intentionally truncate result
+                    idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
+
+                    val = values[sort_arrs[idx, j], j]
+                    # If requested quantile falls evenly on a particular index
+                    # then write that index's value out. Otherwise interpolate
+                    q_idx = q * (non_na_sz - 1)
+                    frac = q_idx % 1
+
+                    if frac == 0.0 or interp == INTERPOLATION_LOWER:
+                        out[i, j] = val
+                    else:
+                        next_val = values[sort_arrs[idx + 1, j], j]
+                        if interp == INTERPOLATION_LINEAR:
+                            out[i, j] = val + (next_val - val) * frac
+                        elif interp == INTERPOLATION_HIGHER:
+                            out[i, j] = next_val
+                        elif interp == INTERPOLATION_MIDPOINT:
+                            out[i, j] = (val + next_val) / 2.0
+                        elif interp == INTERPOLATION_NEAREST:
+                            if frac > .5 or (frac == .5 and q > .5):  # Always OK?
+                                out[i, j] = next_val
+                            else:
+                                out[i, j] = val

             # Increment the index reference in sorted_arr for the next group
             grp_start += grp_sz
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 37f2376d68d55..8af7a54772076 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2039,6 +2039,9 @@ def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]:
                 inference = "datetime64[ns]"
                 vals = np.asarray(vals).astype(np.float)

+            if vals.dtype != np.dtype(np.float64):
+                vals = vals.astype(np.float64)
+
             return vals, inference

         def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
@@ -2396,7 +2399,7 @@ def _get_cythonized_result(
         if result_is_index and aggregate:
             raise ValueError("'result_is_index' and 'aggregate' cannot both be True!")
         if post_processing:
-            if not callable(pre_processing):
+            if not callable(post_processing):
                 raise ValueError("'post_processing' must be a callable!")
         if pre_processing:
             if not callable(pre_processing):
@@ -2412,6 +2415,37 @@ def _get_cythonized_result(
         output: Dict[base.OutputKey, np.ndarray] = {}
         base_func = getattr(libgroupby, how)

+        if how == "group_quantile":
+            values = self._obj_with_exclusions._values
+            result_sz = ngroups if aggregate else len(values)
+
+            vals, inferences = pre_processing(values)
+            if self._obj_with_exclusions.ndim == 1:
+                width = 1
+                vals = np.reshape(vals, (-1, 1))
+            else:
+                width = len(self._obj_with_exclusions.columns)
+            result = np.zeros((result_sz, width), dtype=cython_dtype)
+            counts = np.zeros(self.ngroups, dtype=np.int64)
+            mask = isna(vals).view(np.uint8)
+
+            func = partial(base_func, result, counts, vals, labels, -1, mask)
+            func(**kwargs)  # Call func to modify indexer values in place
+            result = post_processing(result, inferences)
+
+            if self._obj_with_exclusions.ndim == 1:
+                key = base.OutputKey(label=self._obj_with_exclusions.name, position=0)
+                output[key] = result[:, 0]
+            else:
+                for idx, name in enumerate(self._obj_with_exclusions.columns):
+                    key = base.OutputKey(label=name, position=idx)
+                    output[key] = result[:, idx]
+
+            if aggregate:
+                return self._wrap_aggregated_output(output)
+            else:
+                return self._wrap_transformed_output(output)
+
         for idx, obj in enumerate(self._iterate_slices()):
             name = obj.name
             values = obj._values
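The first patch turns group_quantile into a single 2-D kernel: counts and min_count join the signature so it lines up with the other group aggregations, and the lexsort/select logic runs once per column. The ordering trick the kernel relies on can be sketched in plain NumPy. This is an illustrative toy, not the pandas implementation: group_quantile_2d and its arguments are made-up names, it assumes all labels are valid (no -1 entries), and it implements only the truncating 'lower' lookup, without the interpolation branches.

    import numpy as np

    def group_quantile_2d(values, labels, ngroups, q):
        # np.lexsort sorts by the last key first, so each group's values form
        # a contiguous, value-sorted run; NaNs sort to the end of each run,
        # mirroring how the kernel skips masked entries via non_na_counts.
        N, K = values.shape
        counts = np.bincount(labels, minlength=ngroups)
        out = np.full((ngroups, K), np.nan)
        for j in range(K):
            order = np.lexsort((values[:, j], labels))
            grp_start = 0
            for i in range(ngroups):
                grp = values[order[grp_start:grp_start + counts[i]], j]
                grp = grp[~np.isnan(grp)]          # keep the non-NA prefix
                if len(grp):
                    idx = int(q * (len(grp) - 1))  # truncate, like the int cast
                    out[i, j] = grp[idx]
                grp_start += counts[i]
        return out

    vals = np.array([[1.0, 10.0], [3.0, np.nan], [2.0, 30.0], [4.0, 40.0]])
    print(group_quantile_2d(vals, np.array([0, 0, 1, 1]), ngroups=2, q=0.5))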
From 7985efb63d29c03617c82585a25959ad6316cc2e Mon Sep 17 00:00:00 2001
From: Richard
Date: Wed, 27 May 2020 17:20:32 -0400
Subject: [PATCH 2/6] Complete rework

---
 pandas/_libs/groupby.pyx       | 100 ++++++++++++++++++-----------------
 pandas/core/groupby/generic.py |   6 +-
 pandas/core/groupby/groupby.py |  99 ++++++++++++++++++----------------
 3 files changed, 103 insertions(+), 102 deletions(-)

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index 69ec9b697a847..c69b00950abaf 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -378,8 +378,8 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_any_all(uint8_t[:] out,
-                  const int64_t[:] labels,
                   const uint8_t[:] values,
+                  const int64_t[:] labels,
                   const uint8_t[:] mask,
                   object val_test,
                   bint skipna):
@@ -560,7 +560,8 @@ def _group_var(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
-               Py_ssize_t min_count=-1):
+               Py_ssize_t min_count=-1,
+               int64_t ddof=1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, ct, oldmean
@@ -600,10 +601,10 @@ def _group_var(floating[:, :] out,
         for i in range(ncounts):
             for j in range(K):
                 ct = nobs[i, j]
-                if ct < 2:
+                if ct <= ddof:
                     out[i, j] = NAN
                 else:
-                    out[i, j] /= (ct - 1)
+                    out[i, j] /= (ct - ddof)


 group_var_float32 = _group_var['float']
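The ddof parameter added to _group_var generalizes the hard-coded sample-variance denominator: the accumulated sum of squared deviations is divided by (ct - ddof), and any group with ct <= ddof observations produces NaN rather than a meaningless estimate. A small NumPy check of those semantics (the helper name is made up for illustration):

    import numpy as np

    def var_with_ddof(x, ddof=1):
        # NaN when there are too few observations, else divide by (n - ddof)
        n = len(x)
        if n <= ddof:
            return np.nan
        return np.sum((x - x.mean()) ** 2) / (n - ddof)

    vals = np.array([1.0, 2.0, 4.0])
    assert np.isclose(var_with_ddof(vals, ddof=1), np.var(vals, ddof=1))
    assert np.isnan(var_with_ddof(vals[:1], ddof=1))  # n == ddof -> NaN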
@@ -714,12 +715,10 @@ group_ohlc_float64 = _group_ohlc['double']

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def group_quantile(floating[:, :] out,
-                   int64_t[:] counts,
-                   floating[:, :] values,
-                   const int64_t[:] labels,
-                   Py_ssize_t min_count,
-                   const uint8_t[:, :] mask,
+def group_quantile(ndarray[float64_t] out,
+                   numeric[:] values,
+                   ndarray[int64_t] labels,
+                   ndarray[uint8_t] mask,
                    float64_t q,
                    object interpolation):
     """
@@ -742,12 +741,12 @@ def group_quantile(floating[:, :] out,
     provided `out` parameter.
     """
     cdef:
-        Py_ssize_t i, N=len(labels), K, ngroups, grp_sz=0, non_na_sz
+        Py_ssize_t i, N=len(labels), ngroups, grp_sz, non_na_sz
         Py_ssize_t grp_start=0, idx=0
         int64_t lab
         uint8_t interp
         float64_t q_idx, frac, val, next_val
-        int64_t[:, :] non_na_counts, sort_arrs
+        ndarray[int64_t] counts, non_na_counts, sort_arr

     assert values.shape[0] == N

@@ -763,64 +762,59 @@ def group_quantile(floating[:, :] out,
     }
     interp = inter_methods[interpolation]

+    counts = np.zeros_like(out, dtype=np.int64)
     non_na_counts = np.zeros_like(out, dtype=np.int64)
-    sort_arrs = np.empty_like(values, dtype=np.int64)
     ngroups = len(counts)

-    N, K = (<object>values).shape
-
     # First figure out the size of every group
     with nogil:
         for i in range(N):
             lab = labels[i]
             if lab == -1:  # NA group label
                 continue

+            counts[lab] += 1
-            for j in range(K):
-                if not mask[i, j]:
-                    non_na_counts[lab, j] += 1
+            if not mask[i]:
+                non_na_counts[lab] += 1

-    for j in range(K):
-        order = (values[:, j], labels)
-        r = np.lexsort(order).astype(np.int64, copy=False)
-        # TODO: Need better way to assign r to column j
-        for i in range(N):
-            sort_arrs[i, j] = r[i]
+    # Get an index of values sorted by labels and then values
+    order = (values, labels)
+    sort_arr = np.lexsort(order).astype(np.int64, copy=False)

     with nogil:
         for i in range(ngroups):
             # Figure out how many group elements there are
             grp_sz = counts[i]
-            for j in range(K):
-                non_na_sz = non_na_counts[i, j]
-                if non_na_sz == 0:
-                    out[i, j] = NaN
+            non_na_sz = non_na_counts[i]
+
+            if non_na_sz == 0:
+                out[i] = NaN
+            else:
+                # Calculate where to retrieve the desired value
+                # Casting to int will intentionally truncate result
+                idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
+
+                val = values[sort_arr[idx]]
+                # If requested quantile falls evenly on a particular index
+                # then write that index's value out. Otherwise interpolate
+                q_idx = q * (non_na_sz - 1)
+                frac = q_idx % 1
+
+                if frac == 0.0 or interp == INTERPOLATION_LOWER:
+                    out[i] = val
                 else:
-                    # Calculate where to retrieve the desired value
-                    # Casting to int will intentionally truncate result
-                    idx = grp_start + <int64_t>(q * <float64_t>(non_na_sz - 1))
-
-                    val = values[sort_arrs[idx, j], j]
-                    # If requested quantile falls evenly on a particular index
-                    # then write that index's value out. Otherwise interpolate
-                    q_idx = q * (non_na_sz - 1)
-                    frac = q_idx % 1
-
-                    if frac == 0.0 or interp == INTERPOLATION_LOWER:
-                        out[i, j] = val
-                    else:
-                        next_val = values[sort_arrs[idx + 1, j], j]
-                        if interp == INTERPOLATION_LINEAR:
-                            out[i, j] = val + (next_val - val) * frac
-                        elif interp == INTERPOLATION_HIGHER:
-                            out[i, j] = next_val
-                        elif interp == INTERPOLATION_MIDPOINT:
-                            out[i, j] = (val + next_val) / 2.0
-                        elif interp == INTERPOLATION_NEAREST:
-                            if frac > .5 or (frac == .5 and q > .5):  # Always OK?
-                                out[i, j] = next_val
-                            else:
-                                out[i, j] = val
+                    next_val = values[sort_arr[idx + 1]]
+                    if interp == INTERPOLATION_LINEAR:
+                        out[i] = val + (next_val - val) * frac
+                    elif interp == INTERPOLATION_HIGHER:
+                        out[i] = next_val
+                    elif interp == INTERPOLATION_MIDPOINT:
+                        out[i] = (val + next_val) / 2.0
+                    elif interp == INTERPOLATION_NEAREST:
+                        if frac > .5 or (frac == .5 and q > .5):  # Always OK?
+                            out[i] = next_val
+                        else:
+                            out[i] = val

             # Increment the index reference in sorted_arr for the next group
             grp_start += grp_sz
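For reference, this is how the interpolation branches behave for q = 0.5 on the sorted group [1, 3, 5, 7] (non_na_sz = 4): q_idx = 0.5 * 3 = 1.5, so idx = 1 after truncation, frac = 0.5, val = 3 and next_val = 5. The checks below compare the arithmetic against NumPy; the kernel's 'nearest' rule picks next_val only when frac > .5 (or frac == .5 and q > .5), so here it keeps 3.

    import numpy as np

    grp = np.array([1.0, 3.0, 5.0, 7.0])
    assert np.quantile(grp, 0.5, interpolation="linear") == 4.0    # 3 + (5 - 3) * 0.5
    assert np.quantile(grp, 0.5, interpolation="lower") == 3.0     # val
    assert np.quantile(grp, 0.5, interpolation="higher") == 5.0    # next_val
    assert np.quantile(grp, 0.5, interpolation="midpoint") == 4.0  # (val + next_val) / 2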
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 69b143febeea2..53bc9c954d9ec 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1721,7 +1721,11 @@ def _wrap_aggregated_output(
         DataFrame
         """
         indexed_output = {key.position: val for key, val in output.items()}
-        columns = Index(key.label for key in output)
+        if self.axis == 0:
+            name = self._obj_with_exclusions.columns.name
+        else:
+            name = self._obj_with_exclusions.index.name
+        columns = Index([key.label for key in output], name=name)

         result = self.obj._constructor(indexed_output)
         result.columns = columns
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8af7a54772076..fa23df8a17d9b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1260,6 +1260,7 @@ def result_to_bool(result: np.ndarray, inference: Type) -> np.ndarray:
         return self._get_cythonized_result(
             "group_any_all",
             aggregate=True,
+            numeric_only=False,
             cython_dtype=np.dtype(np.uint8),
             needs_values=True,
             needs_mask=True,
@@ -1416,18 +1417,16 @@ def std(self, ddof: int = 1):
         Series or DataFrame
             Standard deviation of values within each group.
         """
-        result = self.var(ddof=ddof)
-        if result.ndim == 1:
-            result = np.sqrt(result)
-        else:
-            cols = result.columns.get_indexer_for(
-                result.columns.difference(self.exclusions).unique()
-            )
-            # TODO(GH-22046) - setting with iloc broken if labels are not unique
-            # .values to remove labels
-            result.iloc[:, cols] = np.sqrt(result.iloc[:, cols]).values
-
-        return result
+        return self._get_cythonized_result(
+            "group_var_float64",
+            aggregate=True,
+            needs_counts=True,
+            needs_values=True,
+            needs_2d=True,
+            cython_dtype=np.dtype(np.float64),
+            post_processing=lambda vals, inference: np.sqrt(vals),
+            ddof=ddof,
+        )

     @Substitution(name="groupby")
     @Appender(_common_see_also)
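std is now routed through the same generic machinery as the other reductions: the group_var_float64 kernel does the aggregation, and the square root is applied afterwards as a post_processing hook over the kernel's output. A toy equivalent of that composition (grouped_std is a made-up name, and the real code calls the Cython kernel rather than looping in Python):

    import numpy as np

    def grouped_std(values, labels, ngroups, ddof=1):
        var = np.full(ngroups, np.nan)
        for g in range(ngroups):
            grp = values[labels == g]
            if len(grp) > ddof:
                var[g] = grp.var(ddof=ddof)
        # the hook from the diff: applied to kernel output, inference unused
        post_processing = lambda vals, inference: np.sqrt(vals)
        return post_processing(var, None)

    print(grouped_std(np.array([1.0, 2.0, 4.0, 8.0]), np.array([0, 0, 1, 1]), 2))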
""" - result = self.var(ddof=ddof) - if result.ndim == 1: - result = np.sqrt(result) - else: - cols = result.columns.get_indexer_for( - result.columns.difference(self.exclusions).unique() - ) - # TODO(GH-22046) - setting with iloc broken if labels are not unique - # .values to remove labels - result.iloc[:, cols] = np.sqrt(result.iloc[:, cols]).values - - return result + return self._get_cythonized_result( + "group_var_float64", + aggregate=True, + needs_counts=True, + needs_values=True, + needs_2d=True, + cython_dtype=np.dtype(np.float64), + post_processing=lambda vals, inference: np.sqrt(vals), + ddof=ddof, + ) @Substitution(name="groupby") @Appender(_common_see_also) @@ -1756,6 +1755,7 @@ def _fill(self, direction, limit=None): return self._get_cythonized_result( "group_fillna_indexer", + numeric_only=False, needs_mask=True, cython_dtype=np.dtype(np.int64), result_is_index=True, @@ -2039,9 +2039,6 @@ def pre_processor(vals: np.ndarray) -> Tuple[np.ndarray, Optional[Type]]: inference = "datetime64[ns]" vals = np.asarray(vals).astype(np.float) - if vals.dtype != np.dtype(np.float64): - vals = vals.astype(np.float64) - return vals, inference def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: @@ -2059,6 +2056,7 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: return self._get_cythonized_result( "group_quantile", aggregate=True, + numeric_only=False, needs_values=True, needs_mask=True, cython_dtype=np.dtype(np.float64), @@ -2348,7 +2346,11 @@ def _get_cythonized_result( how: str, cython_dtype: np.dtype, aggregate: bool = False, + numeric_only: bool = True, + needs_counts: bool = False, needs_values: bool = False, + needs_2d: bool = False, + min_count: Optional[int] = None, needs_mask: bool = False, needs_ngroups: bool = False, result_is_index: bool = False, @@ -2367,9 +2369,18 @@ def _get_cythonized_result( aggregate : bool, default False Whether the result should be aggregated to match the number of groups + numeric_only : bool, default True + Whether only numeric datatypes should be computed + needs_counts : bool, default False + Whether the counts should be a part of the Cython call needs_values : bool, default False Whether the values should be a part of the Cython call signature + needs_2d : bool, default False + Whether the values and result of the Cython call signature + are 2-dimensional. 
From 5e21c7294991b7832f3478b585d16c3fa6d32be2 Mon Sep 17 00:00:00 2001
From: Richard
Date: Tue, 9 Jun 2020 21:37:41 -0400
Subject: [PATCH 3/6] Simplified name logic in _wrap_aggregated_output

---
 pandas/core/groupby/generic.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 961b8a4cd863f..38cc61979fb10 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1731,10 +1731,7 @@ def _wrap_aggregated_output(
         DataFrame
         """
         indexed_output = {key.position: val for key, val in output.items()}
-        if self.axis == 0:
-            name = self._obj_with_exclusions.columns.name
-        else:
-            name = self._obj_with_exclusions.index.name
+        name = self._obj_with_exclusions._get_axis(1 - self.axis).name
         columns = Index([key.label for key in output], name=name)

         result = self.obj._constructor(indexed_output)
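The one-liner works because DataFrame._get_axis(0) is the index and _get_axis(1) is the columns, so 1 - self.axis always selects the axis whose name survives the aggregation. A quick illustration, using the private _get_axis helper exactly as the diff does:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
    df.columns.name = "cols"

    assert df._get_axis(1 - 0) is df.columns  # groupby(axis=0): columns survive
    assert df._get_axis(1 - 1) is df.index    # groupby(axis=1): index survives
    assert df._get_axis(1).name == "cols"     # the name carried to the result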
From 4d62493af238357ac0c450874762a28c5f05fe0e Mon Sep 17 00:00:00 2001
From: Richard
Date: Sun, 14 Jun 2020 12:32:01 -0400
Subject: [PATCH 4/6] Renamed needs_2d -> needs_at_least2d

---
 pandas/core/groupby/groupby.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4672fb92c996a..2d02ee98ba428 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1439,7 +1439,7 @@ def std(self, ddof: int = 1):
             aggregate=True,
             needs_counts=True,
             needs_values=True,
-            needs_2d=True,
+            needs_at_least2d=True,
             cython_dtype=np.dtype(np.float64),
             post_processing=lambda vals, inference: np.sqrt(vals),
             ddof=ddof,
@@ -2366,7 +2366,7 @@ def _get_cythonized_result(
         numeric_only: bool = True,
         needs_counts: bool = False,
         needs_values: bool = False,
-        needs_2d: bool = False,
+        needs_at_least2d: bool = False,
         min_count: Optional[int] = None,
         needs_mask: bool = False,
         needs_ngroups: bool = False,
@@ -2393,9 +2393,9 @@ def _get_cythonized_result(
         needs_values : bool, default False
             Whether the values should be a part of the Cython call
             signature
-        needs_2d : bool, default False
+        needs_at_least2d : bool, default False
             Whether the values and result of the Cython call signature
-            are 2-dimensional.
+            are at least 2-dimensional.
         min_count : int, default None
             When not None, min_count for the Cython call
@@ -2455,7 +2455,7 @@ def _get_cythonized_result(
             else:
                 result_sz = len(values)

-            if needs_2d:
+            if needs_at_least2d:
                 result = np.zeros((result_sz, 1), dtype=cython_dtype)
             else:
                 result = np.zeros(result_sz, dtype=cython_dtype)
@@ -2471,7 +2471,7 @@ def _get_cythonized_result(
             vals = values
             if pre_processing:
                 vals, inferences = pre_processing(vals)
-            if needs_2d:
+            if needs_at_least2d:
                 vals = vals.reshape((-1, 1))
             vals = vals.astype(cython_dtype, copy=False)
             func = partial(func, vals)
@@ -2490,7 +2490,7 @@ def _get_cythonized_result(

             func(**kwargs)  # Call func to modify indexer values in place

-            if needs_2d:
+            if needs_at_least2d:
                 result = result.reshape(-1)

             if result_is_index:

From f1c868ff67d3ed5b4db5108e49b31812face891c Mon Sep 17 00:00:00 2001
From: Richard
Date: Mon, 15 Jun 2020 16:14:07 -0400
Subject: [PATCH 5/6] Revert renaming of needs_2d -> needs_at_least2d

---
 pandas/core/groupby/groupby.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 2d02ee98ba428..89982717a7df2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1439,7 +1439,7 @@ def std(self, ddof: int = 1):
             aggregate=True,
             needs_counts=True,
             needs_values=True,
-            needs_at_least2d=True,
+            needs_2d=True,
             cython_dtype=np.dtype(np.float64),
             post_processing=lambda vals, inference: np.sqrt(vals),
             ddof=ddof,
@@ -2366,7 +2366,7 @@ def _get_cythonized_result(
         numeric_only: bool = True,
         needs_counts: bool = False,
         needs_values: bool = False,
-        needs_at_least2d: bool = False,
+        needs_2d: bool = False,
         min_count: Optional[int] = None,
         needs_mask: bool = False,
         needs_ngroups: bool = False,
@@ -2393,7 +2393,7 @@ def _get_cythonized_result(
         needs_values : bool, default False
             Whether the values should be a part of the Cython call
             signature
-        needs_at_least2d : bool, default False
+        needs_2d : bool, default False
             Whether the values and result of the Cython call signature
             are at least 2-dimensional.
         min_count : int, default None
@@ -2455,7 +2455,7 @@ def _get_cythonized_result(
             else:
                 result_sz = len(values)

-            if needs_at_least2d:
+            if needs_2d:
                 result = np.zeros((result_sz, 1), dtype=cython_dtype)
             else:
                 result = np.zeros(result_sz, dtype=cython_dtype)
@@ -2471,7 +2471,7 @@ def _get_cythonized_result(
             vals = values
             if pre_processing:
                 vals, inferences = pre_processing(vals)
-            if needs_at_least2d:
+            if needs_2d:
                 vals = vals.reshape((-1, 1))
             vals = vals.astype(cython_dtype, copy=False)
             func = partial(func, vals)
@@ -2490,7 +2490,7 @@ def _get_cythonized_result(

             func(**kwargs)  # Call func to modify indexer values in place

-            if needs_at_least2d:
+            if needs_2d:
                 result = result.reshape(-1)

             if result_is_index:

From 33bf96a4b11b364cbe987ef4fd10a6e7b215ab69 Mon Sep 17 00:00:00 2001
From: Richard
Date: Thu, 18 Jun 2020 16:10:36 -0400
Subject: [PATCH 6/6] Requested change

---
 pandas/core/groupby/groupby.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 60ac19c303e7f..b92e75f16e965 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2460,10 +2460,9 @@ def _get_cythonized_result(
             else:
                 result_sz = len(values)

+            result = np.zeros(result_sz, dtype=cython_dtype)
             if needs_2d:
-                result = np.zeros((result_sz, 1), dtype=cython_dtype)
-            else:
-                result = np.zeros(result_sz, dtype=cython_dtype)
+                result = result.reshape((-1, 1))
             func = partial(base_func, result)

             inferences = None
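The final tweak is behavior-preserving because reshape on a freshly allocated contiguous array returns a view, not a copy: whether the buffer is created 2-D or reshaped to 2-D, the kernel writes into the same memory. A quick check:

    import numpy as np

    result = np.zeros(4, dtype=np.float64)
    result2d = result.reshape((-1, 1))  # a view on the same buffer
    assert result2d.base is result

    result2d[2, 0] = 7.0     # what a 2-D kernel would write...
    assert result[2] == 7.0  # ...shows through the 1-D array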