From 907e3c67e5c9b3584491a32832ae12ed18ba01c7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 19 May 2020 13:36:01 -0700 Subject: [PATCH 1/4] comments --- pandas/core/ops/__init__.py | 16 +++++++++++++--- pandas/core/ops/methods.py | 1 - 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 585e6d0eb0811..582a66bead7f3 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -155,7 +155,7 @@ def _maybe_match_name(a, b): # ----------------------------------------------------------------------------- -def _get_frame_op_default_axis(name): +def _get_frame_op_default_axis(name: str) -> Optional[str]: """ Only DataFrame cares about default_axis, specifically: special methods have default_axis=None and flex methods @@ -392,6 +392,7 @@ def _arith_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ + assert special # non-special uses _flex_method_SERIES str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @@ -416,6 +417,7 @@ def _comp_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ + assert special # non-special uses _flex_method_SERIES str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @@ -443,6 +445,7 @@ def _bool_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ + assert special # non-special uses _flex_method_SERIES op_name = _get_op_name(op, special) @unpack_zerodim_and_defer(op_name) @@ -461,6 +464,7 @@ def wrapper(self, other): def _flex_method_SERIES(cls, op, special): + assert not special # "special" also means "not flex" name = _get_op_name(op, special) doc = _make_flex_doc(name, "series") @@ -680,6 +684,7 @@ def _frame_arith_method_with_reindex( def _arith_method_FRAME(cls, op, special): + # This is the only function where `special` can be either True or False str_rep = _get_opstr(op) op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) @@ -701,18 +706,20 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): ): return _frame_arith_method_with_reindex(self, other, op) + # TODO: why are we passing flex=True instead of flex=not special? + # 15 tests fail if we pass flex=not special instead self, other = _align_method_FRAME(self, other, axis, flex=True, level=level) if isinstance(other, ABCDataFrame): # Another DataFrame pass_op = op if should_series_dispatch(self, other, op) else na_op - pass_op = pass_op if not is_logical else op - new_data = self._combine_frame(other, pass_op, fill_value) elif isinstance(other, ABCSeries): # For these values of `axis`, we end up dispatching to Series op, # so do not want the masked op. + # TODO: the above comment is no longer accurate since we now + # operate blockwise if other._values is an ndarray pass_op = op if axis in [0, "columns", None] else na_op pass_op = pass_op if not is_logical else op @@ -738,9 +745,11 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): def _flex_comp_method_FRAME(cls, op, special): + assert not special # "special" also means "not flex" str_rep = _get_opstr(op) op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) + assert default_axis == "columns", default_axis # because we are not "special" doc = _flex_comp_doc_FRAME.format( op_name=op_name, desc=_op_descriptions[op_name]["desc"] @@ -772,6 +781,7 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(cls, op, special): + assert special # "special" also means "not flex" str_rep = _get_opstr(op) op_name = _get_op_name(op, special) diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index 63086f62b6445..a4694a6e5134f 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -207,7 +207,6 @@ def _create_methods(cls, arith_method, comp_method, bool_method, special): dict( and_=bool_method(cls, operator.and_, special), or_=bool_method(cls, operator.or_, special), - # For some reason ``^`` wasn't used in original. xor=bool_method(cls, operator.xor, special), rand_=bool_method(cls, rand_, special), ror_=bool_method(cls, ror_, special), From 971a55f6b10bd22485fb5614337ec7c7169bbbda Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 20 May 2020 20:31:26 -0700 Subject: [PATCH 2/4] CLN: always dispatch-to-series --- pandas/core/frame.py | 10 +--- pandas/core/ops/__init__.py | 8 ++- pandas/core/ops/array_ops.py | 14 +++-- pandas/core/ops/dispatch.py | 60 --------------------- pandas/tests/arithmetic/test_timedelta64.py | 5 +- 5 files changed, 14 insertions(+), 83 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2d181e826c2a9..8f75dd95c9a82 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -134,7 +134,6 @@ sanitize_index, to_arrays, ) -from pandas.core.ops.missing import dispatch_fill_zeros from pandas.core.series import Series from pandas.core.sorting import ensure_key_mapped @@ -5733,14 +5732,7 @@ def _arith_op(left, right): left, right = ops.fill_binop(left, right, fill_value) return func(left, right) - if ops.should_series_dispatch(self, other, func): - # iterate over columns - new_data = ops.dispatch_to_series(self, other, _arith_op) - else: - with np.errstate(all="ignore"): - res_values = _arith_op(self.values, other.values) - new_data = dispatch_fill_zeros(func, self.values, other.values, res_values) - + new_data = ops.dispatch_to_series(self, other, _arith_op) return new_data def _construct_result(self, result) -> "DataFrame": diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 582a66bead7f3..37e5252a8ae65 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -21,14 +21,12 @@ from pandas.core.ops.array_ops import ( arithmetic_op, comparison_op, - define_na_arithmetic_op, get_array_op, logical_op, ) from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401 from pandas.core.ops.blockwise import operate_blockwise from pandas.core.ops.common import unpack_zerodim_and_defer -from pandas.core.ops.dispatch import should_series_dispatch from pandas.core.ops.docstrings import ( _arith_doc_FRAME, _flex_comp_doc_FRAME, @@ -628,7 +626,7 @@ def to_series(right): def _should_reindex_frame_op( - left: "DataFrame", right, op, axis, default_axis: int, fill_value, level + left: "DataFrame", right, op, axis, default_axis, fill_value, level ) -> bool: """ Check if this is an operation between DataFrames that will need to reindex. @@ -689,7 +687,7 @@ def _arith_method_FRAME(cls, op, special): op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) - na_op = define_na_arithmetic_op(op, str_rep) + na_op = get_array_op(op, str_rep) is_logical = str_rep in ["&", "|", "^"] if op_name in _op_descriptions: @@ -712,7 +710,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame - pass_op = op if should_series_dispatch(self, other, op) else na_op + pass_op = op if is_logical else na_op new_data = self._combine_frame(other, pass_op, fill_value) elif isinstance(other, ABCSeries): diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index eef42592d2b30..e11f162559a66 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -121,13 +121,6 @@ def masked_arith_op(x: np.ndarray, y, op): return result -def define_na_arithmetic_op(op, str_rep: Optional[str]): - def na_op(x, y): - return na_arithmetic_op(x, y, op, str_rep) - - return na_op - - def na_arithmetic_op(left, right, op, str_rep: Optional[str], is_cmp: bool = False): """ Return the result of evaluating op on the passed in values. @@ -386,8 +379,13 @@ def get_array_op(op, str_rep: Optional[str] = None): Returns ------- - function + functools.partial """ + if isinstance(op, partial): + # We get here via dispatch_to_series in DataFrame case + # TODO: avoid getting here + return op + op_name = op.__name__.strip("_") if op_name in {"eq", "ne", "lt", "le", "gt", "ge"}: return partial(comparison_op, op=op, str_rep=str_rep) diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py index a7dcdd4f9d585..bfd4afe0de86f 100644 --- a/pandas/core/ops/dispatch.py +++ b/pandas/core/ops/dispatch.py @@ -5,12 +5,6 @@ from pandas._typing import ArrayLike -from pandas.core.dtypes.common import ( - is_datetime64_dtype, - is_integer_dtype, - is_object_dtype, - is_timedelta64_dtype, -) from pandas.core.dtypes.generic import ABCExtensionArray @@ -28,57 +22,3 @@ def should_extension_dispatch(left: ArrayLike, right: Any) -> bool: bool """ return isinstance(left, ABCExtensionArray) or isinstance(right, ABCExtensionArray) - - -def should_series_dispatch(left, right, op): - """ - Identify cases where a DataFrame operation should dispatch to its - Series counterpart. - - Parameters - ---------- - left : DataFrame - right : DataFrame or Series - op : binary operator - - Returns - ------- - override : bool - """ - if left._is_mixed_type or right._is_mixed_type: - return True - - if op.__name__.strip("_") in ["and", "or", "xor", "rand", "ror", "rxor"]: - # TODO: GH references for what this fixes - # Note: this check must come before the check for nonempty columns. - return True - - if right.ndim == 1: - # operating with Series, short-circuit checks that would fail - # with AttributeError. - return False - - if not len(left.columns) or not len(right.columns): - # ensure obj.dtypes[0] exists for each obj - return False - - ldtype = left.dtypes.iloc[0] - rdtype = right.dtypes.iloc[0] - - if ( - is_timedelta64_dtype(ldtype) - and (is_integer_dtype(rdtype) or is_object_dtype(rdtype)) - ) or ( - is_timedelta64_dtype(rdtype) - and (is_integer_dtype(ldtype) or is_object_dtype(ldtype)) - ): - # numpy integer dtypes as timedelta64 dtypes in this scenario - return True - - if (is_datetime64_dtype(ldtype) and is_object_dtype(rdtype)) or ( - is_datetime64_dtype(rdtype) and is_object_dtype(ldtype) - ): - # in particular case where one is an array of DateOffsets - return True - - return False diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 904846c5fa099..1fec059f11df5 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -552,7 +552,10 @@ def test_tda_add_dt64_object_array(self, box_with_array, tz_naive_fixture): obj = tm.box_expected(tdi, box) other = tm.box_expected(dti, box) - with tm.assert_produces_warning(PerformanceWarning): + warn = None + if box is not pd.DataFrame or tz_naive_fixture is None: + warn = PerformanceWarning + with tm.assert_produces_warning(warn): result = obj + other.astype(object) tm.assert_equal(result, other) From b75a31a7185b47f17b47ef89755a40acd3eab357 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 20 May 2020 21:11:41 -0700 Subject: [PATCH 3/4] More accurate assertions --- pandas/core/ops/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 37e5252a8ae65..5a94d99814c95 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -322,7 +322,10 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): return type(left)(bm) elif isinstance(right, ABCDataFrame): - assert right._indexed_same(left) + assert left.index.equals(right.index) + assert left.columns.equals(right.columns) + # TODO: The previous assertion `assert right._indexed_same(left)` + # fails in cases with shaoe[1] == 0 array_op = get_array_op(func, str_rep=str_rep) bm = operate_blockwise(left, right, array_op) @@ -710,8 +713,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame - pass_op = op if is_logical else na_op - new_data = self._combine_frame(other, pass_op, fill_value) + new_data = self._combine_frame(other, na_op, fill_value) elif isinstance(other, ABCSeries): # For these values of `axis`, we end up dispatching to Series op, From 6c761489a6cdfd97e9204208d7fdbd883554ec86 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 22 May 2020 11:15:27 -0700 Subject: [PATCH 4/4] clarify comment, annotations --- pandas/core/ops/__init__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 5a94d99814c95..8f97e9f0170bc 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -4,7 +4,7 @@ This is not a public API. """ import operator -from typing import TYPE_CHECKING, Optional, Set +from typing import TYPE_CHECKING, Optional, Set, Type import numpy as np @@ -325,7 +325,8 @@ def dispatch_to_series(left, right, func, str_rep=None, axis=None): assert left.index.equals(right.index) assert left.columns.equals(right.columns) # TODO: The previous assertion `assert right._indexed_same(left)` - # fails in cases with shaoe[1] == 0 + # fails in cases with empty columns reached via + # _frame_arith_method_with_reindex array_op = get_array_op(func, str_rep=str_rep) bm = operate_blockwise(left, right, array_op) @@ -684,7 +685,7 @@ def _frame_arith_method_with_reindex( return result.reindex(join_columns, axis=1) -def _arith_method_FRAME(cls, op, special): +def _arith_method_FRAME(cls: Type["DataFrame"], op, special: bool): # This is the only function where `special` can be either True or False str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @@ -744,7 +745,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): return f -def _flex_comp_method_FRAME(cls, op, special): +def _flex_comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): assert not special # "special" also means "not flex" str_rep = _get_opstr(op) op_name = _get_op_name(op, special) @@ -780,7 +781,7 @@ def f(self, other, axis=default_axis, level=None): return f -def _comp_method_FRAME(cls, op, special): +def _comp_method_FRAME(cls: Type["DataFrame"], op, special: bool): assert special # "special" also means "not flex" str_rep = _get_opstr(op) op_name = _get_op_name(op, special)