-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
PERF: do DataFrame.op(series, axis=0) blockwise #31296
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5532b2e
1011185
a77057b
c9c2fbb
b50c0b8
e50f7d5
1dc2deb
0af4ffe
f02b8c0
231a316
bd889ef
4cbb7db
f29e09b
54df295
c899b9e
3110628
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,6 @@ | |
ABCDatetimeArray, | ||
ABCExtensionArray, | ||
ABCIndex, | ||
ABCIndexClass, | ||
ABCSeries, | ||
ABCTimedeltaArray, | ||
) | ||
|
@@ -53,13 +52,15 @@ def comp_method_OBJECT_ARRAY(op, x, y): | |
if isinstance(y, (ABCSeries, ABCIndex)): | ||
y = y.values | ||
|
||
result = libops.vec_compare(x.ravel(), y, op) | ||
if x.shape != y.shape: | ||
raise ValueError("Shapes must match", x.shape, y.shape) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result = libops.vec_compare(x.ravel(), y.ravel(), op) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
result = libops.scalar_compare(x.ravel(), y, op) | ||
return result.reshape(x.shape) | ||
|
||
|
||
def masked_arith_op(x, y, op): | ||
def masked_arith_op(x: np.ndarray, y, op): | ||
""" | ||
If the given arithmetic operation fails, attempt it again on | ||
only the non-null elements of the input array(s). | ||
|
@@ -78,10 +79,22 @@ def masked_arith_op(x, y, op): | |
dtype = find_common_type([x.dtype, y.dtype]) | ||
result = np.empty(x.size, dtype=dtype) | ||
|
||
if len(x) != len(y): | ||
if not _can_broadcast(x, y): | ||
raise ValueError(x.shape, y.shape) | ||
|
||
# Call notna on pre-broadcasted y for performance | ||
ymask = notna(y) | ||
y = np.broadcast_to(y, x.shape) | ||
ymask = np.broadcast_to(ymask, x.shape) | ||
|
||
else: | ||
ymask = notna(y) | ||
|
||
# NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex | ||
# we would get int64 dtype, see GH#19956 | ||
yrav = y.ravel() | ||
mask = notna(xrav) & notna(yrav) | ||
mask = notna(xrav) & ymask.ravel() | ||
|
||
if yrav.shape != mask.shape: | ||
# FIXME: GH#5284, GH#5035, GH#19448 | ||
|
@@ -211,6 +224,51 @@ def arithmetic_op(left: ArrayLike, right: Any, op, str_rep: str): | |
return res_values | ||
|
||
|
||
def _broadcast_comparison_op(lvalues, rvalues, op) -> np.ndarray:
    """
    Broadcast a comparison operation between two 2D arrays.

    ``comparison_op`` only routes here after ``_can_broadcast`` has accepted
    the operands, i.e. ``rvalues`` is a single row whose width matches the
    rows of ``lvalues``.

    Parameters
    ----------
    lvalues : np.ndarray or ExtensionArray
    rvalues : np.ndarray or ExtensionArray
    op : comparison operator
        Forwarded unchanged to ``comparison_op``.

    Returns
    -------
    np.ndarray[bool]
    """
    if isinstance(rvalues, np.ndarray):
        # NOTE(review): flagged in review as looking like the broadcasting
        # case around L81 of array_ops.py; consolidate the two in a
        # follow-up if that turns out to be the case.
        rvalues = np.broadcast_to(rvalues, lvalues.shape)
        return comparison_op(lvalues, rvalues, op)

    # Non-ndarray right operand: compare one row of lvalues at a time.
    # The row to compare against is rvalues[0] (length == lvalues.shape[1]).
    # The previous ``rvalues[:, 0]`` selected a length-1 column, which only
    # matched the row length when the arrays had exactly one column and
    # otherwise tripped the length check inside comparison_op.
    row = rvalues[0]
    result = np.empty(lvalues.shape, dtype=bool)
    for i in range(len(lvalues)):
        result[i, :] = comparison_op(lvalues[i], row, op)
    return result
|
||
|
||
def _can_broadcast(lvalues, rvalues) -> bool: | ||
""" | ||
Check if we can broadcast rvalues to match the shape of lvalues. | ||
|
||
Parameters | ||
---------- | ||
lvalues : np.ndarray or ExtensionArray | ||
rvalues : np.ndarray or ExtensionArray | ||
|
||
Returns | ||
------- | ||
bool | ||
""" | ||
# We assume that lengths dont match | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same |
||
if lvalues.ndim == rvalues.ndim == 2: | ||
# See if we can broadcast unambiguously | ||
if lvalues.shape[1] == rvalues.shape[-1]: | ||
if rvalues.shape[0] == 1: | ||
return True | ||
return False | ||
|
||
|
||
def comparison_op( | ||
left: ArrayLike, right: Any, op, str_rep: Optional[str] = None, | ||
) -> ArrayLike: | ||
|
@@ -237,12 +295,16 @@ def comparison_op( | |
# TODO: same for tuples? | ||
rvalues = np.asarray(rvalues) | ||
|
||
if isinstance(rvalues, (np.ndarray, ABCExtensionArray, ABCIndexClass)): | ||
if isinstance(rvalues, (np.ndarray, ABCExtensionArray)): | ||
# TODO: make this treatment consistent across ops and classes. | ||
# We are not catching all listlikes here (e.g. frozenset, tuple) | ||
# The ambiguous case is object-dtype. See GH#27803 | ||
if len(lvalues) != len(rvalues): | ||
raise ValueError("Lengths must match to compare") | ||
if _can_broadcast(lvalues, rvalues): | ||
return _broadcast_comparison_op(lvalues, rvalues, op) | ||
raise ValueError( | ||
"Lengths must match to compare", lvalues.shape, rvalues.shape | ||
) | ||
|
||
if should_extension_dispatch(lvalues, rvalues): | ||
res_values = dispatch_to_extension_op(op, lvalues, rvalues) | ||
|
Uh oh!
There was an error while loading. Please reload this page.