Skip to content

Commit d8f2d70

Browse files
committed
BUG: align logic between replace dict using integers and using strings (# 20656)
1 parent 415a01e commit d8f2d70

File tree

4 files changed

+118
-25
lines changed

4 files changed

+118
-25
lines changed

doc/source/whatsnew/v0.24.0.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -573,6 +573,5 @@ Other
573573
- :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`)
574574
- Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`)
575575
- :meth:`~pandas.io.formats.style.Styler.background_gradient` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` (:issue:`15204`)
576-
-
577-
-
576+
- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when a dict is used as the ``to_replace`` value and one key in the dict is another key's value: the results were inconsistent between using integer keys and using string keys (:issue:`20656`)
578577
-

pandas/core/internals/blocks.py

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,44 @@ def _nanpercentile(values, q, axis, **kw):
16891689
placement=np.arange(len(result)),
16901690
ndim=ndim)
16911691

1692+
def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True,
                    convert=False, regex=False, mgr=None):
    """
    Replace the values selected by a boolean mask with another value.

    Parameters
    ----------
    mask : array_like of bool, optional
        The mask of values to replace. ``None`` is treated like an
        all-False mask, i.e. there is nothing to replace.
    src : object
        The value to replace. It is ignored if regex is False.
    dst : object
        The value to be replaced with.
    inplace : bool, default True
        Passed through to ``putmask`` (non-regex) or ``_replace_single``
        (regex).
    convert : bool, default False
        If true, try to coerce any object types to better types. Only
        passed on when regex is True.
    regex : bool, default False
        If true, search for element matching with the pattern in src.
        ``mask`` is handed to ``_replace_single`` together with the
        pattern.
    mgr : optional
        Passed through to ``_replace_single`` when regex is True; not
        used otherwise.

    Returns
    -------
    The original block if nothing is selected by ``mask``; otherwise
    whatever ``putmask`` (non-regex) or ``_replace_single`` (regex)
    returns.
    """
    # Nothing selected: leave the block untouched.  Checking for None
    # first keeps the default ``mask=None`` from raising AttributeError.
    if mask is None or not mask.any():
        return self

    if regex:
        return self._replace_single(src, dst, inplace=inplace,
                                    regex=regex,
                                    convert=convert,
                                    mask=mask,
                                    mgr=mgr)

    # Let the block adapt to ``dst`` before writing it; a local name is
    # used rather than rebinding ``self``.
    target = self.coerce_to_target_dtype(dst)
    return target.putmask(mask, dst, inplace=inplace)
1729+
16921730

16931731
class ScalarBlock(Block):
16941732
"""
@@ -2464,7 +2502,7 @@ def replace(self, to_replace, value, inplace=False, filter=None,
24642502
regex=regex, mgr=mgr)
24652503

24662504
def _replace_single(self, to_replace, value, inplace=False, filter=None,
2467-
regex=False, convert=True, mgr=None):
2505+
regex=False, convert=True, mgr=None, mask=None):
24682506

24692507
inplace = validate_bool_kwarg(inplace, 'inplace')
24702508

@@ -2531,15 +2569,56 @@ def re_replacer(s):
25312569
else:
25322570
filt = self.mgr_locs.isin(filter).nonzero()[0]
25332571

2534-
new_values[filt] = f(new_values[filt])
2572+
if mask is None:
2573+
new_values[filt] = f(new_values[filt])
2574+
else:
2575+
new_values[filt][mask] = f(new_values[filt][mask])
25352576

25362577
# convert
25372578
block = self.make_block(new_values)
25382579
if convert:
25392580
block = block.convert(by_item=True, numeric=False)
2540-
25412581
return block
25422582

2583+
def _replace_coerce(self, mask=None, src=None, dst=None, inplace=True,
                    convert=False, regex=False, mgr=None):
    """
    Replace masked values via the parent class, then optionally convert.

    Parameters
    ----------
    mask : array_like of bool, optional
        The mask of values to replace. ``None`` is treated like an
        all-False mask, i.e. there is nothing to replace.
    src : object
        The value to replace. It is ignored if regex is False.
    dst : object
        The value to be replaced with.
    inplace : bool, default True
        Passed through to the parent implementation.
    convert : bool, default False
        If true, try to coerce any object types to better types: every
        block returned by the parent implementation is run through
        ``convert(by_item=True, numeric=False, copy=True)``.
    regex : bool, default False
        If true, search for element matching with the pattern in src.
    mgr : optional
        Passed through to the parent implementation.

    Returns
    -------
    The original block if nothing is selected by ``mask``; otherwise the
    parent's result, or a list with one ``convert`` result per returned
    block when ``convert`` is true.
    """
    # Nothing selected: leave the block untouched.  Checking for None
    # first keeps the default ``mask=None`` from raising AttributeError.
    if mask is None or not mask.any():
        return self

    replaced = super(ObjectBlock, self)._replace_coerce(mask=mask,
                                                        src=src,
                                                        dst=dst,
                                                        inplace=inplace,
                                                        convert=convert,
                                                        regex=regex,
                                                        mgr=mgr)
    if convert:
        # Iterating assumes the parent returned a sequence of blocks here.
        replaced = [b.convert(by_item=True, numeric=False, copy=True)
                    for b in replaced]
    return replaced
2621+
25432622

25442623
class CategoricalBlock(ExtensionBlock):
25452624
__slots__ = ()

pandas/core/internals/managers.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from functools import partial
44
import itertools
55
import operator
6+
import re
67

78
import numpy as np
89

@@ -19,11 +20,13 @@
1920
is_datetimelike_v_numeric,
2021
is_numeric_v_string_like, is_extension_type,
2122
is_extension_array_dtype,
22-
is_scalar)
23+
is_scalar,
24+
is_re_compilable)
2325
from pandas.core.dtypes.cast import (
2426
maybe_promote,
2527
infer_dtype_from_scalar,
26-
find_common_type)
28+
find_common_type,
29+
maybe_convert_objects)
2730
from pandas.core.dtypes.missing import isna
2831
import pandas.core.dtypes.concat as _concat
2932
from pandas.core.dtypes.generic import ABCSeries, ABCExtensionArray
@@ -571,12 +574,17 @@ def replace_list(self, src_list, dest_list, inplace=False, regex=False,
571574
# figure out our mask a-priori to avoid repeated replacements
572575
values = self.as_array()
573576

574-
def comp(s):
577+
def comp(s, reg=False):
575578
if isna(s):
576579
return isna(values)
577-
return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq)
580+
if hasattr(s, 'asm8'):
581+
return _maybe_compare(maybe_convert_objects(values),
582+
getattr(s, 'asm8'), reg)
583+
if reg and is_re_compilable(s):
584+
return _maybe_compare(values, s, reg)
585+
return _maybe_compare(values, s, reg)
578586

579-
masks = [comp(s) for i, s in enumerate(src_list)]
587+
masks = [comp(s, regex) for i, s in enumerate(src_list)]
580588

581589
result_blocks = []
582590
src_len = len(src_list) - 1
@@ -588,20 +596,16 @@ def comp(s):
588596
for i, (s, d) in enumerate(zip(src_list, dest_list)):
589597
new_rb = []
590598
for b in rb:
591-
if b.dtype == np.object_:
592-
convert = i == src_len
593-
result = b.replace(s, d, inplace=inplace, regex=regex,
594-
mgr=mgr, convert=convert)
599+
m = masks[i][b.mgr_locs.indexer]
600+
convert = i == src_len
601+
result = b._replace_coerce(mask=m, src=s, dst=d,
602+
inplace=inplace,
603+
convert=convert, regex=regex,
604+
mgr=mgr)
605+
if m.any():
595606
new_rb = _extend_blocks(result, new_rb)
596607
else:
597-
# get our mask for this element, sized to this
598-
# particular block
599-
m = masks[i][b.mgr_locs.indexer]
600-
if m.any():
601-
b = b.coerce_to_target_dtype(d)
602-
new_rb.extend(b.putmask(m, d, inplace=True))
603-
else:
604-
new_rb.append(b)
608+
new_rb.append(b)
605609
rb = new_rb
606610
result_blocks.extend(rb)
607611

@@ -1890,7 +1894,12 @@ def _consolidate(blocks):
18901894
return new_blocks
18911895

18921896

1893-
def _maybe_compare(a, b, op):
1897+
def _maybe_compare(a, b, regex=False):
1898+
if not regex:
1899+
op = lambda x: operator.eq(x, b)
1900+
else:
1901+
op = np.vectorize(lambda x: bool(re.match(b, x)) if isinstance(x, str)
1902+
else False)
18941903

18951904
is_a_array = isinstance(a, np.ndarray)
18961905
is_b_array = isinstance(b, np.ndarray)
@@ -1902,9 +1911,8 @@ def _maybe_compare(a, b, op):
19021911
# numpy deprecation warning if comparing numeric vs string-like
19031912
elif is_numeric_v_string_like(a, b):
19041913
result = False
1905-
19061914
else:
1907-
result = op(a, b)
1915+
result = op(a)
19081916

19091917
if is_scalar(result) and (is_a_array or is_b_array):
19101918
type_names = [type(a).__name__, type(b).__name__]

pandas/tests/series/test_replace.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,13 @@ def test_replace_string_with_number(self):
243243
expected = pd.Series([1, 2, 3])
244244
tm.assert_series_equal(expected, result)
245245

246+
def test_repace_intertwined_key_value_dict(self):
    """Swap two string values with a dict whose keys are also its values."""
    # GH 20656
    # NOTE: the test name keeps its original 'repace' spelling.
    swapped = pd.Series(['a', 'b']).replace({'a': 'b', 'b': 'a'})
    tm.assert_series_equal(pd.Series(['b', 'a']), swapped)
252+
246253
def test_replace_unicode_with_number(self):
247254
# GH 15743
248255
s = pd.Series([1, 2, 3])

0 commit comments

Comments
 (0)