Skip to content

Commit 333eb0f

Browse files
committed
ENH: enable joining with overlap and lsuffix/rsuffix options
1 parent 01ea9cb commit 333eb0f

File tree

4 files changed

+67
-18
lines changed

4 files changed

+67
-18
lines changed

pandas/core/frame.py

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,8 +1410,8 @@ def add_prefix(self, prefix):
14101410
-------
14111411
with_prefix : DataFrame
14121412
"""
1413-
f = (('%s' % prefix) + '%s').__mod__
1414-
return self.rename(columns=f)
1413+
new_data = self._data.add_prefix(prefix)
1414+
return self._constructor(new_data)
14151415

14161416
def add_suffix(self, suffix):
14171417
"""
@@ -1425,8 +1425,8 @@ def add_suffix(self, suffix):
14251425
-------
14261426
with_suffix : DataFrame
14271427
"""
1428-
f = ('%s' + ('%s' % suffix)).__mod__
1429-
return self.rename(columns=f)
1428+
new_data = self._data.add_suffix(suffix)
1429+
return self._constructor(new_data)
14301430

14311431
#----------------------------------------------------------------------
14321432
# Arithmetic / combination related
@@ -2065,7 +2065,7 @@ def append(self, other):
20652065
return self._constructor(data=new_data, index=new_index,
20662066
columns=new_columns)
20672067

2068-
def join(self, other, on=None, how=None):
2068+
def join(self, other, on=None, how=None, lsuffix='', rsuffix=''):
20692069
"""
20702070
Join columns with other DataFrame either on index or on a key
20712071
column
@@ -2083,6 +2083,10 @@ def join(self, other, on=None, how=None):
20832083
* right: use input frame's index
20842084
* outer: form union of indexes
20852085
* inner: use intersection of indexes
2086+
lsuffix : string
2087+
Suffix to use from left frame's overlapping columns
2088+
rsuffix : string
2089+
Suffix to use from right frame's overlapping columns
20862090
20872091
Returns
20882092
-------
@@ -2092,29 +2096,32 @@ def join(self, other, on=None, how=None):
20922096
if how is not None:
20932097
raise Exception('how parameter is not valid when '
20942098
'*on* specified')
2095-
return self._join_on(other, on)
2099+
return self._join_on(other, on, lsuffix, rsuffix)
20962100
else:
20972101
if how is None:
20982102
how = 'left'
2099-
return self._join_index(other, how)
2103+
return self._join_index(other, how, lsuffix, rsuffix)
21002104

2101-
def _join_on(self, other, on):
2105+
def _join_on(self, other, on, lsuffix, rsuffix):
    """
    Join columns of other onto this frame using a key column

    Parameters
    ----------
    other : DataFrame
    on : column label
        Column of this frame whose values are passed as the join key
    lsuffix : string
        Suffix to use from this frame's overlapping columns
    rsuffix : string
        Suffix to use from other's overlapping columns

    Returns
    -------
    joined : DataFrame
        This frame itself is returned when other has an empty index

    Raises
    ------
    Exception
        If *on* is not a column of this frame
    """
    if len(other.index) == 0:
        return self

    if on not in self:
        raise Exception('%s column not contained in this frame!' % on)

    this, other = self._maybe_rename_join(other, lsuffix, rsuffix)

    # Join from the renamed frame so that lsuffix actually takes effect.
    # The key values are still read from self, because the *on* column
    # may itself have been renamed in `this`.
    new_data = this._data.join_on(other._data, self[on], axis=1)
    return self._constructor(new_data)
21102115

2111-
def _join_index(self, other, how):
2116+
def _join_index(self, other, how, lsuffix, rsuffix):
    """
    Join columns with other on the index

    Both frames are reindexed to the index returned by _get_join_index
    for *how*, overlapping columns are renamed using lsuffix / rsuffix,
    and the underlying data of the two frames is merged.
    """
    joined_index = self._get_join_index(other, how)

    left = self.reindex(joined_index)
    right = other.reindex(joined_index)
    left, right = left._maybe_rename_join(right, lsuffix, rsuffix)

    # merge blocks
    return self._constructor(left._data.merge(right._data))
21192126

21202127
def _get_join_index(self, other, how):
@@ -2131,6 +2138,30 @@ def _get_join_index(self, other, how):
21312138

21322139
return join_index
21332140

2141+
def _maybe_rename_join(self, other, lsuffix, rsuffix):
    """
    Rename overlapping columns of the two frames prior to a join

    Parameters
    ----------
    other : DataFrame
    lsuffix : string
        Appended to this frame's columns that also appear in other
    rsuffix : string
        Appended to other's columns that also appear in this frame

    Returns
    -------
    (this, other) : tuple of DataFrame
        The inputs themselves when no columns overlap

    Raises
    ------
    Exception
        If columns overlap and both suffixes are empty
    """
    overlap = self.columns.intersection(other.columns)

    # Nothing shared: hand both frames back untouched
    if len(overlap) == 0:
        return self, other

    if not (lsuffix or rsuffix):
        raise Exception('columns overlap: %s' % overlap)

    def make_renamer(suffix):
        # Only labels present in both frames receive the suffix
        def renamer(label):
            if label in overlap:
                return '%s%s' % (label, suffix)
            return label
        return renamer

    renamed_self = self.rename(columns=make_renamer(lsuffix))
    renamed_other = other.rename(columns=make_renamer(rsuffix))
    return renamed_self, renamed_other
2164+
21342165
#----------------------------------------------------------------------
21352166
# Statistical methods, etc.
21362167

pandas/core/internals.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -673,6 +673,14 @@ def rename_items(self, mapper):
673673
new_axes[0] = new_items
674674
return BlockManager(new_blocks, new_axes)
675675

676+
def add_prefix(self, prefix):
    """
    Return the result of rename_items with *prefix* prepended to each item

    Parameters
    ----------
    prefix : string

    Returns
    -------
    Whatever self.rename_items returns for the prefixing function
    """
    def f(x):
        # Format with an explicit tuple: a '%' inside the prefix is not
        # treated as a format directive, and tuple labels are not
        # unpacked as format arguments
        return '%s%s' % (prefix, x)

    return self.rename_items(f)
679+
680+
def add_suffix(self, suffix):
    """
    Return the result of rename_items with *suffix* appended to each item

    Parameters
    ----------
    suffix : string

    Returns
    -------
    Whatever self.rename_items returns for the suffixing function
    """
    def f(x):
        # Format with an explicit tuple: a '%' inside the suffix is not
        # treated as a format directive, and tuple labels are not
        # unpacked as format arguments
        return '%s%s' % (x, suffix)

    return self.rename_items(f)
683+
676684
def fillna(self, value):
677685
"""
678686

pandas/core/sparse.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1058,16 +1058,16 @@ def _join_on(self, other, on):
10581058
# need to implement?
10591059
raise NotImplementedError
10601060

1061-
def _join_index(self, other, how):
1061+
def _join_index(self, other, how, lsuffix, rsuffix):
    """
    Join columns with other on the index

    Both frames are reindexed to the index returned by _get_join_index
    for *how*, overlapping columns are renamed using lsuffix / rsuffix,
    and other's series are added to this frame's series.
    """
    joined_index = self._get_join_index(other, how)

    left, right = self.reindex(joined_index)._maybe_rename_join(
        other.reindex(joined_index), lsuffix, rsuffix)

    # Fold the right-hand columns into the left-hand column mapping
    combined = left._series
    combined.update(right._series)

    return self._constructor(combined, index=joined_index)

pandas/tests/test_frame.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2299,6 +2299,16 @@ def test_join(self):
22992299
self.assertRaises(Exception, target.join, source, on='C',
23002300
how='left')
23012301

2302+
def test_join_overlap(self):
    """
    Joining two frames with identical columns using lsuffix / rsuffix
    must equal joining the frames after suffixing them by hand
    """
    left = self.frame.copy()
    right = self.frame.copy()

    result = left.join(right, lsuffix='_df1', rsuffix='_df2')

    # Build the expectation by suffixing each side explicitly
    expected = left.add_suffix('_df1').join(right.add_suffix('_df2'))

    assert_frame_equal(result, expected)
2311+
23022312
def test_clip(self):
23032313
median = self.frame.median().median()
23042314

0 commit comments

Comments
 (0)