BUG: Joining on non-unique PeriodIndex fails #16871

Dr-Irv · 2017-07-10T14:17:19Z

Code Sample, a copy-pastable example if possible

import pandas as pd
perindex = pd.period_range('2016-01-01', periods=16, freq='M')
perdf = pd.DataFrame([i for i in range(len(perindex))],
                     index=perindex, columns=['pnum'])
df2 = pd.concat([perdf, perdf])
perdf.merge(df2, left_index=True, right_index=True, how='outer')

Problem description

I reported this in #16541, but I guess it fell through the cracks. Here is the stack trace:

TypeError                                 Traceback (most recent call last)
<ipython-input-2-c7c6bdf18c3f> in <module>()
      3                      index=perindex, columns=['pnum'])
      4 df2 = pd.concat([perdf, perdf])
----> 5 perdf.merge(df2, left_index=True, right_index=True, how='outer')

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\frame.py in merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
   4720                      right_on=right_on, left_index=left_index,
   4721                      right_index=right_index, sort=sort, suffixes=suffixes,
-> 4722                      copy=copy, indicator=indicator)
   4723 
   4724     def round(self, decimals=0, *args, **kwargs):

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator)
     52                          right_index=right_index, sort=sort, suffixes=suffixes,
     53                          copy=copy, indicator=indicator)
---> 54     return op.get_result()
     55 
     56 

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in get_result(self)
    567                 self.left, self.right)
    568 
--> 569         join_index, left_indexer, right_indexer = self._get_join_info()
    570 
    571         ldata, rdata = self.left._data, self.right._data

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _get_join_info(self)
    720             join_index, left_indexer, right_indexer = \
    721                 left_ax.join(right_ax, how=self.how, return_indexers=True,
--> 722                              sort=self.sort)
    723         elif self.right_index and self.how == 'left':
    724             join_index, left_indexer, right_indexer = \

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\period.py in join(self, other, how, level, return_indexers, sort)
    929         result = Int64Index.join(self, other, how=how, level=level,
    930                                  return_indexers=return_indexers,
--> 931                                  sort=sort)
    932 
    933         if return_indexers:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\base.py in join(self, other, how, level, return_indexers, sort)
   3044             else:
   3045                 return self._join_non_unique(other, how=how,
-> 3046                                              return_indexers=return_indexers)
   3047         elif self.is_monotonic and other.is_monotonic:
   3048             try:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\indexes\base.py in _join_non_unique(self, other, how, return_indexers)
   3125         left_idx, right_idx = _get_join_indexers([self.values],
   3126                                                  [other._values], how=how,
-> 3127                                                  sort=True)
   3128 
   3129         left_idx = _ensure_platform_int(left_idx)

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _get_join_indexers(left_keys, right_keys, sort, how, **kwargs)
    980 
    981     # get left & right join labels and num. of levels at each location
--> 982     llab, rlab, shape = map(list, zip(* map(fkeys, left_keys, right_keys)))
    983 
    984     # get flat i8 keys from label lists

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _factorize_keys(lk, rk, sort)
   1410     if sort:
   1411         uniques = rizer.uniques.to_array()
-> 1412         llab, rlab = _sort_labels(uniques, llab, rlab)
   1413 
   1414     # NA group

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\reshape\merge.py in _sort_labels(uniques, left, right)
   1436     labels = np.concatenate([left, right])
   1437 
-> 1438     _, new_labels = algos.safe_sort(uniques, labels, na_sentinel=-1)
   1439     new_labels = _ensure_int64(new_labels)
   1440     new_left, new_right = new_labels[:l], new_labels[l:]

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\algorithms.py in safe_sort(values, labels, na_sentinel, assume_unique)
    481     if compat.PY3 and lib.infer_dtype(values) == 'mixed-integer':
    482         # unorderable in py3 if mixed str/int
--> 483         ordered = sort_mixed(values)
    484     else:
    485         try:

C:\Anaconda3\envs\py36\lib\site-packages\pandas\core\algorithms.py in sort_mixed(values)
    474         str_pos = np.array([isinstance(x, string_types) for x in values],
    475                            dtype=bool)
--> 476         nums = np.sort(values[~str_pos])
    477         strs = np.sort(values[str_pos])
    478         return _ensure_object(np.concatenate([nums, strs]))

C:\Anaconda3\envs\py36\lib\site-packages\numpy\core\fromnumeric.py in sort(a, axis, kind, order)
    820     else:
    821         a = asanyarray(a).copy(order="K")
--> 822     a.sort(axis=axis, kind=kind, order=order)
    823     return a
    824 

pandas/_libs/period.pyx in pandas._libs.period._Period.__richcmp__ (pandas\_libs\period.c:12067)()

TypeError: Cannot compare type 'Period' with type 'int'

Expected Output

No exception should be raised: the outer merge on the non-unique PeriodIndex should return the joined DataFrame.

Output of `pd.show_versions()`

INSTALLED VERSIONS
------------------
commit: None
python: 3.6.1.final.0
python-bits: 64
OS: Windows
OS-release: 10
machine: AMD64
processor: Intel64 Family 6 Model 60 Stepping 3, GenuineIntel
byteorder: little
LC_ALL: None
LANG: None
LOCALE: None.None

pandas: 0.20.3
pytest: None
pip: 9.0.1
setuptools: 27.2.0
Cython: None
numpy: 1.12.1
scipy: 0.19.0
xarray: None
IPython: 6.0.0
sphinx: None
patsy: 0.4.1
dateutil: 2.6.0
pytz: 2017.2
blosc: None
bottleneck: None
tables: None
numexpr: None
feather: None
matplotlib: 2.0.0
openpyxl: None
xlrd: 1.0.0
xlwt: None
xlsxwriter: None
lxml: None
bs4: None
html5lib: 0.999
sqlalchemy: None
pymysql: None
psycopg2: None
jinja2: 2.9.6
s3fs: None
pandas_gbq: None
pandas_datareader: None

The text was updated successfully, but these errors were encountered:

jreback · 2017-07-10T20:54:58Z

The left key passed to `_get_join_indexers` in `_join_non_unique` should be `self._values`, not `self.values`:

> /Users/jreback/pandas/pandas/core/indexes/base.py(3134)_join_non_unique()
   3132         left_idx, right_idx = _get_join_indexers([self.values],
   3133                                                  [other._values], how=how,
-> 3134                                                  sort=True)
   3135 
   3136         left_idx = _ensure_platform_int(left_idx)

ipdb> p self.values

We probably don't have many tests that exercise this. Additional tests and a PR would be welcome!

jreback · 2017-07-10T20:55:30Z

This is the non-unique case, which got overlooked I think.

alanbato · 2017-07-15T15:56:09Z

Working on it!

jreback added the labels Bug, Difficulty Intermediate, Period (Period data type), and Reshaping (Concat, Merge/Join, Stack/Unstack, Explode) on Jul 10, 2017

jreback added this to the Next Major Release milestone Jul 10, 2017

jreback changed the title from "Joining on PeriodIndex fails" to "BUG: Joining on non-unique PeriodIndex fails" on Jul 10, 2017

alanbato mentioned this issue Jul 15, 2017

Support non unique period indexes on join and merge operations #16949

Merged

4 tasks

jreback modified the milestones: 0.21.0, Next Major Release Jul 15, 2017

TomAugspurger closed this as completed in #16949 Jul 15, 2017

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

BUG: Joining on non-unique PeriodIndex fails #16871

BUG: Joining on non-unique PeriodIndex fails #16871

Dr-Irv commented Jul 10, 2017 •

edited

Loading

jreback commented Jul 10, 2017

Uh oh!

jreback commented Jul 10, 2017

Uh oh!

alanbato commented Jul 15, 2017

Uh oh!

Uh oh!

BUG: Joining on non-unique PeriodIndex fails #16871

BUG: Joining on non-unique PeriodIndex fails #16871

Comments

Dr-Irv commented Jul 10, 2017 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Code Sample, a copy-pastable example if possible

Problem description

Expected Output

Output of pd.show_versions()

jreback commented Jul 10, 2017

Uh oh!

jreback commented Jul 10, 2017

Uh oh!

alanbato commented Jul 15, 2017

Uh oh!

Dr-Irv commented Jul 10, 2017 •

edited

Loading

Output of `pd.show_versions()`