diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index bd7a95a2ae0f4..8b467d768df8b 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -667,7 +667,6 @@ Enhancements -- Bug in ``get`` where an ``IndexError`` would not cause the default value to be returned (:issue:`7725`) @@ -745,10 +744,10 @@ Bug Fixes - Bug in DataFrameGroupby.transform when transforming with a passed non-sorted key (:issue:`8046`) - Bug in repeated timeseries line and area plot may result in ``ValueError`` or incorrect kind (:issue:`7733`) - Bug in inference in a MultiIndex with ``datetime.date`` inputs (:issue:`7888`) - +- Bug in ``get`` where an ``IndexError`` would not cause the default value to be returned (:issue:`7725`) - Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may reset nanosecond (:issue:`7697`) - Bug in ``offsets.apply``, ``rollforward`` and ``rollback`` may raise ``AttributeError`` if ``Timestamp`` has ``dateutil`` tzinfo (:issue:`7697`) - +- Bug in sorting a multi-index frame with a Float64Index (:issue:`8017`) - Bug in ``is_superperiod`` and ``is_subperiod`` cannot handle higher frequencies than ``S`` (:issue:`7760`, :issue:`7772`, :issue:`7803`) diff --git a/pandas/core/format.py b/pandas/core/format.py index e8645e578c976..e023f79d9f3dd 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -625,10 +625,17 @@ def is_numeric_dtype(dtype): fmt_columns = columns.format(sparsify=False, adjoin=False) fmt_columns = lzip(*fmt_columns) dtypes = self.frame.dtypes.values + + # if we have a Float level, they don't use leading space at all + restrict_formatting = any([ l.is_floating for l in columns.levels ]) need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) - str_columns = list(zip(*[ - [' ' + y if y not in self.formatters and need_leadsp[x] - else y for y in x] for x in fmt_columns])) + + def space_format(x,y): + if y not in self.formatters and need_leadsp[x] and not restrict_formatting: + return ' ' + y + return y + 
+ str_columns = list(zip(*[ [ space_format(x,y) for y in x ] for x in fmt_columns ])) if self.sparsify: str_columns = _sparsify(str_columns) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dd3d5c0e31196..939a94c033ea0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2770,6 +2770,12 @@ def trans(v): na_position=na_position) elif isinstance(labels, MultiIndex): + + # make sure that the axis is lexsorted to start + # if not we need to reconstruct to get the correct indexer + if not labels.is_lexsorted(): + labels = MultiIndex.from_tuples(labels.values) + indexer = _lexsort_indexer(labels.labels, orders=ascending, na_position=na_position) indexer = com._ensure_platform_int(indexer) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ed47161b9a957..f4192e5761d7a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1628,6 +1628,7 @@ def sort_index(self, axis=0, ascending=True): new_axis = labels.take(sort_index) return self.reindex(**{axis_name: new_axis}) + _shared_docs['reindex'] = """ Conform %(klass)s to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. 
A new object @@ -3558,10 +3559,10 @@ def _tz_convert(ax, tz): result = self._constructor(self._data, copy=copy) result.set_axis(axis,ax) return result.__finalize__(self) - + @deprecate_kwarg(old_arg_name='infer_dst', new_arg_name='ambiguous', mapping={True: 'infer', False: 'raise'}) - def tz_localize(self, tz, axis=0, level=None, copy=True, + def tz_localize(self, tz, axis=0, level=None, copy=True, ambiguous='raise'): """ Localize tz-naive TimeSeries to target time zone @@ -3583,7 +3584,7 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'raise' will raise an AmbiguousTimeError if there are ambiguous times infer_dst : boolean, default False (DEPRECATED) Attempt to infer fall dst-transition hours based on order - + Returns ------- """ diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 0bfa9be2feacf..2171b8e8428a4 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -214,6 +214,44 @@ def test_sort_index_preserve_levels(self): result = self.frame.sort_index() self.assertEqual(result.index.names, self.frame.index.names) + def test_sorting_repr_8017(self): + + np.random.seed(0) + data = np.random.randn(3,4) + + for gen, extra in [([1.,3.,2.,5.],4.), + ([1,3,2,5],4), + ([Timestamp('20130101'),Timestamp('20130103'),Timestamp('20130102'),Timestamp('20130105')],Timestamp('20130104')), + (['1one','3one','2one','5one'],'4one')]: + columns = MultiIndex.from_tuples([('red', i) for i in gen]) + df = DataFrame(data, index=list('def'), columns=columns) + df2 = pd.concat([df,DataFrame('world', + index=list('def'), + columns=MultiIndex.from_tuples([('red', extra)]))],axis=1) + + # check that the repr is good + # make sure that we have a correct sparsified repr + # e.g. 
only 1 header of red + self.assertEqual(str(df2).splitlines()[0].split(),['red']) + + # GH 8017 + # sorting fails after columns added + + # construct single-dtype then sort + result = df.copy().sort_index(axis=1) + expected = df.iloc[:,[0,2,1,3]] + assert_frame_equal(result, expected) + + result = df2.sort_index(axis=1) + expected = df2.iloc[:,[0,2,1,4,3]] + assert_frame_equal(result, expected) + + # setitem then sort + result = df.copy() + result[('red',extra)] = 'world' + result = result.sort_index(axis=1) + assert_frame_equal(result, expected) + def test_repr_to_string(self): repr(self.frame) repr(self.ymd)