diff --git a/pandas/core/format.py b/pandas/core/format.py index c2f439877ca00..5a69d8f03f12b 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -26,6 +26,10 @@ from pandas.tseries.period import PeriodIndex, DatetimeIndex +horiz_ellips = u('\u22ef') # ⋯ +vert_ellips = u('\u22ee') # ⋮ +diag_ellips = u('\u22f1') # ⋱ + docstring_to_string = """ Parameters ---------- @@ -314,6 +318,10 @@ def __init__(self, frame, buf=None, columns=None, col_space=None, self._chk_truncate() def _chk_truncate(self): + ''' + If truncation necessary, prepare data and additional + information. + ''' from pandas.tools.merge import concat truncate_h = self.max_cols and (len(self.columns) > self.max_cols) @@ -354,7 +362,6 @@ def _to_str_columns(self): # may include levels names also str_index = self._get_formatted_index(frame) - str_columns = self._get_formatted_column_labels(frame) if self.header: @@ -370,7 +377,6 @@ def _to_str_columns(self): fmt_values = _make_fixed_width(fmt_values, self.justify, minimum=max_colwidth) - max_len = max(np.max([_strlen(x) for x in fmt_values]), max_colwidth) if self.justify == 'left': @@ -395,32 +401,26 @@ def _to_str_columns(self): # Add ... to signal truncated truncate_h = self.truncate_h truncate_v = self.truncate_v - + nlevels_row = frame.index.nlevels + nlevels_col = frame.columns.nlevels #len(str_index) - len(frame) if truncate_h: col_num = self.tr_col_num col_width = len(strcols[col_num][0]) # infer from column header - strcols.insert(col_num + 1, ['...'.center(col_width)] * (len(str_index))) - if truncate_v: - n_header_rows = len(str_index) - len(frame) + strcols.insert(col_num + 1, [ horiz_ellips.center(col_width) ] * (len(str_index))) + if truncate_v: row_num = self.tr_row_num for ix,col in enumerate(strcols): - cwidth = len(strcols[ix][row_num]) # infer from above row - is_dot_col = False - if truncate_h: - is_dot_col = ix == col_num + 1 - if cwidth > 3 or is_dot_col: - my_str = '...' - else: - my_str = '..' - + cwidth = max(len(ele) for ele in strcols[ix][:row_num + nlevels_col]) # infer from above rows if ix == 0: - dot_str = my_str.ljust(cwidth) - elif is_dot_col: - dot_str = my_str.center(cwidth) + cwidth_split = ( cwidth - ( nlevels_row - 1 ) ) // nlevels_row + dot_str = vert_ellips.center(cwidth_split) + for lvl in range(2,nlevels_row): + dot_str += ' ' + vert_ellips.center(cwidth_split) else: - dot_str = my_str.rjust(cwidth) - - strcols[ix].insert(row_num + n_header_rows, dot_str) + dot_str = vert_ellips.center(cwidth) + strcols[ix].insert(row_num + nlevels_row, dot_str) + if truncate_h and truncate_v: + strcols[col_num + 1][nlevels_col + row_num] = diag_ellips.center(col_width) return strcols @@ -428,7 +428,6 @@ def to_string(self): """ Render a DataFrame to a console-friendly tabular output. """ - frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: @@ -464,7 +463,7 @@ def _join_multiline(self, *strcols): col_bins = _binify(col_widths, lwidth) nbins = len(col_bins) - if self.max_rows and len(self.frame) > self.max_rows: + if self.truncate_v: nrows = self.max_rows + 1 else: nrows = len(self.frame) @@ -836,7 +835,7 @@ def _column_header(): elif tag + span > ins_col: recs_new[tag] = span + 1 if lnum == inner_lvl: - values = values[:ins_col] + (u('...'),) + \ + values = values[:ins_col] + (horiz_ellips,) + \ values[ins_col:] else: # sparse col headers do not receive a ... values = values[:ins_col] + \ @@ -846,7 +845,7 @@ def _column_header(): # if ins_col lies between tags, all col headers get ... if tag + span == ins_col: recs_new[ins_col] = 1 - values = values[:ins_col] + (u('...'),) + \ + values = values[:ins_col] + (horiz_ellips,) + \ values[ins_col:] records = recs_new inner_lvl = len(level_lengths) - 1 @@ -861,8 +860,7 @@ def _column_header(): recs_new[tag] = span recs_new[ins_col] = 1 records = recs_new - values = values[:ins_col] + [u('...')] + values[ins_col:] - + values = values[:ins_col] + [horiz_ellips] + values[ins_col:] name = self.columns.names[lnum] row = [''] * (row_levels - 1) + ['' if name is None else com.pprint_thing(name)] @@ -876,6 +874,7 @@ def _column_header(): continue j += 1 row.append(v) + self.write_tr(row, indent, self.indent_delta, tags=tags, header=True) else: @@ -884,8 +883,7 @@ def _column_header(): if truncate_h: ins_col = row_levels + self.fmt.tr_col_num - col_row.insert(ins_col, '...') - + col_row.insert(ins_col, horiz_ellips) self.write_tr(col_row, indent, self.indent_delta, header=True, align=align) @@ -931,6 +929,8 @@ def _write_body(self, indent): def _write_regular_rows(self, fmt_values, indent): truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v + if truncate_h: + dot_col_ix = self.fmt.tr_col_num + 1 ncols = len(self.fmt.tr_frame.columns) nrows = len(self.fmt.tr_frame) @@ -941,19 +941,21 @@ def _write_regular_rows(self, fmt_values, indent): index_values = self.fmt.tr_frame.index.format() for i in range(nrows): - if truncate_v and i == (self.fmt.tr_row_num): - str_sep_row = [ '...' for ele in row ] + str_sep_row = [ vert_ellips for ele in row ] + if truncate_h: + str_sep_row = str_sep_row[:dot_col_ix] + \ + [diag_ellips] + \ + str_sep_row[dot_col_ix+1:] self.write_tr(str_sep_row, indent, self.indent_delta, tags=None, nindex_levels=1) - row = [] row.append(index_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) if truncate_h: - dot_col_ix = self.fmt.tr_col_num + 1 - row.insert(dot_col_ix, '...') + row.insert(dot_col_ix, horiz_ellips) + self.write_tr(row, indent, self.indent_delta, tags=None, nindex_levels=1) @@ -962,6 +964,8 @@ def _write_hierarchical_rows(self, fmt_values, indent): truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v + if truncate_h: + dot_col_ix = self.fmt.tr_col_num + 1 frame = self.fmt.tr_frame ncols = len(frame.columns) nrows = len(frame) @@ -991,7 +995,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): elif tag + span > ins_row: rec_new[tag] = span + 1 dot_row = list(idx_values[ins_row - 1]) - dot_row[-1] = u('...') + dot_row[-1] = vert_ellips idx_values.insert(ins_row,tuple(dot_row)) else: rec_new[tag] = span @@ -999,12 +1003,12 @@ def _write_hierarchical_rows(self, fmt_values, indent): if tag + span == ins_row: rec_new[ins_row] = 1 if lnum == 0: - idx_values.insert(ins_row,tuple([u('...')]*len(level_lengths))) + idx_values.insert(ins_row,tuple([vert_ellips]*len(level_lengths))) level_lengths[lnum] = rec_new level_lengths[inner_lvl][ins_row] = 1 for ix_col in range(len(fmt_values)): - fmt_values[ix_col].insert(ins_row,'...') + fmt_values[ix_col].insert(ins_row,vert_ellips) nrows += 1 for i in range(nrows): @@ -1026,19 +1030,33 @@ def _write_hierarchical_rows(self, fmt_values, indent): row.extend(fmt_values[j][i] for j in range(ncols)) if truncate_h: - row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, '...') + if i == self.fmt.tr_row_num: + row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, + diag_ellips) + else: + row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, + horiz_ellips) + self.write_tr(row, indent, self.indent_delta, tags=tags, - nindex_levels=len(levels) - sparse_offset) + nindex_levels=len(levels) - sparse_offset) else: for i in range(len(frame)): idx_values = list(zip(*frame.index.format(sparsify=False, adjoin=False, names=False))) + if truncate_v and i == self.fmt.tr_row_num: + str_sep_row = vert_ellips * row_levels + \ + vert_ellips * ( dot_col_ix - 1 ) + \ + diag_ellips + \ + vert_ellips * (dot_col_ix - 1) + self.write_tr(str_sep_row, indent, self.indent_delta, + tags=None, align='center') row = [] row.extend(idx_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) if truncate_h: - row.insert(row_levels + self.fmt.tr_col_num, '...') + row.insert(row_levels + self.fmt.tr_col_num, + horiz_ellips) self.write_tr(row, indent, self.indent_delta, tags=None, nindex_levels=frame.index.nlevels) diff --git a/pandas/core/series.py b/pandas/core/series.py index ea3656662ab06..ea1e519db5141 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -847,6 +847,7 @@ def _tidy_repr(self, max_vals=20): Internal function, should always return unicode string """ + vert_ellips = fmt.vert_ellips if max_vals > 1: num = max_vals // 2 else: @@ -858,7 +859,10 @@ def _tidy_repr(self, max_vals=20): length=False, name=False, dtype=False) - result = head + '\n...\n' + tail + wdth = len(head.split(u('\n'))[0]) + # from IPython.core.debugger import Tracer + # Tracer()() + result = head + u('\n') + vert_ellips.center(wdth) + u('\n') + tail result = '%s\n%s' % (result, self._repr_footer()) return compat.text_type(result) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 8e405dc98f3da..83f0cd6a4d37a 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -29,6 +29,10 @@ _frame = DataFrame(tm.getSeriesData()) +# Truncation symbols +horiz_ellips = u('\u22ef') # ⋯ +vert_ellips = u('\u22ee') # ⋮ +diag_ellips = u('\u22f1') # ⋱ def curpath(): pth, _ = os.path.split(os.path.abspath(__file__)) @@ -48,22 +52,24 @@ def has_non_verbose_info_repr(df): def has_horizontally_truncated_repr(df): try: # Check header row - fst_line = np.array(repr(df).splitlines()[0].split()) - cand_col = np.where(fst_line=='...')[0][0] + fst_line = np.array(df.__unicode__().splitlines()[0].split()) + cand_col = np.where(fst_line==horiz_ellips)[0][0] except: return False # Make sure each row has this ... in the same place - r = repr(df) + r = df.__unicode__() for ix,l in enumerate(r.splitlines()): - if not r.split()[cand_col] == '...': + ele = r.split()[cand_col] + if not (ele == horiz_ellips or ele == diag_ellips): return False return True def has_vertically_truncated_repr(df): - r = repr(df) + regex = u('^[') + vert_ellips + diag_ellips + u('\s]+$') + r = df.__unicode__() only_dot_row = False - for row in r.splitlines(): - if re.match('^[\.\ ]+$',row): + for ix_row,row in enumerate(r.splitlines()): + if re.match(regex,row): only_dot_row = True return only_dot_row @@ -800,7 +806,7 @@ def test_to_html_truncate(self): 0 1 - ... + ⋯ 18 19 @@ -810,7 +816,7 @@ def test_to_html_truncate(self): 2001-01-01 NaN NaN - ... + ⋯ NaN NaN @@ -818,7 +824,7 @@ def test_to_html_truncate(self): 2001-01-02 NaN NaN - ... + ⋯ NaN NaN @@ -826,7 +832,7 @@ def test_to_html_truncate(self): 2001-01-03 NaN NaN - ... + ⋯ NaN NaN @@ -834,23 +840,23 @@ def test_to_html_truncate(self): 2001-01-04 NaN NaN - ... + ⋯ NaN NaN - ... - ... - ... - ... - ... - ... + ⋮ + ⋮ + ⋮ + ⋱ + ⋮ + ⋮ 2001-01-17 NaN NaN - ... + ⋯ NaN NaN @@ -858,7 +864,7 @@ def test_to_html_truncate(self): 2001-01-18 NaN NaN - ... + ⋯ NaN NaN @@ -866,7 +872,7 @@ def test_to_html_truncate(self): 2001-01-19 NaN NaN - ... + ⋯ NaN NaN @@ -874,7 +880,7 @@ def test_to_html_truncate(self): 2001-01-20 NaN NaN - ... + ⋯ NaN NaN @@ -902,7 +908,7 @@ def test_to_html_truncate_multi_index(self): bar baz - ... + ⋯ foo qux @@ -912,7 +918,7 @@ def test_to_html_truncate_multi_index(self): one two one - ... + ⋯ two one two @@ -925,7 +931,7 @@ def test_to_html_truncate_multi_index(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -935,7 +941,7 @@ def test_to_html_truncate_multi_index(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -946,21 +952,21 @@ def test_to_html_truncate_multi_index(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN - ... - ... - ... - ... - ... - ... - ... - ... - ... + ⋮ + ⋮ + ⋮ + ⋮ + ⋮ + ⋱ + ⋮ + ⋮ + ⋮ foo @@ -968,7 +974,7 @@ def test_to_html_truncate_multi_index(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -979,7 +985,7 @@ def test_to_html_truncate_multi_index(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -989,7 +995,7 @@ def test_to_html_truncate_multi_index(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -1006,7 +1012,7 @@ def test_to_html_truncate_multi_index_sparse_off(self): arrays = [['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] df = pd.DataFrame(index=arrays,columns=arrays) - fmt.set_option('display.max_rows',7) + fmt.set_option('display.max_rows',4) fmt.set_option('display.max_columns',7) fmt.set_option('display.multi_sparse',False) result = df._repr_html_() @@ -1020,7 +1026,7 @@ def test_to_html_truncate_multi_index_sparse_off(self): bar bar baz - ... + ⋯ foo qux qux @@ -1031,7 +1037,7 @@ def test_to_html_truncate_multi_index_sparse_off(self): one two one - ... + ⋯ two one two @@ -1044,7 +1050,7 @@ def test_to_html_truncate_multi_index_sparse_off(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -1055,32 +1061,21 @@ def test_to_html_truncate_multi_index_sparse_off(self): NaN NaN NaN - ... - NaN - NaN - NaN - - - baz - one - NaN - NaN - NaN - ... + ⋯ NaN NaN NaN - - foo - two - NaN - NaN - NaN - ... - NaN - NaN - NaN + + ⋮ + ⋮ + ⋮ + ⋮ + ⋮ + ⋱ + ⋮ + ⋮ + ⋮ qux @@ -1088,7 +1083,7 @@ def test_to_html_truncate_multi_index_sparse_off(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -1099,7 +1094,7 @@ def test_to_html_truncate_multi_index_sparse_off(self): NaN NaN NaN - ... + ⋯ NaN NaN NaN @@ -1856,11 +1851,11 @@ def test_repr_html_wide(self): max_cols = get_option('display.max_columns') df = DataFrame([row(max_cols-1, 25) for _ in range(10)]) reg_repr = df._repr_html_() - assert "..." not in reg_repr + assert horiz_ellips not in reg_repr wide_df = DataFrame([row(max_cols+1, 25) for _ in range(10)]) wide_repr = wide_df._repr_html_() - assert "..." in wide_repr + assert horiz_ellips in wide_repr def test_repr_html_wide_multiindex_cols(self): row = lambda l, k: [tm.rands(k) for _ in range(l)] @@ -1877,20 +1872,20 @@ def test_repr_html_wide_multiindex_cols(self): mcols = pandas.MultiIndex.from_tuples(tuples, names=['first', 'second']) df = DataFrame([row(len(mcols), 25) for _ in range(10)], columns=mcols) wide_repr = df._repr_html_() - assert '...' in wide_repr + assert horiz_ellips in wide_repr def test_repr_html_long(self): max_rows = get_option('display.max_rows') h = max_rows - 1 df = pandas.DataFrame({'A':np.arange(1,1+h), 'B':np.arange(41, 41+h)}) reg_repr = df._repr_html_() - assert '..' not in reg_repr + assert vert_ellips not in reg_repr assert str(41 + max_rows // 2) in reg_repr h = max_rows + 1 df = pandas.DataFrame({'A':np.arange(1,1+h), 'B':np.arange(41, 41+h)}) long_repr = df._repr_html_() - assert '..' in long_repr + assert vert_ellips in long_repr assert str(41 + max_rows // 2) not in long_repr assert u('%d rows ') % h in long_repr assert u('2 columns') in long_repr @@ -1900,13 +1895,13 @@ def test_repr_html_float(self): h = max_rows - 1 df = pandas.DataFrame({'idx':np.linspace(-10,10,h), 'A':np.arange(1,1+h), 'B': np.arange(41, 41+h) }).set_index('idx') reg_repr = df._repr_html_() - assert '..' not in reg_repr + assert vert_ellips not in reg_repr assert str(40 + h) in reg_repr h = max_rows + 1 df = pandas.DataFrame({'idx':np.linspace(-10,10,h), 'A':np.arange(1,1+h), 'B': np.arange(41, 41+h) }).set_index('idx') long_repr = df._repr_html_() - assert '..' in long_repr + assert vert_ellips in long_repr assert '31' not in long_repr assert u('%d rows ') % h in long_repr assert u('2 columns') in long_repr @@ -1927,7 +1922,7 @@ def test_repr_html_long_multiindex(self): df = DataFrame(np.random.randn((max_L1+1)*2, 2), index=idx, columns=['A', 'B']) long_repr = df._repr_html_() - assert '...' in long_repr + assert vert_ellips in long_repr def test_repr_html_long_and_wide(self): max_cols = get_option('display.max_columns') @@ -1939,7 +1934,8 @@ def test_repr_html_long_and_wide(self): h, w = max_rows+1, max_cols+1 df = pandas.DataFrame(dict((k,np.arange(1,1+h)) for k in np.arange(w))) - assert '...' in df._repr_html_() + df_html = df._repr_html_() + assert horiz_ellips in df_html and vert_ellips in df_html and diag_ellips in df_html def test_info_repr(self): max_rows = get_option('display.max_rows')