Skip to content

Commit f44e671

Browse files
committed
Add truncatable repr for DF groupby groups
1 parent 518b237 commit f44e671

File tree

4 files changed

+49
-14
lines changed

4 files changed

+49
-14
lines changed

pandas/core/frame.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ def _repr_fits_horizontal_(self, ignore_width=False):
553553
Check if full repr fits in horizontal boundaries imposed by the display
554554
options width and max_columns.
555555
556-
In case off non-interactive session, no boundaries apply.
556+
In case of non-interactive session, no boundaries apply.
557557
558558
`ignore_width` is here so ipnb+HTML output can behave the way
559559
users expect. display.max_columns remains in effect.

pandas/core/groupby/groupby.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class providing the base-class of operations.
3333
from pandas.core.base import (
3434
DataError, GroupByError, PandasObject, SelectionMixin, SpecificationError)
3535
import pandas.core.common as com
36-
from pandas.core.config import option_context
36+
from pandas.core.config import get_option, option_context
3737
from pandas.core.frame import DataFrame
3838
from pandas.core.generic import NDFrame
3939
from pandas.core.groupby import base
@@ -387,7 +387,7 @@ def groups(self):
387387
Dict {group name -> group labels}.
388388
"""
389389
self._assure_grouper()
390-
return self.grouper.groups
390+
return DataFrameGroups(self.grouper.groups)
391391

392392
@property
393393
def ngroups(self):
@@ -2108,3 +2108,10 @@ def groupby(obj, by, **kwds):
21082108
raise TypeError('invalid type: {}'.format(obj))
21092109

21102110
return klass(obj, by, **kwds)
2111+
2112+
2113+
class DataFrameGroups(dict):
    """
    ``dict`` subclass used to wrap the group-name -> group-labels mapping
    handed back by the ``groups`` property, so that its repr can be
    truncated.

    Only ``__repr__`` is overridden; every other ``dict`` behaviour is
    inherited unchanged.
    """

    def __repr__(self):
        """
        Return the mapping rendered by ``_pprint_dict``.

        The number of printed items is capped by the ``display.max_rows``
        option, which is read each time the repr is requested. Keys and
        values are formatted without recursive pretty-printing
        (``recurse=False``) and truncation is requested in the middle of
        the output (``truncate_at='middle'``).
        """
        # NOTE(review): function-scope import, presumably to avoid an
        # import cycle with the formatting module -- confirm.
        from pandas.io.formats.printing import _pprint_dict

        row_limit = get_option('display.max_rows')
        return _pprint_dict(self, max_seq_items=row_limit,
                            recurse=False, truncate_at='middle')

pandas/io/formats/printing.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def _join_unicode(lines, sep=''):
9595
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
9696
"""
9797
internal. pprinter for iterables. you should probably use pprint_thing()
98-
rather then calling this directly.
98+
rather than calling this directly.
9999
100100
bounds length of printed sequence, depending on options
101101
"""
@@ -124,11 +124,13 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
124124
return fmt.format(body=body)
125125

126126

127-
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
127+
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, recurse=True,
128+
truncate_at='end', **kwds):
128129
"""
129130
internal. pprinter for iterables. you should probably use pprint_thing()
130-
rather then calling this directly.
131+
rather than calling this directly.
131132
"""
133+
132134
fmt = u("{{{things}}}")
133135
pairs = []
134136

@@ -139,16 +141,26 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
139141
else:
140142
nitems = max_seq_items or get_option("max_seq_items") or len(seq)
141143

142-
for k, v in list(seq.items())[:nitems]:
143-
pairs.append(
144-
pfmt.format(
145-
key=pprint_thing(k, _nest_lvl + 1,
146-
max_seq_items=max_seq_items, **kwds),
147-
val=pprint_thing(v, _nest_lvl + 1,
148-
max_seq_items=max_seq_items, **kwds)))
144+
if recurse:
145+
for k, v in list(seq.items())[:nitems]:
146+
pairs.append(
147+
pfmt.format(
148+
key=pprint_thing(k, _nest_lvl + 1,
149+
max_seq_items=max_seq_items, **kwds),
150+
val=pprint_thing(v, _nest_lvl + 1,
151+
max_seq_items=max_seq_items, **kwds)))
152+
else:
153+
for k, v in list(seq.items())[:nitems]:
154+
pairs.append(pfmt.format(key=k, val=v))
149155

150156
if nitems < len(seq):
151-
return fmt.format(things=", ".join(pairs) + ", ...")
157+
if truncate_at == 'middle':
158+
start_cnt, end_cnt = nitems - int(nitems / 2), int(nitems / 2)
159+
return fmt.format(things=", ".join(pairs[:start_cnt]) +
160+
", ... , " +
161+
", ".join(pairs[end_cnt:]))
162+
else:
163+
return fmt.format(things=", ".join(pairs) + ", ...")
152164
else:
153165
return fmt.format(things=", ".join(pairs))
154166

pandas/tests/io/formats/test_format.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1748,6 +1748,22 @@ def test_period(self):
17481748
assert str(df) == exp
17491749

17501750

1751+
class TestDataFrameGroupByFormatting(object):
    """Repr checks for the mapping returned by ``DataFrame.groupby().groups``."""

    def test_groups_repr_truncates(self):
        """The groups repr is shortened when ``display.max_rows`` is small."""
        # Column 'a' holds three distinct keys (1, 2, 3).
        frame = pd.DataFrame({'a': [1, 1, 1, 2, 2, 3],
                              'b': [1, 2, 3, 4, 5, 6]})

        # The repr reads ``display.max_rows`` when it is called, so the
        # comparison has to happen while the option override is active.
        with option_context('display.max_rows', 2):
            groups = frame.groupby('a').groups

            # NOTE(review): the entry after the ellipsis is key 2, not the
            # final key 3 -- confirm this is the intended "middle"
            # truncation.
            expected = ("{1: Int64Index([0, 1, 2], dtype='int64'), ... , "
                        "2: Int64Index([3, 4], dtype='int64')}")

            assert repr(groups) == expected
1765+
1766+
17511767
def gen_series_formatting():
17521768
s1 = pd.Series(['a'] * 100)
17531769
s2 = pd.Series(['ab'] * 100)

0 commit comments

Comments
 (0)