Skip to content

Commit 4614ac8

Browse files
committed
Merge pull request #6914 from sinhrks/getgroup
BUG: GroupBy.get_group doesn't work with TimeGrouper
2 parents 3aee98d + 85157f0 commit 4614ac8

File tree

3 files changed

+62
-0
lines changed

3 files changed

+62
-0
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ Bug Fixes
410410
- Better error message when passing a frequency of 'MS' in ``Period`` construction (GH5332)
411411
- Bug in ``Series.__unicode__`` when ``max_rows`` is ``None`` and the Series has more than 1000 rows. (:issue:`6863`)
412412
- Bug in ``groupby.get_group`` where a datetimelike wasn't always accepted (:issue:`5267`)
413+
- Bug in ``GroupBy.get_group`` raising ``AttributeError`` when the groupby was created with ``TimeGrouper`` (:issue:`6914`)
413414
- Bug in ``DatetimeIndex.tz_localize`` and ``DatetimeIndex.tz_convert`` affecting ``NaT`` (:issue:`5546`)
414415
- Bug in arithmetic operations affecting ``NaT`` (:issue:`6873`)
415416
- Bug in ``Series.str.extract`` where the resulting ``Series`` from a single

pandas/core/groupby.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from functools import wraps
33
import numpy as np
44
import datetime
5+
import collections
56

67
from pandas.compat import(
78
zip, builtins, range, long, lrange, lzip,
@@ -1556,6 +1557,17 @@ def apply(self, f, data, axis=0):
15561557

15571558
return result_keys, result_values, mutated
15581559

1560+
@cache_readonly
def indices(self):
    """Map each bin label to the list of positions that fall in its bin.

    ``self.bins`` is read as a sequence of end positions: every bin spans
    from the end of the previous bin (0 for the first one) up to, but not
    including, its own end. A bin that spans no positions gets no explicit
    entry; since the mapping is a ``defaultdict(list)``, looking such a
    label up yields an empty list.
    """
    positions_by_label = collections.defaultdict(list)

    start = 0
    for label, stop in zip(self.binlabels, self.bins):
        if start < stop:
            positions_by_label[label] = list(range(start, stop))
        # The next bin begins where this one ended.
        start = stop
    return positions_by_label
1570+
15591571
@cache_readonly
def ngroups(self):
    """Return the number of groups, i.e. the number of bin labels."""
    labels = self.binlabels
    return len(labels)

pandas/tests/test_groupby.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3140,6 +3140,55 @@ def test_timegrouper_with_reg_groups(self):
31403140
result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])['whole_cost'].sum()
31413141
assert_series_equal(result2, expected)
31423142

3143+
def test_timegrouper_get_group(self):
    # GH 6914
    # get_group must return the right rows for groupbys built from a
    # time-based Grouper, for both the original and a re-sorted frame.

    def check_groups(frames, make_grouped, keys, expected_frames):
        # Group every frame afresh and compare each requested group
        # against its expected rows.
        for frame in frames:
            grouped = make_grouped(frame)
            for key, expected in zip(keys, expected_frames):
                result = grouped.get_group(key)
                assert_frame_equal(result, expected)

    purchases = DataFrame({
        'Buyer': 'Carl Joe Joe Carl Joe Carl'.split(),
        'Quantity': [18, 3, 5, 1, 9, 3],
        'Date': [
            datetime(2013, 9, 1, 13, 0), datetime(2013, 9, 1, 13, 5),
            datetime(2013, 10, 1, 20, 0), datetime(2013, 10, 3, 10, 0),
            datetime(2013, 12, 2, 12, 0), datetime(2013, 9, 2, 14, 0),
        ],
    })
    by_quantity = purchases.sort(columns='Quantity')

    month_ends = [pd.Timestamp(t)
                  for t in ['2013-09-30', '2013-10-31', '2013-12-31']]

    # single grouping
    monthly_rows = [purchases.iloc[[0, 1, 5]], purchases.iloc[[2, 3]],
                    purchases.iloc[[4]]]
    check_groups(
        [purchases, by_quantity],
        lambda frame: frame.groupby(pd.Grouper(freq='M', key='Date')),
        month_ends, monthly_rows)

    # multiple grouping
    buyer_month_rows = [purchases.iloc[[1]], purchases.iloc[[3]],
                        purchases.iloc[[4]]]
    buyer_month_keys = [
        (buyer, pd.Timestamp(t))
        for buyer, t in [('Joe', '2013-09-30'), ('Carl', '2013-10-31'),
                         ('Joe', '2013-12-31')]
    ]
    check_groups(
        [purchases, by_quantity],
        lambda frame: frame.groupby(
            ['Buyer', pd.Grouper(freq='M', key='Date')]),
        buyer_month_keys, buyer_month_rows)

    # with index
    indexed = purchases.set_index('Date')
    indexed_by_quantity = indexed.sort(columns='Quantity')

    indexed_rows = [indexed.iloc[[0, 1, 5]], indexed.iloc[[2, 3]],
                    indexed.iloc[[4]]]
    check_groups(
        [indexed, indexed_by_quantity],
        lambda frame: frame.groupby(pd.Grouper(freq='M')),
        month_ends, indexed_rows)
3191+
31433192
def test_cumcount(self):
31443193
df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
31453194
g = df.groupby('A')

0 commit comments

Comments
 (0)