Skip to content

TST: check internal Categorical #13249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.18.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ Bug Fixes
- Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
- Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)

- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)


- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
9 changes: 6 additions & 3 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,12 @@ def get_result(self):

# may need to coerce categoricals here
if self.is_categorical is not None:
values = [Categorical.from_array(
values[:, i], categories=self.is_categorical.categories,
ordered=True) for i in range(values.shape[-1])]
categories = self.is_categorical.categories
ordered = self.is_categorical.ordered
values = [Categorical.from_array(values[:, i],
categories=categories,
ordered=ordered)
for i in range(values.shape[-1])]

return DataFrame(values, index=index, columns=columns)

Expand Down
17 changes: 17 additions & 0 deletions pandas/io/tests/test_pickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ def compare_series_dt_tz(self, result, expected, typ, version):
else:
tm.assert_series_equal(result, expected)

def compare_series_cat(self, result, expected, typ, version):
# Categorical.ordered differs in data from versions < 0.16.0, so skip the categorical check there
if LooseVersion(version) < '0.16.0':
tm.assert_series_equal(result, expected, check_categorical=False)
else:
tm.assert_series_equal(result, expected)

def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
# 8260
# dtype is object < 0.17.0
Expand All @@ -117,6 +124,16 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
else:
tm.assert_frame_equal(result, expected)

def compare_frame_cat_onecol(self, result, expected, typ, version):
# Categorical.ordered differs in data from versions < 0.16.0, so skip the categorical check there
if LooseVersion(version) < '0.16.0':
tm.assert_frame_equal(result, expected, check_categorical=False)
else:
tm.assert_frame_equal(result, expected)

def compare_frame_cat_and_float(self, result, expected, typ, version):
self.compare_frame_cat_onecol(result, expected, typ, version)

def compare_index_period(self, result, expected, typ, version):
tm.assert_index_equal(result, expected)
tm.assertIsInstance(result.freq, MonthEnd)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,7 +1004,7 @@ def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
nan_rep=nan_rep)
retr = read_hdf(store, key)
s_nan = s.replace(nan_rep, np.nan)
assert_series_equal(s_nan, retr)
assert_series_equal(s_nan, retr, check_categorical=False)

for s in examples:
roundtrip(s)
Expand Down
33 changes: 19 additions & 14 deletions pandas/io/tests/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,11 @@ def test_read_dta4(self):
expected = pd.concat([expected[col].astype('category')
for col in expected], axis=1)

tm.assert_frame_equal(parsed_113, expected)
tm.assert_frame_equal(parsed_114, expected)
tm.assert_frame_equal(parsed_115, expected)
tm.assert_frame_equal(parsed_117, expected)
# stata doesn't save .category metadata
tm.assert_frame_equal(parsed_113, expected, check_categorical=False)
tm.assert_frame_equal(parsed_114, expected, check_categorical=False)
tm.assert_frame_equal(parsed_115, expected, check_categorical=False)
tm.assert_frame_equal(parsed_117, expected, check_categorical=False)

# File containing strls
def test_read_dta12(self):
Expand Down Expand Up @@ -872,8 +873,8 @@ def test_categorical_writing(self):
# Silence warnings
original.to_stata(path)
written_and_read_again = self.read_dta(path)
tm.assert_frame_equal(
written_and_read_again.set_index('index'), expected)
res = written_and_read_again.set_index('index')
tm.assert_frame_equal(res, expected, check_categorical=False)

def test_categorical_warnings_and_errors(self):
# Warning for non-string labels
Expand Down Expand Up @@ -915,8 +916,8 @@ def test_categorical_with_stata_missing_values(self):
with tm.ensure_clean() as path:
original.to_stata(path)
written_and_read_again = self.read_dta(path)
tm.assert_frame_equal(
written_and_read_again.set_index('index'), original)
res = written_and_read_again.set_index('index')
tm.assert_frame_equal(res, original, check_categorical=False)

def test_categorical_order(self):
# Directly construct using expected codes
Expand Down Expand Up @@ -945,8 +946,8 @@ def test_categorical_order(self):
# Read with and without categoricals, ensure order is identical
parsed_115 = read_stata(self.dta19_115)
parsed_117 = read_stata(self.dta19_117)
tm.assert_frame_equal(expected, parsed_115)
tm.assert_frame_equal(expected, parsed_117)
tm.assert_frame_equal(expected, parsed_115, check_categorical=False)
tm.assert_frame_equal(expected, parsed_117, check_categorical=False)

# Check identity of codes
for col in expected:
Expand All @@ -969,8 +970,10 @@ def test_categorical_sorting(self):
categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
cat = pd.Categorical.from_codes(codes=codes, categories=categories)
expected = pd.Series(cat, name='srh')
tm.assert_series_equal(expected, parsed_115["srh"])
tm.assert_series_equal(expected, parsed_117["srh"])
tm.assert_series_equal(expected, parsed_115["srh"],
check_categorical=False)
tm.assert_series_equal(expected, parsed_117["srh"],
check_categorical=False)

def test_categorical_ordering(self):
parsed_115 = read_stata(self.dta19_115)
Expand Down Expand Up @@ -1021,7 +1024,8 @@ def test_read_chunks_117(self):
from_frame = parsed.iloc[pos:pos + chunksize, :]
tm.assert_frame_equal(
from_frame, chunk, check_dtype=False,
check_datetimelike_compat=True)
check_datetimelike_compat=True,
check_categorical=False)

pos += chunksize
itr.close()
Expand Down Expand Up @@ -1087,7 +1091,8 @@ def test_read_chunks_115(self):
from_frame = parsed.iloc[pos:pos + chunksize, :]
tm.assert_frame_equal(
from_frame, chunk, check_dtype=False,
check_datetimelike_compat=True)
check_datetimelike_compat=True,
check_categorical=False)

pos += chunksize
itr.close()
Expand Down
39 changes: 21 additions & 18 deletions pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ def test_unstack_fill(self):
index=['x', 'y', 'z'], dtype=np.float)
assert_frame_equal(result, expected)

def test_unstack_fill_frame(self):

# From a dataframe
rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
df = DataFrame(rows, columns=list('AB'), dtype=np.int32)
Expand Down Expand Up @@ -190,6 +192,8 @@ def test_unstack_fill(self):
[('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])
assert_frame_equal(result, expected)

def test_unstack_fill_frame_datetime(self):

# Test unstacking with date times
dv = pd.date_range('2012-01-01', periods=4).values
data = Series(dv)
Expand All @@ -208,6 +212,8 @@ def test_unstack_fill(self):
index=['x', 'y', 'z'])
assert_frame_equal(result, expected)

def test_unstack_fill_frame_timedelta(self):

# Test unstacking with time deltas
td = [Timedelta(days=i) for i in range(4)]
data = Series(td)
Expand All @@ -226,6 +232,8 @@ def test_unstack_fill(self):
index=['x', 'y', 'z'])
assert_frame_equal(result, expected)

def test_unstack_fill_frame_period(self):

# Test unstacking with period
periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
Period('2012-04')]
Expand All @@ -245,6 +253,8 @@ def test_unstack_fill(self):
index=['x', 'y', 'z'])
assert_frame_equal(result, expected)

def test_unstack_fill_frame_categorical(self):

# Test unstacking with categorical
data = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
data.index = pd.MultiIndex.from_tuples(
Expand Down Expand Up @@ -273,27 +283,20 @@ def test_unstack_fill(self):
assert_frame_equal(result, expected)

def test_stack_ints(self):
df = DataFrame(
np.random.randn(30, 27),
columns=MultiIndex.from_tuples(
list(itertools.product(range(3), repeat=3))
)
)
assert_frame_equal(
df.stack(level=[1, 2]),
df.stack(level=1).stack(level=1)
)
assert_frame_equal(
df.stack(level=[-2, -1]),
df.stack(level=1).stack(level=1)
)
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
repeat=3)))
df = DataFrame(np.random.randn(30, 27), columns=columns)

assert_frame_equal(df.stack(level=[1, 2]),
df.stack(level=1).stack(level=1))
assert_frame_equal(df.stack(level=[-2, -1]),
df.stack(level=1).stack(level=1))

df_named = df.copy()
df_named.columns.set_names(range(3), inplace=True)
assert_frame_equal(
df_named.stack(level=[1, 2]),
df_named.stack(level=1).stack(level=1)
)

assert_frame_equal(df_named.stack(level=[1, 2]),
df_named.stack(level=1).stack(level=1))

def test_stack_mixed_levels(self):
columns = MultiIndex.from_tuples(
Expand Down
25 changes: 16 additions & 9 deletions pandas/tests/indexing/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,17 @@ def test_loc_listlike_dtypes(self):

# unique slice
res = df.loc[['a', 'b']]
exp = DataFrame({'A': [1, 2],
'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b']))
exp_index = pd.CategoricalIndex(['a', 'b'],
categories=index.categories)
exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp, check_index_type=True)

# duplicated slice
res = df.loc[['a', 'a', 'b']]
exp = DataFrame({'A': [1, 1, 2],
'B': [4, 4, 5]},
index=pd.CategoricalIndex(['a', 'a', 'b']))

exp_index = pd.CategoricalIndex(['a', 'a', 'b'],
categories=index.categories)
exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp, check_index_type=True)

with tm.assertRaisesRegexp(
Expand Down Expand Up @@ -194,12 +196,15 @@ def test_ix_categorical_index(self):
expect = pd.Series(df.ix[:, 'X'], index=cdf.index, name='X')
assert_series_equal(cdf.ix[:, 'X'], expect)

exp_index = pd.CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,
index=pd.CategoricalIndex(list('AB')))
index=exp_index)
assert_frame_equal(cdf.ix[['A', 'B'], :], expect)

exp_columns = pd.CategoricalIndex(list('XY'),
categories=['X', 'Y', 'Z'])
expect = pd.DataFrame(df.ix[:, ['X', 'Y']], index=cdf.index,
columns=pd.CategoricalIndex(list('XY')))
columns=exp_columns)
assert_frame_equal(cdf.ix[:, ['X', 'Y']], expect)

# non-unique
Expand All @@ -209,12 +214,14 @@ def test_ix_categorical_index(self):
cdf.index = pd.CategoricalIndex(df.index)
cdf.columns = pd.CategoricalIndex(df.columns)

exp_index = pd.CategoricalIndex(list('AA'), categories=['A', 'B'])
expect = pd.DataFrame(df.ix['A', :], columns=cdf.columns,
index=pd.CategoricalIndex(list('AA')))
index=exp_index)
assert_frame_equal(cdf.ix['A', :], expect)

exp_columns = pd.CategoricalIndex(list('XX'), categories=['X', 'Y'])
expect = pd.DataFrame(df.ix[:, 'X'], index=cdf.index,
columns=pd.CategoricalIndex(list('XX')))
columns=exp_columns)
assert_frame_equal(cdf.ix[:, 'X'], expect)

expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/series/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,8 @@ def test_map(self):
index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))

exp = Series([np.nan, 'B', 'C', 'D'], dtype='category')
exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
categories=['B', 'C', 'D', 'E']))
self.assert_series_equal(a.map(b), exp)
exp = Series([np.nan, 'B', 'C', 'D'])
self.assert_series_equal(a.map(c), exp)
Expand Down
Loading