diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
index a77bdcec2ce7a..4b3c96da10efd 100644
--- a/doc/source/whatsnew/v0.18.2.txt
+++ b/doc/source/whatsnew/v0.18.2.txt
@@ -180,7 +180,7 @@ Bug Fixes
 - Bug in ``Period`` addition raises ``TypeError`` if ``Period`` is on right hand side (:issue:`13069`)
-- Bug in ``Peirod`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
+- Bug in ``Period`` and ``Series`` or ``Index`` comparison raises ``TypeError`` (:issue:`13200`)
 - Bug in ``pd.set_eng_float_format()`` that would prevent NaN's from formatting (:issue:`11981`)
-
+- Bug in ``.unstack`` with ``Categorical`` dtype, which always reset ``.ordered`` to ``True`` instead of keeping the original value (:issue:`13249`)
 
 
 - Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
index 7e0c094aec4c2..8d237016d1b33 100644
--- a/pandas/core/reshape.py
+++ b/pandas/core/reshape.py
@@ -162,9 +162,12 @@ def get_result(self):
 
         # may need to coerce categoricals here
         if self.is_categorical is not None:
-            values = [Categorical.from_array(
-                values[:, i], categories=self.is_categorical.categories,
-                ordered=True) for i in range(values.shape[-1])]
+            categories = self.is_categorical.categories
+            ordered = self.is_categorical.ordered
+            values = [Categorical.from_array(values[:, i],
+                                             categories=categories,
+                                             ordered=ordered)
+                      for i in range(values.shape[-1])]
 
         return DataFrame(values, index=index, columns=columns)
 
diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py
index 4ff0363d07df6..7f2813d5281cb 100644
--- a/pandas/io/tests/test_pickle.py
+++ b/pandas/io/tests/test_pickle.py
@@ -108,6 +108,13 @@ def compare_series_dt_tz(self, result, expected, typ, version):
         else:
             tm.assert_series_equal(result, expected)
 
+    def compare_series_cat(self, result, expected, typ, version):
+        # Categorical.ordered differs in < 0.16.0, so skip categorical check
+        if LooseVersion(version) < '0.16.0':
+            tm.assert_series_equal(result, expected, check_categorical=False)
+        else:
+            tm.assert_series_equal(result, expected)
+
     def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
         # 8260
         # dtype is object < 0.17.0
@@ -117,6 +124,16 @@ def compare_frame_dt_mixed_tzs(self, result, expected, typ, version):
         else:
             tm.assert_frame_equal(result, expected)
 
+    def compare_frame_cat_onecol(self, result, expected, typ, version):
+        # Categorical.ordered differs in < 0.16.0, so skip categorical check
+        if LooseVersion(version) < '0.16.0':
+            tm.assert_frame_equal(result, expected, check_categorical=False)
+        else:
+            tm.assert_frame_equal(result, expected)
+
+    def compare_frame_cat_and_float(self, result, expected, typ, version):
+        self.compare_frame_cat_onecol(result, expected, typ, version)
+
     def compare_index_period(self, result, expected, typ, version):
         tm.assert_index_equal(result, expected)
         tm.assertIsInstance(result.freq, MonthEnd)
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 6bf0175526424..5ee84ce97979a 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -1004,7 +1004,7 @@ def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
                          nan_rep=nan_rep)
                 retr = read_hdf(store, key)
                 s_nan = s.replace(nan_rep, np.nan)
-                assert_series_equal(s_nan, retr)
+                assert_series_equal(s_nan, retr, check_categorical=False)
 
         for s in examples:
             roundtrip(s)
diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py
index fe782bb86d1be..17f74d5789298 100644
--- a/pandas/io/tests/test_stata.py
+++ b/pandas/io/tests/test_stata.py
@@ -234,10 +234,11 @@ def test_read_dta4(self):
         expected = pd.concat([expected[col].astype('category')
                               for col in expected], axis=1)
 
-        tm.assert_frame_equal(parsed_113, expected)
-        tm.assert_frame_equal(parsed_114, expected)
-        tm.assert_frame_equal(parsed_115, expected)
-        tm.assert_frame_equal(parsed_117, expected)
+        # stata doesn't save .category metadata
+        tm.assert_frame_equal(parsed_113, expected, check_categorical=False)
+        tm.assert_frame_equal(parsed_114, expected, check_categorical=False)
+        tm.assert_frame_equal(parsed_115, expected, check_categorical=False)
+        tm.assert_frame_equal(parsed_117, expected, check_categorical=False)
 
     # File containing strls
     def test_read_dta12(self):
@@ -872,8 +873,8 @@ def test_categorical_writing(self):
                 # Silence warnings
                 original.to_stata(path)
                 written_and_read_again = self.read_dta(path)
-                tm.assert_frame_equal(
-                    written_and_read_again.set_index('index'), expected)
+                res = written_and_read_again.set_index('index')
+                tm.assert_frame_equal(res, expected, check_categorical=False)
 
     def test_categorical_warnings_and_errors(self):
         # Warning for non-string labels
@@ -915,8 +916,8 @@ def test_categorical_with_stata_missing_values(self):
         with tm.ensure_clean() as path:
             original.to_stata(path)
             written_and_read_again = self.read_dta(path)
-            tm.assert_frame_equal(
-                written_and_read_again.set_index('index'), original)
+            res = written_and_read_again.set_index('index')
+            tm.assert_frame_equal(res, original, check_categorical=False)
 
     def test_categorical_order(self):
         # Directly construct using expected codes
@@ -945,8 +946,8 @@ def test_categorical_order(self):
         # Read with and with out categoricals, ensure order is identical
         parsed_115 = read_stata(self.dta19_115)
         parsed_117 = read_stata(self.dta19_117)
-        tm.assert_frame_equal(expected, parsed_115)
-        tm.assert_frame_equal(expected, parsed_117)
+        tm.assert_frame_equal(expected, parsed_115, check_categorical=False)
+        tm.assert_frame_equal(expected, parsed_117, check_categorical=False)
 
         # Check identity of codes
         for col in expected:
@@ -969,8 +970,10 @@ def test_categorical_sorting(self):
         categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
         cat = pd.Categorical.from_codes(codes=codes, categories=categories)
         expected = pd.Series(cat, name='srh')
-        tm.assert_series_equal(expected, parsed_115["srh"])
-        tm.assert_series_equal(expected, parsed_117["srh"])
+        tm.assert_series_equal(expected, parsed_115["srh"],
+                               check_categorical=False)
+        tm.assert_series_equal(expected, parsed_117["srh"],
+                               check_categorical=False)
 
     def test_categorical_ordering(self):
         parsed_115 = read_stata(self.dta19_115)
@@ -1021,7 +1024,8 @@ def test_read_chunks_117(self):
                             from_frame = parsed.iloc[pos:pos + chunksize, :]
                             tm.assert_frame_equal(
                                 from_frame, chunk, check_dtype=False,
-                                check_datetimelike_compat=True)
+                                check_datetimelike_compat=True,
+                                check_categorical=False)
 
                             pos += chunksize
                         itr.close()
@@ -1087,7 +1091,8 @@ def test_read_chunks_115(self):
                             from_frame = parsed.iloc[pos:pos + chunksize, :]
                             tm.assert_frame_equal(
                                 from_frame, chunk, check_dtype=False,
-                                check_datetimelike_compat=True)
+                                check_datetimelike_compat=True,
+                                check_categorical=False)
 
                             pos += chunksize
                         itr.close()
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index e7d64324e6590..43c288162b134 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -158,6 +158,8 @@ def test_unstack_fill(self):
                              index=['x', 'y', 'z'], dtype=np.float)
         assert_frame_equal(result, expected)
 
+    def test_unstack_fill_frame(self):
+
         # From a dataframe
         rows = [[1, 2], [3, 4], [5, 6], [7, 8]]
         df = DataFrame(rows, columns=list('AB'), dtype=np.int32)
@@ -190,6 +192,8 @@ def test_unstack_fill(self):
             [('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')])
         assert_frame_equal(result, expected)
 
+    def test_unstack_fill_frame_datetime(self):
+
         # Test unstacking with date times
         dv = pd.date_range('2012-01-01', periods=4).values
         data = Series(dv)
@@ -208,6 +212,8 @@ def test_unstack_fill(self):
                              index=['x', 'y', 'z'])
         assert_frame_equal(result, expected)
 
+    def test_unstack_fill_frame_timedelta(self):
+
         # Test unstacking with time deltas
         td = [Timedelta(days=i) for i in range(4)]
         data = Series(td)
@@ -226,6 +232,8 @@ def test_unstack_fill(self):
                              index=['x', 'y', 'z'])
         assert_frame_equal(result, expected)
 
+    def test_unstack_fill_frame_period(self):
+
         # Test unstacking with period
         periods = [Period('2012-01'), Period('2012-02'), Period('2012-03'),
                    Period('2012-04')]
@@ -245,6 +253,8 @@ def test_unstack_fill(self):
                              index=['x', 'y', 'z'])
         assert_frame_equal(result, expected)
 
+    def test_unstack_fill_frame_categorical(self):
+
         # Test unstacking with categorical
         data = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
         data.index = pd.MultiIndex.from_tuples(
@@ -273,27 +283,20 @@ def test_unstack_fill(self):
         assert_frame_equal(result, expected)
 
     def test_stack_ints(self):
-        df = DataFrame(
-            np.random.randn(30, 27),
-            columns=MultiIndex.from_tuples(
-                list(itertools.product(range(3), repeat=3))
-            )
-        )
-        assert_frame_equal(
-            df.stack(level=[1, 2]),
-            df.stack(level=1).stack(level=1)
-        )
-        assert_frame_equal(
-            df.stack(level=[-2, -1]),
-            df.stack(level=1).stack(level=1)
-        )
+        columns = MultiIndex.from_tuples(list(itertools.product(range(3),
+                                                                repeat=3)))
+        df = DataFrame(np.random.randn(30, 27), columns=columns)
+
+        assert_frame_equal(df.stack(level=[1, 2]),
+                           df.stack(level=1).stack(level=1))
+        assert_frame_equal(df.stack(level=[-2, -1]),
+                           df.stack(level=1).stack(level=1))
 
         df_named = df.copy()
         df_named.columns.set_names(range(3), inplace=True)
-        assert_frame_equal(
-            df_named.stack(level=[1, 2]),
-            df_named.stack(level=1).stack(level=1)
-        )
+
+        assert_frame_equal(df_named.stack(level=[1, 2]),
+                           df_named.stack(level=1).stack(level=1))
 
     def test_stack_mixed_levels(self):
         columns = MultiIndex.from_tuples(
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 53ab9aca03f6c..2cb62a60f885b 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -108,15 +108,17 @@ def test_loc_listlike_dtypes(self):
 
         # unique slice
         res = df.loc[['a', 'b']]
-        exp = DataFrame({'A': [1, 2],
-                         'B': [4, 5]}, index=pd.CategoricalIndex(['a', 'b']))
+        exp_index = pd.CategoricalIndex(['a', 'b'],
+                                        categories=index.categories)
+        exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index)
         tm.assert_frame_equal(res, exp, check_index_type=True)
 
         # duplicated slice
         res = df.loc[['a', 'a', 'b']]
-        exp = DataFrame({'A': [1, 1, 2],
-                         'B': [4, 4, 5]},
-                        index=pd.CategoricalIndex(['a', 'a', 'b']))
+
+        exp_index = pd.CategoricalIndex(['a', 'a', 'b'],
+                                        categories=index.categories)
+        exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index)
         tm.assert_frame_equal(res, exp, check_index_type=True)
 
         with tm.assertRaisesRegexp(
@@ -194,12 +196,15 @@ def test_ix_categorical_index(self):
         expect = pd.Series(df.ix[:, 'X'], index=cdf.index, name='X')
         assert_series_equal(cdf.ix[:, 'X'], expect)
 
+        exp_index = pd.CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
         expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,
-                              index=pd.CategoricalIndex(list('AB')))
+                              index=exp_index)
         assert_frame_equal(cdf.ix[['A', 'B'], :], expect)
 
+        exp_columns = pd.CategoricalIndex(list('XY'),
+                                          categories=['X', 'Y', 'Z'])
         expect = pd.DataFrame(df.ix[:, ['X', 'Y']], index=cdf.index,
-                              columns=pd.CategoricalIndex(list('XY')))
+                              columns=exp_columns)
         assert_frame_equal(cdf.ix[:, ['X', 'Y']], expect)
 
         # non-unique
@@ -209,12 +214,14 @@ def test_ix_categorical_index(self):
         cdf.index = pd.CategoricalIndex(df.index)
         cdf.columns = pd.CategoricalIndex(df.columns)
 
+        exp_index = pd.CategoricalIndex(list('AA'), categories=['A', 'B'])
         expect = pd.DataFrame(df.ix['A', :], columns=cdf.columns,
-                              index=pd.CategoricalIndex(list('AA')))
+                              index=exp_index)
         assert_frame_equal(cdf.ix['A', :], expect)
 
+        exp_columns = pd.CategoricalIndex(list('XX'), categories=['X', 'Y'])
         expect = pd.DataFrame(df.ix[:, 'X'], index=cdf.index,
-                              columns=pd.CategoricalIndex(list('XX')))
+                              columns=exp_columns)
         assert_frame_equal(cdf.ix[:, 'X'], expect)
 
         expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns,
diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
index 6e0a0175b403f..9cb1e9dd93d16 100644
--- a/pandas/tests/series/test_apply.py
+++ b/pandas/tests/series/test_apply.py
@@ -187,7 +187,8 @@ def test_map(self):
                    index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
         c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))
 
-        exp = Series([np.nan, 'B', 'C', 'D'], dtype='category')
+        exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
+                                    categories=['B', 'C', 'D', 'E']))
         self.assert_series_equal(a.map(b), exp)
         exp = Series([np.nan, 'B', 'C', 'D'])
         self.assert_series_equal(a.map(c), exp)
diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
index 5a6667e57ce9d..40ef5354e91bd 100644
--- a/pandas/tests/test_categorical.py
+++ b/pandas/tests/test_categorical.py
@@ -556,28 +556,35 @@ def test_categories_none(self):
     def test_describe(self):
         # string type
         desc = self.factor.describe()
+        self.assertTrue(self.factor.ordered)
+        exp_index = pd.CategoricalIndex(['a', 'b', 'c'], name='categories',
+                                        ordered=self.factor.ordered)
         expected = DataFrame({'counts': [3, 2, 3],
                               'freqs': [3 / 8., 2 / 8., 3 / 8.]},
-                             index=pd.CategoricalIndex(['a', 'b', 'c'],
-                                                       name='categories'))
+                             index=exp_index)
         tm.assert_frame_equal(desc, expected)
 
         # check unused categories
         cat = self.factor.copy()
         cat.set_categories(["a", "b", "c", "d"], inplace=True)
         desc = cat.describe()
+
+        exp_index = pd.CategoricalIndex(['a', 'b', 'c', 'd'],
+                                        ordered=self.factor.ordered,
+                                        name='categories')
         expected = DataFrame({'counts': [3, 2, 3, 0],
                               'freqs': [3 / 8., 2 / 8., 3 / 8., 0]},
-                             index=pd.CategoricalIndex(['a', 'b', 'c', 'd'],
-                                                       name='categories'))
+                             index=exp_index)
         tm.assert_frame_equal(desc, expected)
 
         # check an integer one
-        desc = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1]).describe()
+        cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
+        desc = cat.describe()
+        exp_index = pd.CategoricalIndex([1, 2, 3], ordered=cat.ordered,
+                                        name='categories')
         expected = DataFrame({'counts': [5, 3, 3],
                               'freqs': [5 / 11., 3 / 11., 3 / 11.]},
-                             index=pd.CategoricalIndex([1, 2, 3],
-                                                       name='categories'))
+                             index=exp_index)
         tm.assert_frame_equal(desc, expected)
 
         # https://github.com/pydata/pandas/issues/3678
@@ -601,7 +608,7 @@ def test_describe(self):
                              columns=['counts', 'freqs'],
                              index=pd.CategoricalIndex(['b', 'a', 'c', np.nan],
                                                        name='categories'))
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_categorical=False)
 
         # NA as an unused category
         with tm.assert_produces_warning(FutureWarning):
@@ -613,7 +620,7 @@ def test_describe(self):
             ['b', 'a', 'c', np.nan], name='categories')
         expected = DataFrame([[0, 0], [1, 1 / 3.], [2, 2 / 3.], [0, 0]],
                              columns=['counts', 'freqs'], index=exp_idx)
-        tm.assert_frame_equal(result, expected)
+        tm.assert_frame_equal(result, expected, check_categorical=False)
 
     def test_print(self):
         expected = ["[a, b, b, a, a, c, c, c]",
@@ -2885,13 +2892,17 @@ def test_value_counts(self):
                               categories=["c", "a", "b", "d"])
         s = pd.Series(cats, name='xxx')
         res = s.value_counts(sort=False)
-        exp = Series([3, 1, 2, 0], name='xxx',
-                     index=pd.CategoricalIndex(["c", "a", "b", "d"]))
+
+        exp_index = pd.CategoricalIndex(["c", "a", "b", "d"],
+                                        categories=cats.categories)
+        exp = Series([3, 1, 2, 0], name='xxx', index=exp_index)
         tm.assert_series_equal(res, exp)
 
         res = s.value_counts(sort=True)
-        exp = Series([3, 2, 1, 0], name='xxx',
-                     index=pd.CategoricalIndex(["c", "b", "a", "d"]))
+
+        exp_index = pd.CategoricalIndex(["c", "b", "a", "d"],
+                                        categories=cats.categories)
+        exp = Series([3, 2, 1, 0], name='xxx', index=exp_index)
         tm.assert_series_equal(res, exp)
 
         # check object dtype handles the Series.name as the same
@@ -2927,38 +2938,39 @@ def test_value_counts_with_nan(self):
                       index=pd.CategoricalIndex(["a", "b", np.nan])))
 
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            s = pd.Series(pd.Categorical(
-                ["a", "b", "a"], categories=["a", "b", np.nan]))
-            tm.assert_series_equal(
-                s.value_counts(dropna=True),
-                pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])))
-            tm.assert_series_equal(
-                s.value_counts(dropna=False),
-                pd.Series([2, 1, 0],
-                          index=pd.CategoricalIndex(["a", "b", np.nan])))
+            s = pd.Series(pd.Categorical(["a", "b", "a"],
+                                         categories=["a", "b", np.nan]))
+
+        # internal categories are different because of NaN
+        exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))
+        tm.assert_series_equal(s.value_counts(dropna=True), exp,
+                               check_categorical=False)
+        exp = pd.Series([2, 1, 0],
+                        index=pd.CategoricalIndex(["a", "b", np.nan]))
+        tm.assert_series_equal(s.value_counts(dropna=False), exp,
+                               check_categorical=False)
 
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
-            s = pd.Series(pd.Categorical(
-                ["a", "b", None, "a", None, None], categories=["a", "b", np.nan
-                                                               ]))
-            tm.assert_series_equal(
-                s.value_counts(dropna=True),
-                pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"])))
-            tm.assert_series_equal(
-                s.value_counts(dropna=False),
-                pd.Series([3, 2, 1],
-                          index=pd.CategoricalIndex([np.nan, "a", "b"])))
+            s = pd.Series(pd.Categorical(["a", "b", None, "a", None, None],
+                                         categories=["a", "b", np.nan]))
+
+        exp = pd.Series([2, 1], index=pd.CategoricalIndex(["a", "b"]))
+        tm.assert_series_equal(s.value_counts(dropna=True), exp,
+                               check_categorical=False)
+        exp = pd.Series([3, 2, 1],
+                        index=pd.CategoricalIndex([np.nan, "a", "b"]))
+        tm.assert_series_equal(s.value_counts(dropna=False), exp,
+                               check_categorical=False)
 
     def test_groupby(self):
 
-        cats = Categorical(
-            ["a", "a", "a", "b", "b", "b", "c", "c", "c"
-             ], categories=["a", "b", "c", "d"], ordered=True)
+        cats = Categorical(["a", "a", "a", "b", "b", "b", "c", "c", "c"],
+                           categories=["a", "b", "c", "d"], ordered=True)
         data = DataFrame({"a": [1, 1, 1, 2, 2, 2, 3, 4, 5], "b": cats})
 
-        expected = DataFrame({'a': Series(
-            [1, 2, 4, np.nan], index=pd.CategoricalIndex(
-                ['a', 'b', 'c', 'd'], name='b'))})
+        exp_index = pd.CategoricalIndex(['a', 'b', 'c', 'd'], name='b',
+                                        ordered=True)
+        expected = DataFrame({'a': [1, 2, 4, np.nan]}, index=exp_index)
         result = data.groupby("b").mean()
         tm.assert_frame_equal(result, expected)
 
@@ -2970,17 +2982,19 @@ def test_groupby(self):
 
         # single grouper
         gb = df.groupby("A")
-        exp_idx = pd.CategoricalIndex(['a', 'b', 'z'], name='A')
+        exp_idx = pd.CategoricalIndex(['a', 'b', 'z'], name='A', ordered=True)
         expected = DataFrame({'values': Series([3, 7, np.nan], index=exp_idx)})
         result = gb.sum()
         tm.assert_frame_equal(result, expected)
 
         # multiple groupers
         gb = df.groupby(['A', 'B'])
-        expected = DataFrame({'values': Series(
-            [1, 2, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan
-             ], index=pd.MultiIndex.from_product(
-                 [['a', 'b', 'z'], ['c', 'd', 'y']], names=['A', 'B']))})
+        exp_index = pd.MultiIndex.from_product([['a', 'b', 'z'],
+                                                ['c', 'd', 'y']],
+                                               names=['A', 'B'])
+        expected = DataFrame({'values': [1, 2, np.nan, 3, 4, np.nan,
+                                         np.nan, np.nan, np.nan]},
+                             index=exp_index)
         result = gb.sum()
         tm.assert_frame_equal(result, expected)
 
@@ -3054,8 +3068,10 @@ def f(x):
         df = pd.DataFrame({'a': [1, 0, 0, 0]})
         c = pd.cut(df.a, [0, 1, 2, 3, 4])
         result = df.groupby(c).apply(len)
-        expected = pd.Series([1, 0, 0, 0],
-                             index=pd.CategoricalIndex(c.values.categories))
+
+        exp_index = pd.CategoricalIndex(c.values.categories,
+                                        ordered=c.values.ordered)
+        expected = pd.Series([1, 0, 0, 0], index=exp_index)
         expected.index.name = 'a'
         tm.assert_series_equal(result, expected)
 
@@ -3369,30 +3385,28 @@ def test_assigning_ops(self):
         # assign a part of a column with dtype != categorical ->
         # exp_parts_cats_col
 
-        cats = pd.Categorical(
-            ["a", "a", "a", "a", "a", "a", "a"], categories=["a", "b"])
+        cats = pd.Categorical(["a", "a", "a", "a", "a", "a", "a"],
+                              categories=["a", "b"])
         idx = pd.Index(["h", "i", "j", "k", "l", "m", "n"])
         values = [1, 1, 1, 1, 1, 1, 1]
         orig = pd.DataFrame({"cats": cats, "values": values}, index=idx)
 
         # the expected values
         # changed single row
-        cats1 = pd.Categorical(
-            ["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
+        cats1 = pd.Categorical(["a", "a", "b", "a", "a", "a", "a"],
+                               categories=["a", "b"])
         idx1 = pd.Index(["h", "i", "j", "k", "l", "m", "n"])
         values1 = [1, 1, 2, 1, 1, 1, 1]
-        exp_single_row = pd.DataFrame(
-            {"cats": cats1,
-             "values": values1}, index=idx1)
+        exp_single_row = pd.DataFrame({"cats": cats1,
+                                       "values": values1}, index=idx1)
 
         # changed multiple rows
-        cats2 = pd.Categorical(
-            ["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
+        cats2 = pd.Categorical(["a", "a", "b", "b", "a", "a", "a"],
+                               categories=["a", "b"])
         idx2 = pd.Index(["h", "i", "j", "k", "l", "m", "n"])
         values2 = [1, 1, 2, 2, 1, 1, 1]
-        exp_multi_row = pd.DataFrame(
-            {"cats": cats2,
-             "values": values2}, index=idx2)
+        exp_multi_row = pd.DataFrame({"cats": cats2,
+                                      "values": values2}, index=idx2)
 
         # changed part of the cats column
         cats3 = pd.Categorical(
@@ -3653,7 +3667,8 @@ def f():
         exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True)
 
         df[df["cats"] == "c"] = ["b", 2]
-        tm.assert_frame_equal(df, exp_multi_row)
+        # category c is kept in .categories
+        tm.assert_frame_equal(df, exp_fancy)
 
         # set_value
         df = orig.copy()
@@ -3708,7 +3723,7 @@ def f():
 
         # ensure that one can set something to np.nan
         s = Series(Categorical([1, 2, 3]))
-        exp = Series(Categorical([1, np.nan, 3]))
+        exp = Series(Categorical([1, np.nan, 3], categories=[1, 2, 3]))
         s[1] = np.nan
         tm.assert_series_equal(s, exp)
 
@@ -4083,10 +4098,12 @@ def f():
         c = Categorical(["a", "b", np.nan])
         with tm.assert_produces_warning(FutureWarning):
             c.set_categories(["a", "b", np.nan], rename=True, inplace=True)
+
         c[0] = np.nan
         df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]})
-        df_exp = pd.DataFrame({"cats": Categorical(["a", "b", "a"]),
-                               "vals": [1, 2, 3]})
+
+        cat_exp = Categorical(["a", "b", "a"], categories=["a", "b", np.nan])
+        df_exp = pd.DataFrame({"cats": cat_exp, "vals": [1, 2, 3]})
 
         res = df.fillna("a")
         tm.assert_frame_equal(res, df_exp)
@@ -4128,7 +4145,9 @@ def cmp(a, b):
                       ]:
 
             result = valid(s)
-            tm.assert_series_equal(result, s)
+            # compare series values only;
+            # internal .categories can't be compared because they are sorted
+            tm.assert_series_equal(result, s, check_categorical=False)
 
         # invalid conversion (these are NOT a dtype)
         for invalid in [lambda x: x.astype(pd.Categorical),
diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
index 2bad2fabcfc57..794b5e8aa5650 100644
--- a/pandas/tests/test_generic.py
+++ b/pandas/tests/test_generic.py
@@ -847,7 +847,7 @@ def test_to_xarray(self):
         assert_almost_equal(list(result.coords.keys()), ['foo'])
         self.assertIsInstance(result, DataArray)
 
-        def testit(index, check_index_type=True):
+        def testit(index, check_index_type=True, check_categorical=True):
             s = Series(range(6), index=index(6))
             s.index.name = 'foo'
             result = s.to_xarray()
@@ -859,7 +859,8 @@ def testit(index, check_index_type=True):
 
             # idempotency
             assert_series_equal(result.to_series(), s,
-                                check_index_type=check_index_type)
+                                check_index_type=check_index_type,
+                                check_categorical=check_categorical)
 
         for index in [tm.makeFloatIndex, tm.makeIntIndex,
                       tm.makeStringIndex, tm.makeUnicodeIndex,
@@ -868,7 +869,8 @@ def testit(index, check_index_type=True):
             testit(index)
 
         # not idempotent
-        testit(tm.makeCategoricalIndex, check_index_type=False)
+        testit(tm.makeCategoricalIndex, check_index_type=False,
+               check_categorical=False)
 
         s = Series(range(6))
         s.index.name = 'foo'
@@ -1409,9 +1411,8 @@ def test_to_xarray(self):
             expected['f'] = expected['f'].astype(object)
             expected['h'] = expected['h'].astype('datetime64[ns]')
             expected.columns.name = None
-            assert_frame_equal(result.to_dataframe(),
-                               expected,
-                               check_index_type=False)
+            assert_frame_equal(result.to_dataframe(), expected,
+                               check_index_type=False, check_categorical=False)
 
         # available in 0.7.1
         # MultiIndex
diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
index 5dfe88d04309e..38e6a066d3eea 100644
--- a/pandas/tests/test_groupby.py
+++ b/pandas/tests/test_groupby.py
@@ -3868,8 +3868,8 @@ def test_groupby_sort_categorical(self):
                         ['(0, 2.5]', 1, 60],
                         ['(5, 7.5]', 7, 70]], columns=['range', 'foo', 'bar'])
         df['range'] = Categorical(df['range'], ordered=True)
-        index = CategoricalIndex(
-            ['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range')
+        index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
+                                  '(7.5, 10]'], name='range', ordered=True)
         result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
                                 columns=['foo', 'bar'], index=index)
 
@@ -3879,13 +3879,15 @@ def test_groupby_sort_categorical(self):
         assert_frame_equal(result_sort, df.groupby(col, sort=False).first())
 
         df['range'] = Categorical(df['range'], ordered=False)
-        index = CategoricalIndex(
-            ['(0, 2.5]', '(2.5, 5]', '(5, 7.5]', '(7.5, 10]'], name='range')
+        index = CategoricalIndex(['(0, 2.5]', '(2.5, 5]', '(5, 7.5]',
+                                  '(7.5, 10]'], name='range')
         result_sort = DataFrame([[1, 60], [5, 30], [6, 40], [10, 10]],
                                 columns=['foo', 'bar'], index=index)
 
-        index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]',
-                                  '(5, 7.5]', '(0, 2.5]'],
+        index = CategoricalIndex(['(7.5, 10]', '(2.5, 5]', '(5, 7.5]',
+                                  '(0, 2.5]'],
+                                 categories=['(7.5, 10]', '(2.5, 5]',
+                                             '(5, 7.5]', '(0, 2.5]'],
                                  name='range')
         result_nosort = DataFrame([[10, 10], [5, 30], [6, 40], [1, 60]],
                                   index=index, columns=['foo', 'bar'])
@@ -3975,7 +3977,8 @@ def test_groupby_categorical(self):
         result = data.groupby(cats).mean()
 
         expected = data.groupby(np.asarray(cats)).mean()
-        exp_idx = CategoricalIndex(levels, ordered=True)
+        exp_idx = CategoricalIndex(levels, categories=cats.categories,
+                                   ordered=True)
         expected = expected.reindex(exp_idx)
 
         assert_frame_equal(result, expected)
@@ -3986,14 +3989,16 @@ def test_groupby_categorical(self):
         idx = cats.codes.argsort()
         ord_labels = np.asarray(cats).take(idx)
         ord_data = data.take(idx)
-        expected = ord_data.groupby(
-            Categorical(ord_labels), sort=False).describe()
+
+        exp_cats = Categorical(ord_labels, ordered=True,
+                               categories=['foo', 'bar', 'baz', 'qux'])
+        expected = ord_data.groupby(exp_cats, sort=False).describe()
         expected.index.names = [None, None]
         assert_frame_equal(desc_result, expected)
 
         # GH 10460
-        expc = Categorical.from_codes(
-            np.arange(4).repeat(8), levels, ordered=True)
+        expc = Categorical.from_codes(np.arange(4).repeat(8),
+                                      levels, ordered=True)
         exp = CategoricalIndex(expc)
         self.assert_index_equal(desc_result.index.get_level_values(0), exp)
         exp = Index(['count', 'mean', 'std', 'min', '25%', '50%',
@@ -6266,8 +6271,11 @@ def test_groupby_categorical_two_columns(self):
         # Grouping on a single column
         groups_single_key = test.groupby("cat")
         res = groups_single_key.agg('mean')
+
+        exp_index = pd.CategoricalIndex(["a", "b", "c"], name="cat",
+                                        ordered=True)
         exp = DataFrame({"ints": [1.5, 1.5, np.nan], "val": [20, 30, np.nan]},
-                        index=pd.CategoricalIndex(["a", "b", "c"], name="cat"))
+                        index=exp_index)
         tm.assert_frame_equal(res, exp)
 
         # Grouping on two columns
diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py
index 862e2282bae2f..7136d7effc1fc 100644
--- a/pandas/tests/test_reshape.py
+++ b/pandas/tests/test_reshape.py
@@ -239,26 +239,16 @@ def test_just_na(self):
     def test_include_na(self):
         s = ['a', 'b', np.nan]
         res = get_dummies(s, sparse=self.sparse)
-        exp = DataFrame({'a': {0: 1.0,
-                               1: 0.0,
-                               2: 0.0},
-                         'b': {0: 0.0,
-                               1: 1.0,
-                               2: 0.0}})
+        exp = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0},
+                         'b': {0: 0.0, 1: 1.0, 2: 0.0}})
         assert_frame_equal(res, exp)
 
         # Sparse dataframes do not allow nan labelled columns, see #GH8822
         res_na = get_dummies(s, dummy_na=True, sparse=self.sparse)
-        exp_na = DataFrame({nan: {0: 0.0,
-                                  1: 0.0,
-                                  2: 1.0},
-                            'a': {0: 1.0,
-                                  1: 0.0,
-                                  2: 0.0},
-                            'b': {0: 0.0,
-                                  1: 1.0,
-                                  2: 0.0}}).reindex_axis(
-                                      ['a', 'b', nan], 1)
+        exp_na = DataFrame({nan: {0: 0.0, 1: 0.0, 2: 1.0},
+                            'a': {0: 1.0, 1: 0.0, 2: 0.0},
+                            'b': {0: 0.0, 1: 1.0, 2: 0.0}})
+        exp_na = exp_na.reindex_axis(['a', 'b', nan], 1)
         # hack (NaN handling in assert_index_equal)
         exp_na.columns = res_na.columns
         assert_frame_equal(res_na, exp_na)
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 8682302b542be..0ec2c96dbbd7d 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -25,7 +25,7 @@
 from pandas.core.common import (is_sequence, array_equivalent,
                                 is_list_like, is_datetimelike_v_numeric,
                                 is_datetimelike_v_object, is_number,
-                                needs_i8_conversion)
+                                needs_i8_conversion, is_categorical_dtype)
 from pandas.formats.printing import pprint_thing
 from pandas.core.algorithms import take_1d
 
@@ -657,7 +657,7 @@ def assert_equal(a, b, msg=""):
 
 def assert_index_equal(left, right, exact='equiv', check_names=True,
                        check_less_precise=False, check_exact=True,
-                       obj='Index'):
+                       check_categorical=True, obj='Index'):
     """Check that left and right Index are equal.
 
     Parameters
@@ -675,6 +675,8 @@ def assert_index_equal(left, right, exact='equiv', check_names=True,
         5 digits (False) or 3 digits (True) after decimal points are compared.
     check_exact : bool, default True
         Whether to compare number exactly.
+    check_categorical : bool, default True
+        Whether to compare internal Categorical exactly.
     obj : str, default 'Index'
         Specify object name being compared, internally used to show appropriate
         assertion message
@@ -752,6 +754,11 @@ def _get_ilevel_values(index, level):
     if check_names:
         assert_attr_equal('names', left, right, obj=obj)
 
+    if check_categorical:
+        if is_categorical_dtype(left) or is_categorical_dtype(right):
+            assert_categorical_equal(left.values, right.values,
+                                     obj='{0} category'.format(obj))
+
 
 def assert_class_equal(left, right, exact=True, obj='Input'):
     """checks classes are equal."""
@@ -999,6 +1006,7 @@ def assert_series_equal(left, right, check_dtype=True,
                         check_names=True,
                         check_exact=False,
                         check_datetimelike_compat=False,
+                        check_categorical=True,
                         obj='Series'):
 
     """Check that left and right Series are equal.
@@ -1023,6 +1031,8 @@ def assert_series_equal(left, right, check_dtype=True,
         Whether to check the Series and Index names attribute.
     check_dateteimelike_compat : bool, default False
         Compare datetime-like which is comparable ignoring dtype.
+    check_categorical : bool, default True
+        Whether to compare internal Categorical exactly.
     obj : str, default 'Series'
         Specify object name being compared, internally used to show appropriate
         assertion message
@@ -1049,6 +1059,7 @@ def assert_series_equal(left, right, check_dtype=True,
                        check_names=check_names,
                        check_less_precise=check_less_precise,
                        check_exact=check_exact,
+                       check_categorical=check_categorical,
                        obj='{0}.index'.format(obj))
 
     if check_dtype:
@@ -1085,6 +1096,11 @@ def assert_series_equal(left, right, check_dtype=True,
     if check_names:
         assert_attr_equal('name', left, right, obj=obj)
 
+    if check_categorical:
+        if is_categorical_dtype(left) or is_categorical_dtype(right):
+            assert_categorical_equal(left.values, right.values,
+                                     obj='{0} category'.format(obj))
+
 
 # This could be refactored to use the NDFrame.equals method
 def assert_frame_equal(left, right, check_dtype=True,
@@ -1096,6 +1112,7 @@ def assert_frame_equal(left, right, check_dtype=True,
                        by_blocks=False,
                        check_exact=False,
                        check_datetimelike_compat=False,
+                       check_categorical=True,
                        check_like=False,
                        obj='DataFrame'):
 
@@ -1127,6 +1144,8 @@ def assert_frame_equal(left, right, check_dtype=True,
         Whether to compare number exactly.
     check_dateteimelike_compat : bool, default False
         Compare datetime-like which is comparable ignoring dtype.
+    check_categorical : bool, default True
+        Whether to compare internal Categorical exactly.
     check_like : bool, default False
         If true, then reindex_like operands
     obj : str, default 'DataFrame'
@@ -1168,6 +1187,7 @@ def assert_frame_equal(left, right, check_dtype=True,
                        check_names=check_names,
                        check_less_precise=check_less_precise,
                        check_exact=check_exact,
+                       check_categorical=check_categorical,
                        obj='{0}.index'.format(obj))
 
     # column comparison
@@ -1175,6 +1195,7 @@ def assert_frame_equal(left, right, check_dtype=True,
                        check_names=check_names,
                        check_less_precise=check_less_precise,
                        check_exact=check_exact,
+                       check_categorical=check_categorical,
                        obj='{0}.columns'.format(obj))
 
     # compare by blocks
@@ -1199,6 +1220,7 @@ def assert_frame_equal(left, right, check_dtype=True,
                 check_less_precise=check_less_precise,
                 check_exact=check_exact, check_names=check_names,
                 check_datetimelike_compat=check_datetimelike_compat,
+                check_categorical=check_categorical,
                 obj='DataFrame.iloc[:, {0}]'.format(i))