From 24d1b6a39168eb2051ac828a3ed16fdc32c870d0 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17+GitHub@gmail.com>
Date: Wed, 19 Dec 2018 07:04:12 +0000
Subject: [PATCH 1/5] ENH: Allow fixed-length strings in df.to_records()

Adds parameter to allow string-like columns to be
cast as fixed-length string-like dtypes for more
efficient storage.

Closes gh-18146.

Originally authored by @qinghao1 but cleaned up
by @gfyoung to fix merge conflicts.
---
 doc/source/whatsnew/v0.24.0.rst       |  1 +
 pandas/core/frame.py                  | 73 +++++++++++++++++++++++++--
 pandas/tests/frame/test_convert_to.py | 38 ++++++++++++++
 3 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index b4331aab3085f..b3172eb2ca966 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -411,6 +411,7 @@ Other Enhancements
 - :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed sting columns to Stata strl format (:issue:`23633`)
 - :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the ``axis`` parameter (:issue:`8839`)
 - The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`)
+- :meth:`DataFrame.to_records` now accepts a ``stringlike_as_fixed_length`` parameter to efficiently store string-likes as fixed-length string-like dtypes (e.g. ``S1``) instead of object dtype (``O``)  (:issue:`18146`)
 - :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
 - :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method <io.sql.method>` section in the documentation. (:issue:`8953`)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 99ae551d3c55b..83cb2240c7f86 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -35,7 +35,7 @@
                            OrderedDict, PY36, raise_with_traceback,
                            string_and_binary_types)
 from pandas.compat.numpy import function as nv
-
+from pandas.api.types import infer_dtype
 from pandas.core.dtypes.cast import (
     maybe_upcast,
     cast_scalar_to_array,
@@ -1540,7 +1540,8 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
 
         return cls(mgr)
 
-    def to_records(self, index=True, convert_datetime64=None):
+    def to_records(self, index=True, convert_datetime64=None,
+                   stringlike_as_fixed_length=False):
         """
         Convert DataFrame to a NumPy record array.
 
@@ -1557,6 +1558,11 @@ def to_records(self, index=True, convert_datetime64=None):
 
             Whether to convert the index to datetime.datetime if it is a
             DatetimeIndex.
+         stringlike_as_fixed_length : bool, default False
+             .. versionadded:: 0.24.0
+
+             Store string-likes as fixed-length string-like dtypes
+             (e.g. ``S1`` dtype) instead of Python objects (``O`` dtype).
 
         Returns
         -------
@@ -1598,6 +1604,27 @@ def to_records(self, index=True, convert_datetime64=None):
         >>> df.to_records(index=False)
         rec.array([(1, 0.5 ), (2, 0.75)],
                   dtype=[('A', '<i8'), ('B', '<f8')])
+
+         By default, strings are recorded as dtype 'O' for object:
+
+         >>> df = pd.DataFrame({'A': [1, 2], 'B': ['abc', 'defg']},
+         ...                   index=['a', 'b'])
+         >>> df.to_records()
+         rec.array([('a', 1, 'abc'), ('b', 2, 'defg')],
+                   dtype=[('index', 'O'), ('A', '<i8'), ('B', 'O')])
+
+         This can be inefficient (e.g. for short strings, or when storing with
+         `np.save()`). They can be recorded as fix-length string-like dtypes
+         such as 'S1' for zero-terminated bytes instead:
+
+         >>> df = pd.DataFrame({'A': [1, 2], 'B': ['abc', 'defg']},
+         ...                   index=['a', 'b'])
+         >>> df.to_records(stringlike_as_fixed_length=True)
+         rec.array([('a', 1, 'abc'), ('b', 2, 'defg')],
+                   dtype=[('index', '<U1'), ('A', '<i8'), ('B', '<U4')])
+
+        Notice how the 'B' column is now stored as '<U4' for length-four
+        strings ('S4' for Python 2.x) instead of the 'O' object dtype.
         """
 
         if convert_datetime64 is not None:
@@ -1633,7 +1660,47 @@ def to_records(self, index=True, convert_datetime64=None):
             arrays = [self[c].get_values() for c in self.columns]
             names = lmap(compat.text_type, self.columns)
 
-        formats = [v.dtype for v in arrays]
+        formats = []
+
+        for v in arrays:
+            if not stringlike_as_fixed_length:
+                formats.append(v.dtype)
+            else:
+                # gh-18146
+                #
+                # For string-like arrays, set dtype as zero-terminated bytes
+                # with max length equal to that of the longest string-like.
+                dtype = infer_dtype(v)
+                symbol = None
+
+                if dtype == "string":
+                    # In Python 3.x, infer_dtype does not
+                    # differentiate string from unicode
+                    # like NumPy arrays do, so we
+                    # specify unicode to be safe.
+                    symbol = "S" if compat.PY2 else "U"
+                elif dtype == "unicode":
+                    # In Python 3.x, infer_dtype does not
+                    # differentiate string from unicode.
+                    #
+                    # Thus, we can only get this result
+                    # in Python 2.x.
+                    symbol = "U"
+                elif dtype == "bytes":
+                    # In Python 2.x, infer_dtype does not
+                    # differentiate string from bytes.
+                    #
+                    # Thus, we can only get this result
+                    # in Python 3.x. However, NumPy does
+                    # not have a fixed-length bytes dtype
+                    # and just uses string instead.
+                    symbol = "S"
+
+                if symbol is not None:
+                    formats.append("{}{}".format(symbol, max(map(len, v))))
+                else:
+                    formats.append(v.dtype)
+
         return np.rec.fromarrays(
             arrays,
             dtype={'names': names, 'formats': formats}
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index f1eb6a33eddeb..95974691e3038 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -191,6 +191,44 @@ def test_to_records_with_categorical(self):
                                 dtype=[('index', '=i8'), ('0', 'O')])
         tm.assert_almost_equal(result, expected)
 
+    @pytest.mark.parametrize("fixed_length", [True, False])
+    @pytest.mark.parametrize("values,dtype_getter", [
+        # Integer --> just take the dtype.
+        ([1, 2], lambda fixed, isPY2: "<i8"),
+
+        # Mixed --> cast to object.
+        ([1, "1"], lambda fixed, isPY2: "O"),
+
+        # String --> cast to string is PY2 else unicode in PY3.
+        (["1", "2"], lambda fixed, isPY2: (
+            ("S" if isPY2 else "U") + "1") if fixed else "O"),
+
+        # String + max-length of longest string.
+        (["12", "2"], lambda fixed, isPY2: (
+            ("S" if isPY2 else "U") + "2") if fixed else "O"),
+
+        # Unicode --> cast to unicode for both PY2 and PY3.
+        ([u"\u2120b", u"456"], lambda fixed, isPY2: "U3" if fixed else "O"),
+
+        # Bytes --> cast to string for both PY2 and PY3.
+        ([b"2", b"5"], lambda fixed, isPY2: "S1" if fixed else "O"),
+    ], ids=["int", "mixed", "str", "max-len", "unicode", "bytes"])
+    def test_to_records_with_strings_as_fixed_length(self, fixed_length,
+                                                     values, dtype_getter):
+
+        # see gh-18146
+        df = DataFrame({"values": values}, index=["a", "b"])
+        result = df.to_records(stringlike_as_fixed_length=fixed_length)
+
+        ind_dtype = ((("S" if compat.PY2 else "U") + "1")
+                     if fixed_length else "O")
+        val_dtype = dtype_getter(fixed_length, compat.PY2)
+
+        expected = np.rec.array([("a", values[0]), ("b", values[1])],
+                                dtype=[("index", ind_dtype),
+                                       ("values", val_dtype)])
+        tm.assert_almost_equal(result, expected)
+
     @pytest.mark.parametrize('mapping', [
         dict,
         collections.defaultdict(list),

From fa5e1ea67fe09f632b29d97092e00ea6ae47eb3f Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17+GitHub@gmail.com>
Date: Wed, 26 Dec 2018 11:07:52 +0000
Subject: [PATCH 2/5] Add dtype parameters instead of stringlike_as_fixed_length

The original parameter was causing a lot of acrobatics
with regards to string dtypes between 2.x and 3.x.

The new parameters simplify the internal logic and
pass the responsibility and motivation of memory
efficiency back to the users.
---
 doc/source/whatsnew/v0.24.0.rst       |   2 +-
 pandas/core/frame.py                  | 115 ++++++++++---------
 pandas/tests/frame/test_convert_to.py | 155 ++++++++++++++++++++------
 3 files changed, 178 insertions(+), 94 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index b3172eb2ca966..cca15f26cbf99 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -411,7 +411,7 @@ Other Enhancements
 - :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed sting columns to Stata strl format (:issue:`23633`)
 - :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained the ``axis`` parameter (:issue:`8839`)
 - The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`)
-- :meth:`DataFrame.to_records` now accepts a ``stringlike_as_fixed_length`` parameter to efficiently store string-likes as fixed-length string-like dtypes (e.g. ``S1``) instead of object dtype (``O``)  (:issue:`18146`)
+- :meth:`DataFrame.to_records` now accepts ``index_dtypes`` and ``column_dtypes`` parameters to allow different data types in stored column and index records (:issue:`18146`)
 - :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)
 - :func:`pandas.DataFrame.to_sql` has gained the ``method`` argument to control SQL insertion clause. See the :ref:`insertion method <io.sql.method>` section in the documentation. (:issue:`8953`)
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 83cb2240c7f86..fe8a1e3fed3aa 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -35,7 +35,6 @@
                            OrderedDict, PY36, raise_with_traceback,
                            string_and_binary_types)
 from pandas.compat.numpy import function as nv
-from pandas.api.types import infer_dtype
 from pandas.core.dtypes.cast import (
     maybe_upcast,
     cast_scalar_to_array,
@@ -1541,7 +1540,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
         return cls(mgr)
 
     def to_records(self, index=True, convert_datetime64=None,
-                   stringlike_as_fixed_length=False):
+                   column_dtypes=None, index_dtypes=None):
         """
         Convert DataFrame to a NumPy record array.
 
@@ -1558,11 +1557,20 @@ def to_records(self, index=True, convert_datetime64=None,
 
             Whether to convert the index to datetime.datetime if it is a
             DatetimeIndex.
-         stringlike_as_fixed_length : bool, default False
-             .. versionadded:: 0.24.0
+        column_dtypes : str, type, dict, default None
+            .. versionadded:: 0.24.0
+
+            If a string or type, the data type to store all columns. If
+            a dictionary, a mapping of column names and indices (zero-indexed)
+            to specific data types.
+        index_dtypes : str, type, dict, default None
+            .. versionadded:: 0.24.0
 
-             Store string-likes as fixed-length string-like dtypes
-             (e.g. ``S1`` dtype) instead of Python objects (``O`` dtype).
+            If a string or type, the data type to store all index levels. If
+            a dictionary, a mapping of index level names and indices
+            (zero-indexed) to specific data types.
+
+            This mapping is applied only if `index=True`.
 
         Returns
         -------
@@ -1605,26 +1613,22 @@ def to_records(self, index=True, convert_datetime64=None,
         rec.array([(1, 0.5 ), (2, 0.75)],
                   dtype=[('A', '<i8'), ('B', '<f8')])
 
-         By default, strings are recorded as dtype 'O' for object:
+        Data types can be specified for the columns:
 
-         >>> df = pd.DataFrame({'A': [1, 2], 'B': ['abc', 'defg']},
-         ...                   index=['a', 'b'])
-         >>> df.to_records()
-         rec.array([('a', 1, 'abc'), ('b', 2, 'defg')],
-                   dtype=[('index', 'O'), ('A', '<i8'), ('B', 'O')])
+        >>> df.to_records(column_dtypes={"A": "int32"})
+        rec.array([('a', 1, 0.5 ), ('b', 2, 0.75)],
+                  dtype=[('I', 'O'), ('A', '<i4'), ('B', '<f8')])
 
-         This can be inefficient (e.g. for short strings, or when storing with
-         `np.save()`). They can be recorded as fix-length string-like dtypes
-         such as 'S1' for zero-terminated bytes instead:
+        As well as for the index:
 
-         >>> df = pd.DataFrame({'A': [1, 2], 'B': ['abc', 'defg']},
-         ...                   index=['a', 'b'])
-         >>> df.to_records(stringlike_as_fixed_length=True)
-         rec.array([('a', 1, 'abc'), ('b', 2, 'defg')],
-                   dtype=[('index', '<U1'), ('A', '<i8'), ('B', '<U4')])
+        >>> df.to_records(index_dtypes="<S2")
+        rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
+                  dtype=[('I', 'S2'), ('A', '<i8'), ('B', '<f8')])
 
-        Notice how the 'B' column is now stored as '<U4' for length-four
-        strings ('S4' for Python 2.x) instead of the 'O' object dtype.
+        >>> index_dtypes = "<S{}".format(df.index.str.len().max())
+        >>> df.to_records(index_dtypes=index_dtypes)
+        rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
+                  dtype=[('I', 'S1'), ('A', '<i8'), ('B', '<f8')])
         """
 
         if convert_datetime64 is not None:
@@ -1647,6 +1651,7 @@ def to_records(self, index=True, convert_datetime64=None,
 
             count = 0
             index_names = list(self.index.names)
+
             if isinstance(self.index, MultiIndex):
                 for i, n in enumerate(index_names):
                     if n is None:
@@ -1654,52 +1659,46 @@ def to_records(self, index=True, convert_datetime64=None,
                         count += 1
             elif index_names[0] is None:
                 index_names = ['index']
+
             names = (lmap(compat.text_type, index_names) +
                      lmap(compat.text_type, self.columns))
         else:
             arrays = [self[c].get_values() for c in self.columns]
             names = lmap(compat.text_type, self.columns)
+            index_names = []
 
+        index_len = len(index_names)
         formats = []
 
-        for v in arrays:
-            if not stringlike_as_fixed_length:
-                formats.append(v.dtype)
+        for i, v in enumerate(arrays):
+            index = i
+
+            if index < index_len:
+                dtype_mapping = index_dtypes
+                name = index_names[index]
             else:
-                # gh-18146
-                #
-                # For string-like arrays, set dtype as zero-terminated bytes
-                # with max length equal to that of the longest string-like.
-                dtype = infer_dtype(v)
-                symbol = None
-
-                if dtype == "string":
-                    # In Python 3.x, infer_dtype does not
-                    # differentiate string from unicode
-                    # like NumPy arrays do, so we
-                    # specify unicode to be safe.
-                    symbol = "S" if compat.PY2 else "U"
-                elif dtype == "unicode":
-                    # In Python 3.x, infer_dtype does not
-                    # differentiate string from unicode.
-                    #
-                    # Thus, we can only get this result
-                    # in Python 2.x.
-                    symbol = "U"
-                elif dtype == "bytes":
-                    # In Python 2.x, infer_dtype does not
-                    # differentiate string from bytes.
-                    #
-                    # Thus, we can only get this result
-                    # in Python 3.x. However, NumPy does
-                    # not have a fixed-length bytes dtype
-                    # and just uses string instead.
-                    symbol = "S"
-
-                if symbol is not None:
-                    formats.append("{}{}".format(symbol, max(map(len, v))))
+                index -= index_len
+                dtype_mapping = column_dtypes
+                name = self.columns[index]
+
+            if isinstance(dtype_mapping, dict):
+                if name in dtype_mapping:
+                    dtype_mapping = dtype_mapping[name]
+                elif index in dtype_mapping:
+                    dtype_mapping = dtype_mapping[index]
                 else:
-                    formats.append(v.dtype)
+                    dtype_mapping = None
+
+            if dtype_mapping is None:
+                formats.append(v.dtype)
+            elif isinstance(dtype_mapping, (type, compat.string_types)):
+                formats.append(dtype_mapping)
+            else:
+                element = "row" if i < index_len else "column"
+                msg = ("Invalid dtype {dtype} specified for "
+                       "{element} {name}").format(dtype=dtype_mapping,
+                                                  element=element, name=name)
+                raise ValueError(msg)
 
         return np.rec.fromarrays(
             arrays,
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index 95974691e3038..3f517c9d51c8e 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -191,42 +191,127 @@ def test_to_records_with_categorical(self):
                                 dtype=[('index', '=i8'), ('0', 'O')])
         tm.assert_almost_equal(result, expected)
 
-    @pytest.mark.parametrize("fixed_length", [True, False])
-    @pytest.mark.parametrize("values,dtype_getter", [
-        # Integer --> just take the dtype.
-        ([1, 2], lambda fixed, isPY2: "<i8"),
-
-        # Mixed --> cast to object.
-        ([1, "1"], lambda fixed, isPY2: "O"),
-
-        # String --> cast to string is PY2 else unicode in PY3.
-        (["1", "2"], lambda fixed, isPY2: (
-            ("S" if isPY2 else "U") + "1") if fixed else "O"),
-
-        # String + max-length of longest string.
-        (["12", "2"], lambda fixed, isPY2: (
-            ("S" if isPY2 else "U") + "2") if fixed else "O"),
-
-        # Unicode --> cast to unicode for both PY2 and PY3.
-        ([u"\u2120b", u"456"], lambda fixed, isPY2: "U3" if fixed else "O"),
-
-        # Bytes --> cast to string for both PY2 and PY3.
-        ([b"2", b"5"], lambda fixed, isPY2: "S1" if fixed else "O"),
-    ], ids=["int", "mixed", "str", "max-len", "unicode", "bytes"])
-    def test_to_records_with_strings_as_fixed_length(self, fixed_length,
-                                                     values, dtype_getter):
-
+    @pytest.mark.parametrize("kwargs,expected", [
+        # No dtypes --> default to array dtypes.
+        (dict(),
+         np.rec.array([(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
+                      dtype=[("index", "<i8"), ("A", "<i8"),
+                             ("B", "<f8"), ("C", "O")])),
+
+        # Should have no effect in this case.
+        (dict(index=True),
+         np.rec.array([(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
+                      dtype=[("index", "<i8"), ("A", "<i8"),
+                             ("B", "<f8"), ("C", "O")])),
+
+        # Column dtype applied across the board. Index unaffected.
+        (dict(column_dtypes="<U4"),
+         np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
+                      dtype=[("index", "<i8"), ("A", "<U4"),
+                             ("B", "<U4"), ("C", "<U4")])),
+
+        # Index dtype applied across the board. Columns unaffected.
+        (dict(index_dtypes="<U1"),
+         np.rec.array([("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")],
+                      dtype=[("index", "<U1"), ("A", "<i8"),
+                             ("B", "<f8"), ("C", "O")])),
+
+        # Pass in a type instance.
+        (dict(column_dtypes=np.unicode),
+         np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
+                      dtype=[("index", "<i8"), ("A", "<U"),
+                             ("B", "<U"), ("C", "<U")])),
+
+        # Pass in a dictionary (name-only).
+        (dict(column_dtypes={"A": np.int8, "B": np.float32, "C": "<U2"}),
+         np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
+                      dtype=[("index", "<i8"), ("A", "i1"),
+                             ("B", "<f4"), ("C", "<U2")])),
+
+        # Pass in a dictionary (indices-only).
+        (dict(index_dtypes={0: "int16"}),
+         np.rec.array([(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
+                      dtype=[("index", "i2"), ("A", "<i8"),
+                             ("B", "<f8"), ("C", "O")])),
+
+        # Ignore index mappings if index is not True.
+        (dict(index=False, index_dtypes="<U2"),
+         np.rec.array([(1, 0.2, "a"), (2, 1.5, "bc")],
+                      dtype=[("A", "<i8"), ("B", "<f8"), ("C", "O")])),
+
+        # Non-existent names / indices in mapping should not error.
+        (dict(index_dtypes={0: "int16", "not-there": "float32"}),
+         np.rec.array([(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
+                      dtype=[("index", "i2"), ("A", "<i8"),
+                             ("B", "<f8"), ("C", "O")])),
+
+        # Names / indices not in mapping default to array dtype.
+        (dict(column_dtypes={"A": np.int8, "B": np.float32}),
+         np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
+                      dtype=[("index", "<i8"), ("A", "i1"),
+                             ("B", "<f4"), ("C", "O")])),
+
+        # Mixture of everything.
+        (dict(column_dtypes={"A": np.int8, "B": np.float32},
+              index_dtypes="<U2"),
+         np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
+                      dtype=[("index", "<U2"), ("A", "i1"),
+                             ("B", "<f4"), ("C", "O")])),
+
+        # Invalid dtype values.
+        (dict(index=False, column_dtypes=list()),
+         "Invalid dtype \\[\\] specified for column A"),
+
+        (dict(index=False, column_dtypes={"A": "int32", "B": 5}),
+         "Invalid dtype 5 specified for column B"),
+    ])
+    def test_to_records_dtype(self, kwargs, expected):
         # see gh-18146
-        df = DataFrame({"values": values}, index=["a", "b"])
-        result = df.to_records(stringlike_as_fixed_length=fixed_length)
-
-        ind_dtype = ((("S" if compat.PY2 else "U") + "1")
-                     if fixed_length else "O")
-        val_dtype = dtype_getter(fixed_length, compat.PY2)
-
-        expected = np.rec.array([("a", values[0]), ("b", values[1])],
-                                dtype=[("index", ind_dtype),
-                                       ("values", val_dtype)])
+        df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]})
+
+        if isinstance(expected, str):
+            with pytest.raises(ValueError, match=expected):
+                df.to_records(**kwargs)
+        else:
+            result = df.to_records(**kwargs)
+            tm.assert_almost_equal(result, expected)
+
+    @pytest.mark.parametrize("df,kwargs,expected", [
+        # MultiIndex in the index.
+        (DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+                   columns=list("abc")).set_index(["a", "b"]),
+         dict(column_dtypes="float64", index_dtypes={0: "int32", 1: "int8"}),
+         np.rec.array([(1, 2, 3.), (4, 5, 6.), (7, 8, 9.)],
+                      dtype=[("a", "<i4"), ("b", "i1"), ("c", "<f8")])),
+
+        # MultiIndex in the columns.
+        (DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+                   columns=MultiIndex.from_tuples([("a", "d"), ("b", "e"),
+                                                   ("c", "f")])),
+         dict(column_dtypes={0: "<U1", 2: "float32"}, index_dtypes="float32"),
+         np.rec.array([(0., u"1", 2, 3.), (1., u"4", 5, 6.),
+                       (2., u"7", 8, 9.)],
+                      dtype=[("index", "<f4"),
+                             ("('a', 'd')", "<U1"),
+                             ("('b', 'e')", "<i8"),
+                             ("('c', 'f')", "<f4")])),
+
+        # MultiIndex in both the columns and index.
+        (DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
+                   columns=MultiIndex.from_tuples([
+                       ("a", "d"), ("b", "e"), ("c", "f")], names=list("ab")),
+                   index=MultiIndex.from_tuples([
+                       ("d", -4), ("d", -5), ("f", -6)], names=list("cd"))),
+         dict(column_dtypes="float64", index_dtypes={0: "<U2", 1: "int8"}),
+         np.rec.array([("d", -4, 1., 2., 3.), ("d", -5, 4., 5., 6.),
+                       ("f", -6, 7, 8, 9.)],
+                      dtype=[("c", "<U2"), ("d", "i1"),
+                             ("('a', 'd')", "<f8"), ("('b', 'e')", "<f8"),
+                             ("('c', 'f')", "<f8")]))
+    ])
+    def test_to_records_dtype_mi(self, df, kwargs, expected):
+        # see gh-18146
+        result = df.to_records(**kwargs)
         tm.assert_almost_equal(result, expected)
 
     @pytest.mark.parametrize('mapping', [

From 4cacb52ce053a828da721fb941ea53456287e7f5 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17+GitHub@gmail.com>
Date: Sat, 29 Dec 2018 02:00:11 +0000
Subject: [PATCH 3/5] MAINT: Use is_dict_like in to_records

More generic than checking whether our
mappings are instances of dict.

Expands is_dict_like check to include
whether it has a __contains__ method.
---
 pandas/core/dtypes/inference.py       |  5 ++++-
 pandas/core/frame.py                  |  3 ++-
 pandas/tests/frame/test_convert_to.py | 28 +++++++++++++++++++++++++++
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py
index 241a1b471f677..b11542622451c 100644
--- a/pandas/core/dtypes/inference.py
+++ b/pandas/core/dtypes/inference.py
@@ -398,8 +398,11 @@ def is_dict_like(obj):
     >>> is_dict_like([1, 2, 3])
     False
     """
+    for attr in ("__getitem__", "keys", "__contains__"):
+        if not hasattr(obj, attr):
+            return False
 
-    return hasattr(obj, '__getitem__') and hasattr(obj, 'keys')
+    return True
 
 
 def is_named_tuple(obj):
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index fe8a1e3fed3aa..dfa539159a25a 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -48,6 +48,7 @@
     maybe_upcast_putmask,
     find_common_type)
 from pandas.core.dtypes.common import (
+    is_dict_like,
     is_object_dtype,
     is_extension_type,
     is_extension_array_dtype,
@@ -1681,7 +1682,7 @@ def to_records(self, index=True, convert_datetime64=None,
                 dtype_mapping = column_dtypes
                 name = self.columns[index]
 
-            if isinstance(dtype_mapping, dict):
+            if is_dict_like(dtype_mapping):
                 if name in dtype_mapping:
                     dtype_mapping = dtype_mapping[name]
                 elif index in dtype_mapping:
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index 3f517c9d51c8e..b875559169205 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -314,6 +314,34 @@ def test_to_records_dtype_mi(self, df, kwargs, expected):
         result = df.to_records(**kwargs)
         tm.assert_almost_equal(result, expected)
 
+    def test_to_records_dict_like(self):
+        # see gh-18146
+        class DictLike(object):
+            def __init__(self, **kwargs):
+                self.d = kwargs.copy()
+
+            def __getitem__(self, key):
+                return self.d.__getitem__(key)
+
+            def __contains__(self, key):
+                return key in self.d
+
+            def keys(self):
+                return self.d.keys()
+
+        df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]})
+
+        dtype_mappings = dict(column_dtypes=DictLike(**{"A": np.int8,
+                                                        "B": np.float32}),
+                              index_dtypes="<U2")
+
+        result = df.to_records(**dtype_mappings)
+        expected = np.rec.array([("0", "1", "0.2", "a"),
+                                 ("1", "2", "1.5", "bc")],
+                                dtype=[("index", "<U2"), ("A", "i1"),
+                                       ("B", "<f4"), ("C", "O")])
+        tm.assert_almost_equal(result, expected)
+
     @pytest.mark.parametrize('mapping', [
         dict,
         collections.defaultdict(list),

From 3b100c3270f22348f83934b040c65a427be71a5b Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17+GitHub@gmail.com>
Date: Sun, 30 Dec 2018 01:16:14 +0000
Subject: [PATCH 4/5] TST: Add test for is_dict_like expanded def

---
 pandas/tests/dtypes/test_inference.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 0c22b595bc74d..9c5944ed585f3 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -178,6 +178,33 @@ def test_is_dict_like_fails(ll):
     assert not inference.is_dict_like(ll)
 
 
+@pytest.mark.parametrize("has_keys", [True, False])
+@pytest.mark.parametrize("has_getitem", [True, False])
+@pytest.mark.parametrize("has_contains", [True, False])
+def test_is_dict_like_duct_type(has_keys, has_getitem, has_contains):
+    class DictLike(object):
+        def __init__(self, d):
+            self.d = d
+
+        if has_keys:
+            def keys(self):
+                return self.d.keys()
+
+        if has_getitem:
+            def __getitem__(self, key):
+                return self.d.__getitem__(key)
+
+        if has_contains:
+            def __contains__(self, key):
+                return self.d.__contains__(key)
+
+    d = DictLike({1: 2})
+    result = inference.is_dict_like(d)
+    expected = has_keys and has_getitem and has_contains
+
+    assert result is expected
+
+
 def test_is_file_like(mock):
     class MockFile(object):
         pass

From ec69fe0b4a98b7e21463ec1ceb6a3eaeed9cc96f Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17+GitHub@gmail.com>
Date: Sun, 30 Dec 2018 20:58:06 +0000
Subject: [PATCH 5/5] MAINT: Address final comments

---
 pandas/core/frame.py                  | 19 +++++++++++++++++++
 pandas/tests/dtypes/test_inference.py |  2 +-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index dfa539159a25a..99653248216f5 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1674,6 +1674,15 @@ def to_records(self, index=True, convert_datetime64=None,
         for i, v in enumerate(arrays):
             index = i
 
+            # When the names and arrays are collected, we
+            # first collect those in the DataFrame's index,
+            # followed by those in its columns.
+            #
+            # Thus, the total length of the array is:
+            # len(index_names) + len(DataFrame.columns).
+            #
+            # This check allows us to see whether we are
+            # handling a name / array in the index or column.
             if index < index_len:
                 dtype_mapping = index_dtypes
                 name = index_names[index]
@@ -1682,6 +1691,11 @@ def to_records(self, index=True, convert_datetime64=None,
                 dtype_mapping = column_dtypes
                 name = self.columns[index]
 
+            # We have a dictionary, so we get the data type
+            # associated with the index or column (which can
+            # be denoted by its name in the DataFrame or its
+            # position in the DataFrame's array of indices or
+            # columns, whichever is applicable).
             if is_dict_like(dtype_mapping):
                 if name in dtype_mapping:
                     dtype_mapping = dtype_mapping[name]
@@ -1690,6 +1704,11 @@ def to_records(self, index=True, convert_datetime64=None,
                 else:
                     dtype_mapping = None
 
+            # If no mapping can be found, use the array's
+            # dtype attribute for formatting.
+            #
+            # A valid dtype must either be a type or
+            # string naming a type.
             if dtype_mapping is None:
                 formats.append(v.dtype)
             elif isinstance(dtype_mapping, (type, compat.string_types)):
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 9c5944ed585f3..d9b1b0db90562 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -181,7 +181,7 @@ def test_is_dict_like_fails(ll):
 @pytest.mark.parametrize("has_keys", [True, False])
 @pytest.mark.parametrize("has_getitem", [True, False])
 @pytest.mark.parametrize("has_contains", [True, False])
-def test_is_dict_like_duct_type(has_keys, has_getitem, has_contains):
+def test_is_dict_like_duck_type(has_keys, has_getitem, has_contains):
     class DictLike(object):
         def __init__(self, d):
             self.d = d