pandas-dev · jreback · Jul 15, 2021 · Jul 13, 2021 · Jul 14, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -230,7 +230,7 @@ MultiIndex
 I/O
 ^^^
 - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`)
--
+- Bug in :func:`read_csv` with multi-header input where ``dtype``, ``converters`` and ``na_values`` arguments referencing column names as tuples were not applied (:issue:`42446`)
 -
 
 Period

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -1280,6 +1280,8 @@ cdef class TextReader:
                 # generate extra (bogus) headers if there are more columns than headers
                 if j >= len(self.header[0]):
                     return j
+                elif self.has_mi_columns:
+                    return tuple(header_row[j] for header_row in self.header)
                 else:
                     return self.header[0][j]
             else:

diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -257,3 +257,29 @@ def test_dtype_mangle_dup_cols_single_dtype(all_parsers):
     result = parser.read_csv(StringIO(data), dtype=str)
     expected = DataFrame({"a": ["1"], "a.1": ["1"]})
     tm.assert_frame_equal(result, expected)
+
+
+def test_dtype_multi_index(all_parsers):
+    # GH 42446: dtype keys are column tuples from a two-row header
+    parser = all_parsers
+    data = "A,B,B\nX,Y,Z\n1,2,3"
+
+    result = parser.read_csv(
+        StringIO(data),
+        header=list(range(2)),
+        dtype={
+            ("A", "X"): np.int32,
+            ("B", "Y"): np.int32,
+            ("B", "Z"): np.float32,
+        },
+    )
+
+    expected = DataFrame(
+        {
+            ("A", "X"): np.int32([1]),
+            ("B", "Y"): np.int32([2]),
+            ("B", "Z"): np.float32([3]),
+        }
+    )
+
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py
@@ -161,3 +161,29 @@ def test_converter_index_col_bug(all_parsers):
 
     xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
     tm.assert_frame_equal(rs, xp)
+
+
+def test_converter_multi_index(all_parsers):
+    # GH 42446: converters keys are column tuples from a two-row header
+    parser = all_parsers
+    data = "A,B,B\nX,Y,Z\n1,2,3"
+
+    result = parser.read_csv(
+        StringIO(data),
+        header=list(range(2)),
+        converters={
+            ("A", "X"): np.int32,
+            ("B", "Y"): np.int32,
+            ("B", "Z"): np.float32,
+        },
+    )
+
+    expected = DataFrame(
+        {
+            ("A", "X"): np.int32([1]),
+            ("B", "Y"): np.int32([2]),
+            ("B", "Z"): np.float32([3]),
+        }
+    )
+
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
@@ -570,3 +570,23 @@ def test_str_nan_dropped(all_parsers):
     )
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_nan_multi_index(all_parsers):
+    # GH 42446: na_values is keyed by a column tuple from a two-row header
+    parser = all_parsers
+    data = "A,B,B\nX,Y,Z\n1,2,inf"
+
+    result = parser.read_csv(
+        StringIO(data), header=list(range(2)), na_values={("B", "Z"): "inf"}
+    )
+
+    expected = DataFrame(
+        {
+            ("A", "X"): [1],
+            ("B", "Y"): [2],
+            ("B", "Z"): [np.nan],
+        }
+    )
+
+    tm.assert_frame_equal(result, expected)
-Original file line number
+Diff line change
@@ Expand Up / @@ -230,7 +230,7 @@ MultiIndex @@
     I/O
     ^^^
     - Bug in :func:`read_excel` attempting to read chart sheets from .xlsx files (:issue:`41448`)
-    -
+    - Bug in :func:`read_csv` with multi-header input where ``dtype``, ``converters`` and ``na_values`` arguments referencing column names as tuples were not applied (:issue:`42446`)
     -
     Period
@@ Expand Down @@