diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 1c12a145caf72..dd8565670a6bf 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -471,6 +471,7 @@ Other enhancements
- :meth:`~DataFrame.to_html` now has a ``border`` argument to control the value in the opening ``<table>`` tag. The default is the value of the ``html.border`` option, which defaults to 1. This also affects the notebook HTML repr, but since Jupyter's CSS includes a border-width attribute, the visual effect is the same. (:issue:`11563`).
- Raise ``ImportError`` in the sql functions when ``sqlalchemy`` is not installed and a connection string is used (:issue:`11920`).
- Compatibility with matplotlib 2.0. Older versions of pandas should also work with matplotlib 2.0 (:issue:`13333`)
+- When using the ``usecols`` argument in the ``read`` functions, specifying a column name that isn't found now generates a more helpful error message (:issue:`14154`)
.. _whatsnew_0190.api:
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 3bd8579d456d3..24b2a60afd90f 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -981,8 +981,7 @@ def _validate_usecols_arg(usecols):
if usecols is not None:
usecols_dtype = lib.infer_dtype(usecols)
- if usecols_dtype not in ('empty', 'integer',
- 'string', 'unicode'):
+ if usecols_dtype not in ('empty', 'integer', 'string', 'unicode'):
raise ValueError(msg)
return set(usecols)
@@ -1424,7 +1423,13 @@ def __init__(self, src, **kwds):
if (i in self.usecols or n in self.usecols)]
if len(self.names) < len(self.usecols):
- raise ValueError("Usecols do not match names.")
+ bad_cols = [n for n in self.usecols if n not in self.names]
+ if len(bad_cols) > 0:
+ raise ValueError(("%s specified in usecols but not found "
+ "in names.") % bad_cols)
+ else:
+ raise ValueError(("Number of usecols is greater than "
+ "number of names."))
self._set_noconvert_columns()
@@ -2185,16 +2190,21 @@ def _handle_usecols(self, columns, usecols_key):
usecols_key is used if there are string usecols.
"""
if self.usecols is not None:
- if any([isinstance(u, string_types) for u in self.usecols]):
+ if any([isinstance(c, string_types) for c in self.usecols]):
if len(columns) > 1:
raise ValueError("If using multiple headers, usecols must "
"be integers.")
+ bad_cols = [n for n in self.usecols if n not in usecols_key]
+ if len(bad_cols) > 0:
+ raise ValueError(("%s specified in usecols but not found "
+ "in names.") % bad_cols)
+
col_indices = []
- for u in self.usecols:
- if isinstance(u, string_types):
- col_indices.append(usecols_key.index(u))
+ for c in self.usecols:
+ if isinstance(c, string_types):
+ col_indices.append(usecols_key.index(c))
else:
- col_indices.append(u)
+ col_indices.append(c)
else:
col_indices = self.usecols
diff --git a/pandas/io/tests/parser/usecols.py b/pandas/io/tests/parser/usecols.py
index 16a19c50be960..efd23c8b3be54 100644
--- a/pandas/io/tests/parser/usecols.py
+++ b/pandas/io/tests/parser/usecols.py
@@ -83,6 +83,8 @@ def test_usecols(self):
# length conflict, passed names and usecols disagree
self.assertRaises(ValueError, self.read_csv, StringIO(data),
names=['a', 'b'], usecols=[1], header=None)
+ self.assertRaises(ValueError, self.read_csv, StringIO(data),
+ names=['a', 'b'], usecols=['A'], header=None)
def test_usecols_index_col_False(self):
# see gh-9082