diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 1c12a145caf72..dd8565670a6bf 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -471,6 +471,7 @@ Other enhancements - :meth:`~DataFrame.to_html` now has a ``border`` argument to control the value in the opening ``<table>`` tag. The default is the value of the ``html.border`` option, which defaults to 1. This also affects the notebook HTML repr, but since Jupyter's CSS includes a border-width attribute, the visual effect is the same. (:issue:`11563`). - Raise ``ImportError`` in the sql functions when ``sqlalchemy`` is not installed and a connection string is used (:issue:`11920`). - Compatibility with matplotlib 2.0. Older versions of pandas should also work with matplotlib 2.0 (:issue:`13333`) +- When using the ``usecols`` argument in the ``read`` functions, specifying a column name that isn't found now generates a more helpful error message (:issue:`14154`) .. _whatsnew_0190.api: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3bd8579d456d3..24b2a60afd90f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -981,8 +981,7 @@ def _validate_usecols_arg(usecols): if usecols is not None: usecols_dtype = lib.infer_dtype(usecols) - if usecols_dtype not in ('empty', 'integer', - 'string', 'unicode'): + if usecols_dtype not in ('empty', 'integer', 'string', 'unicode'): raise ValueError(msg) return set(usecols) @@ -1424,7 +1423,13 @@ def __init__(self, src, **kwds): if (i in self.usecols or n in self.usecols)] if len(self.names) < len(self.usecols): - raise ValueError("Usecols do not match names.") + bad_cols = [n for n in self.usecols if n not in self.names] + if len(bad_cols) > 0: + raise ValueError(("%s specified in usecols but not found " + "in names.") % bad_cols) + else: + raise ValueError(("Number of usecols is greater than " + "number of names.")) self._set_noconvert_columns() @@ -2185,16 +2190,21 @@ def _handle_usecols(self, 
columns, usecols_key): usecols_key is used if there are string usecols. """ if self.usecols is not None: - if any([isinstance(u, string_types) for u in self.usecols]): + if any([isinstance(c, string_types) for c in self.usecols]): if len(columns) > 1: raise ValueError("If using multiple headers, usecols must " "be integers.") + bad_cols = [n for n in self.usecols if n not in usecols_key] + if len(bad_cols) > 0: + raise ValueError(("%s specified in usecols but not found " + "in names.") % bad_cols) + col_indices = [] - for u in self.usecols: - if isinstance(u, string_types): - col_indices.append(usecols_key.index(u)) + for c in self.usecols: + if isinstance(c, string_types): + col_indices.append(usecols_key.index(c)) else: - col_indices.append(u) + col_indices.append(c) else: col_indices = self.usecols diff --git a/pandas/io/tests/parser/usecols.py b/pandas/io/tests/parser/usecols.py index 16a19c50be960..efd23c8b3be54 100644 --- a/pandas/io/tests/parser/usecols.py +++ b/pandas/io/tests/parser/usecols.py @@ -83,6 +83,8 @@ def test_usecols(self): # length conflict, passed names and usecols disagree self.assertRaises(ValueError, self.read_csv, StringIO(data), names=['a', 'b'], usecols=[1], header=None) + self.assertRaises(ValueError, self.read_csv, StringIO(data), + names=['a', 'b'], usecols=['A'], header=None) def test_usecols_index_col_False(self): # see gh-9082