diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 45c32d689bd5b..e375d90b39118 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -432,6 +432,7 @@ Other API changes `_. The ``auth_local_webserver = False`` option is planned to stop working in October 2022. (:issue:`46312`) +- :func:`read_json` now raises ``FileNotFoundError`` (previously ``ValueError``) when input is a string ending in ``.json``, ``.json.gz``, ``.json.bz2``, etc. but no such file exists. (:issue:`29102`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 2a9ed9f15cd11..fbea7a71202eb 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -52,6 +52,7 @@ from pandas.io.common import ( IOHandles, + _extension_to_compression, file_exists, get_handle, is_fsspec_url, @@ -698,6 +699,9 @@ def _get_data_from_filepath(self, filepath_or_buffer): This method turns (1) into (2) to simplify the rest of the processing. It returns input types (2) and (3) unchanged. + + It raises FileNotFoundError if the input is a string ending in + one of .json, .json.gz, .json.bz2, etc. but no such file exists. """ # if it is a string but the file does not exist, it might be a JSON string filepath_or_buffer = stringify_path(filepath_or_buffer) @@ -716,6 +720,14 @@ def _get_data_from_filepath(self, filepath_or_buffer): errors=self.encoding_errors, ) filepath_or_buffer = self.handles.handle + elif ( + isinstance(filepath_or_buffer, str) + and filepath_or_buffer.lower().endswith( + (".json",) + tuple(f".json{c}" for c in _extension_to_compression) + ) + and not file_exists(filepath_or_buffer) + ): + raise FileNotFoundError(f"File {filepath_or_buffer} does not exist") return filepath_or_buffer diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 576d99f25e25c..eaffbc60ead32 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1566,6 +1566,20 @@ def test_read_json_with_url_value(self, url): expected = DataFrame({"url": [url]}) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "compression", + ["", ".gz", ".bz2", ".tar"], + ) + def test_read_json_with_very_long_file_path(self, compression): + # GH 46718 + long_json_path = f'{"a" * 1000}.json{compression}' + with pytest.raises( + FileNotFoundError, match=f"File {long_json_path} does not exist" + ): + # path too long for Windows is handled in file_exists() but raises in + # _get_data_from_filepath() + read_json(long_json_path) + @pytest.mark.parametrize( "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")] ) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index fc605637dbc11..e9e99f6dd0ad7 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -187,7 +187,7 @@ def test_iterator(self): (pd.read_hdf, "tables", FileNotFoundError, "h5"), (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), - (pd.read_json, "os", ValueError, "json"), + (pd.read_json, "os", FileNotFoundError, "json"), (pd.read_pickle, "os", FileNotFoundError, "pickle"), ], ) @@ -253,7 +253,7 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex (pd.read_hdf, "tables", FileNotFoundError, "h5"), (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), - (pd.read_json, "os", ValueError, "json"), + (pd.read_json, "os", FileNotFoundError, "json"), (pd.read_pickle, "os", FileNotFoundError, "pickle"), ], )