Skip to content

Commit 703ca81

Browse files
margaretgvanrossum
authored and committed
Handle error instead of crash on file with bad encoding (#1619)
Fixes #1521

* Handle LookupError generated by decoding a file with an invalid encoding
  - Re-raise the LookupError as a slightly more helpful DecodeError
  - Catch the DecodeError and raise as CompileError so the BuildManager will handle it instead of crashing.
* Add unit test to check for crashing on files with invalid encoding
  - In cmdline.test so the file parsing happens in build.py. The tests in parse-errors.test call the parse() method in parse.py directly.
1 parent a91e49b commit 703ca81

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

mypy/build.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
SymbolTableNode, MODULE_REF)
2929
from mypy.semanal import FirstPass, SemanticAnalyzer, ThirdPass
3030
from mypy.checker import TypeChecker
31-
from mypy.errors import Errors, CompileError, report_internal_error
31+
from mypy.errors import Errors, CompileError, DecodeError, report_internal_error
3232
from mypy import fixup
3333
from mypy.report import Reports
3434
from mypy import defaults
@@ -683,6 +683,10 @@ def read_with_python_encoding(path: str, pyversion: Tuple[int, int]) -> str:
683683
encoding = _encoding
684684

685685
source_bytearray.extend(f.read())
686+
try:
687+
source_bytearray.decode(encoding)
688+
except LookupError as lookuperr:
689+
raise DecodeError(str(lookuperr))
686690
return source_bytearray.decode(encoding)
687691

688692

@@ -1215,7 +1219,7 @@ def parse_file(self) -> None:
12151219
except IOError as ioerr:
12161220
raise CompileError([
12171221
"mypy: can't read file '{}': {}".format(self.path, ioerr.strerror)])
1218-
except UnicodeDecodeError as decodeerr:
1222+
except (UnicodeDecodeError, DecodeError) as decodeerr:
12191223
raise CompileError([
12201224
"mypy: can't decode file '{}': {}".format(self.path, str(decodeerr))])
12211225
self.tree = manager.parse_file(self.id, self.xpath, source)

mypy/errors.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -346,6 +346,13 @@ def __init__(self, messages: List[str], use_stdout: bool = False) -> None:
346346
self.use_stdout = use_stdout
347347

348348

349+
class DecodeError(Exception):
350+
"""Exception raised when a file cannot be decoded due to an unknown encoding type.
351+
352+
Essentially a wrapper for the LookupError raised by `bytearray.decode`
353+
"""
354+
355+
349356
def remove_path_prefix(path: str, prefix: str) -> str:
350357
"""If path starts with prefix, return copy of path with the prefix removed.
351358
Otherwise, return path. If path is None, return None.

mypy/test/data/cmdline.test

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -74,3 +74,10 @@ undef
7474
[out]
7575
dir/subpkg/a.py:1: error: Name 'undef' is not defined
7676
dir/a.py:1: error: Name 'undef' is not defined
77+
78+
[case testBadFileEncoding]
79+
# cmd: mypy a.py
80+
[file a.py]
81+
# coding: uft-8
82+
[out]
83+
mypy: can't decode file 'a.py': unknown encoding: uft-8

0 commit comments

Comments
 (0)