From c7e63252e89f4f5fc585e843dbe299f97f91a227 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 12 Feb 2018 14:16:27 +0200 Subject: [PATCH] bpo-30109: Fix reindent.py for non-ASCII files. It now processes files as binary streams. This also fixes "make reindent". --- Lib/email/utils.py | 2 +- .../2018-02-12-14-27-01.bpo-30109.lIYlaf.rst | 2 + Tools/scripts/reindent.py | 40 ++++++++++++++----- 3 files changed, 32 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst diff --git a/Lib/email/utils.py b/Lib/email/utils.py index a74db42286e7b6..5b22521e5814da 100644 --- a/Lib/email/utils.py +++ b/Lib/email/utils.py @@ -213,7 +213,7 @@ def parsedate_tz(data): def parseaddr(addr): """ Parse addr into its constituent realname and email address parts. - + Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). """ diff --git a/Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst b/Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst new file mode 100644 index 00000000000000..89249e17187908 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst @@ -0,0 +1,2 @@ +Fixed Tools/scripts/reindent.py for non-ASCII files. It now processes files +as binary streams. This also fixes "make reindent". diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py index df15edbc8d6502..540901ba7d7092 100755 --- a/Tools/scripts/reindent.py +++ b/Tools/scripts/reindent.py @@ -109,7 +109,7 @@ def check(file): if verbose: print "checking", file, "...", try: - f = io.open(file) + f = open(file, "rb") except IOError, msg: errprint("%s: I/O Error: %s" % (file, str(msg))) return @@ -133,7 +133,7 @@ def check(file): shutil.copyfile(file, bak) if verbose: print "backed up", file, "to", bak - f = io.open(file, "w", newline=newline) + f = open(file, "wb") r.write(f) f.close() if verbose: @@ -144,7 +144,21 @@ def check(file): print "unchanged." return False -def _rstrip(line, JUNK='\n \t'): +def _detect_newlines(lines): + newlines = {'\r\n' if line[-2:] == '\r\n' else + '\n' if line[-1:] == '\n' else + '\r' if line[-1:] == '\r' else + '' + for line in lines} + newlines.discard('') + newlines = tuple(sorted(newlines)) + if not newlines: + return '\n' + if len(newlines) == 1: + return newlines[0] + return newlines + +def _rstrip(line, JUNK='\r\n \t'): """Return line stripped of trailing spaces, tabs, newlines. Note that line.rstrip() instead also strips sundry control characters, @@ -166,10 +180,18 @@ def __init__(self, f): # Raw file lines. self.raw = f.readlines() + # Save the newlines found in the file so they can be used to + # create output without mutating the newlines. + self.newlines = _detect_newlines(self.raw) + if isinstance(self.newlines, tuple): + self.newline = self.newlines[0] + else: + self.newline = self.newlines + # File lines, rstripped & tab-expanded. Dummy at start is so # that we can use tokenize's 1-based line numbering easily. - # Note that a line is all-blank iff it's "\n". - self.lines = [_rstrip(line).expandtabs() + "\n" + # Note that a line is all-blank iff it's newline. + self.lines = [_rstrip(line).expandtabs() + self.newline for line in self.raw] self.lines.insert(0, None) self.index = 1 # index into self.lines of next line @@ -180,15 +202,11 @@ def __init__(self, f): # indeed, they're our headache! self.stats = [] - # Save the newlines found in the file so they can be used to - # create output without mutating the newlines. - self.newlines = f.newlines - def run(self): tokenize.tokenize(self.getline, self.tokeneater) # Remove trailing empty lines. lines = self.lines - while lines and lines[-1] == "\n": + while lines and lines[-1] == self.newline: lines.pop() # Sentinel. stats = self.stats @@ -244,7 +262,7 @@ def run(self): else: for line in lines[thisstmt:nextstmt]: if diff > 0: - if line == "\n": + if line == self.newline: after.append(line) else: after.append(" " * diff + line)