Skip to content

[2.7] bpo-30109: Fix reindent.py for non-ASCII files. (GH-1207) #5637

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Lib/email/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def parsedate_tz(data):
def parseaddr(addr):
"""
Parse addr into its constituent realname and email address parts.

Return a tuple of realname and email address, unless the parse fails, in
which case return a 2-tuple of ('', '').
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixed Tools/scripts/reindent.py for non-ASCII files. It now processes files
as binary streams. This also fixes "make reindent".
40 changes: 29 additions & 11 deletions Tools/scripts/reindent.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def check(file):
if verbose:
print "checking", file, "...",
try:
f = io.open(file)
f = open(file, "rb")
except IOError, msg:
errprint("%s: I/O Error: %s" % (file, str(msg)))
return
Expand All @@ -133,7 +133,7 @@ def check(file):
shutil.copyfile(file, bak)
if verbose:
print "backed up", file, "to", bak
f = io.open(file, "w", newline=newline)
f = open(file, "wb")
r.write(f)
f.close()
if verbose:
Expand All @@ -144,7 +144,21 @@ def check(file):
print "unchanged."
return False

def _rstrip(line, JUNK='\n \t'):
def _detect_newlines(lines):
newlines = {'\r\n' if line[-2:] == '\r\n' else
'\n' if line[-1:] == '\n' else
'\r' if line[-1:] == '\r' else
''
for line in lines}
newlines.discard('')
newlines = tuple(sorted(newlines))
if not newlines:
return '\n'
if len(newlines) == 1:
return newlines[0]
return newlines

def _rstrip(line, JUNK='\r\n \t'):
"""Return line stripped of trailing spaces, tabs, newlines.

Note that line.rstrip() instead also strips sundry control characters,
Expand All @@ -166,10 +180,18 @@ def __init__(self, f):
# Raw file lines.
self.raw = f.readlines()

# Save the newlines found in the file so they can be used to
# create output without mutating the newlines.
self.newlines = _detect_newlines(self.raw)
if isinstance(self.newlines, tuple):
self.newline = self.newlines[0]
else:
self.newline = self.newlines

# File lines, rstripped & tab-expanded. Dummy at start is so
# that we can use tokenize's 1-based line numbering easily.
# Note that a line is all-blank iff it's "\n".
self.lines = [_rstrip(line).expandtabs() + "\n"
# Note that a line is all-blank iff it's newline.
self.lines = [_rstrip(line).expandtabs() + self.newline
for line in self.raw]
self.lines.insert(0, None)
self.index = 1 # index into self.lines of next line
Expand All @@ -180,15 +202,11 @@ def __init__(self, f):
# indeed, they're our headache!
self.stats = []

# Save the newlines found in the file so they can be used to
# create output without mutating the newlines.
self.newlines = f.newlines

def run(self):
tokenize.tokenize(self.getline, self.tokeneater)
# Remove trailing empty lines.
lines = self.lines
while lines and lines[-1] == "\n":
while lines and lines[-1] == self.newline:
lines.pop()
# Sentinel.
stats = self.stats
Expand Down Expand Up @@ -244,7 +262,7 @@ def run(self):
else:
for line in lines[thisstmt:nextstmt]:
if diff > 0:
if line == "\n":
if line == self.newline:
after.append(line)
else:
after.append(" " * diff + line)
Expand Down