From bbcb1ceb1b6f27ded5d96845705145a06490a0cb Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 15 May 2024 01:02:08 +0100 Subject: [PATCH 01/14] GH-73991: Add `pathlib.Path.rmtree()` Add a `Path.rmtree()` method that removes an entire directory tree using `shutil.rmtree()`. The signature of the optional *on_error* argument matches the `Path.walk()` argument of the same name, but differs from the *onexc* and *onerror* arguments to `shutil.rmtree()`. Consistency within pathlib is probably more important. In the private pathlib ABCs, we add an implementation based on `walk()`. --- Doc/library/pathlib.rst | 39 ++++++ Doc/whatsnew/3.14.rst | 6 + Lib/pathlib/_abc.py | 38 ++++++ Lib/pathlib/_local.py | 19 +++ Lib/shutil.py | 1 + Lib/test/test_pathlib/test_pathlib.py | 19 --- Lib/test/test_pathlib/test_pathlib_abc.py | 116 +++++++++++++++++- ...4-05-15-01-21-44.gh-issue-73991.bNDqQN.rst | 1 + 8 files changed, 219 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-15-01-21-44.gh-issue-73991.bNDqQN.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 27ed0a32e801cc..5dd80a96878d77 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1486,6 +1486,45 @@ call fails (for example because the path doesn't exist). Remove this directory. The directory must be empty. +.. method:: Path.rmtree(ignore_errors=False, on_error=None) + + Delete this entire directory tree. The path must not refer to a symlink. + + If *ignore_errors* is true, errors resulting from failed removals will be + ignored. If *ignore_errors* is false or omitted, and a function is given to + *on_error*, it will be called each time an exception is raised. If neither + *ignore_errors* nor *on_error* are supplied, exceptions are propagated to + the caller. + + .. note:: + + On platforms that support the necessary fd-based functions a symlink + attack resistant version of :meth:`~Path.rmtree` is used by default. On + other platforms, the :func:`~Path.rmtree` implementation is susceptible + to a symlink attack: given proper timing and circumstances, attackers + can manipulate symlinks on the filesystem to delete files they wouldn't + be able to access otherwise. Applications can use the + :data:`Path.rmtree.avoids_symlink_attacks` function attribute to + determine which case applies. + + If the optional argument *on_error* is specified, it should be a callable; + it will be called with one argument, an :exc:`OSError` instance. The + callable can handle the error to continue the deletion process or re-raise + it to stop. Note that the filename is available as the ``filename`` + attribute of the exception object. + + This method calls :func:`shutil.rmtree` internally. + + .. attribute:: Path.rmtree.avoids_symlink_attacks + + Indicates whether the current platform and implementation provides a + symlink attack resistant version of :meth:`~Path.rmtree`. Currently + this is only true for platforms supporting fd-based directory access + functions. + + .. versionadded:: 3.14 + + .. method:: Path.samefile(other_path) Return whether this path points to the same file as *other_path*, which diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 33a0f3e0f2f4bc..2588ce8d556378 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -86,6 +86,12 @@ New Modules Improved Modules ================ +pathlib +------- + +* Add :meth:`pathlib.Path.rmtree`, which recursively removes a directory. + (Contributed by Barney Gale in :gh:`73991`.) + Optimizations ============= diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 568a17df26fc33..f4c0682dccf80f 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -807,6 +807,44 @@ def rmdir(self): """ raise UnsupportedOperation(self._unsupported_msg('rmdir()')) + def rmtree(self, ignore_errors=False, on_error=None): + """ + Recursively delete this directory tree. + + If *ignore_errors* is true, exceptions raised from scanning the tree + and removing files and directories are ignored. Otherwise, if + *on_error* is set, it will be called to handle the error. If neither + *ignore_errors* nor *on_error* are set, exceptions are propagated to + the caller. + """ + if ignore_errors: + def on_error(error): + pass + elif on_error is None: + def on_error(error): + raise + + try: + if self.is_symlink(): + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as error: + on_error(error) + return + + walker = self.walk(top_down=False, follow_symlinks=False, on_error=on_error) + for dirpath, _, filenames in walker: + for filename in filenames: + filepath = dirpath / filename + try: + filepath.unlink() + except OSError as error: + on_error(error) + try: + dirpath.rmdir() + except OSError as error: + on_error(error) + rmtree.avoids_symlink_attacks = False + def owner(self, *, follow_symlinks=True): """ Return the login name of the file owner. diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 7dc071949b9bd7..740dc6ce9d3e07 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -3,6 +3,7 @@ import operator import os import posixpath +import shutil import sys import warnings from glob import _StringGlobber @@ -774,6 +775,24 @@ def rmdir(self): """ os.rmdir(self) + def rmtree(self, ignore_errors=False, on_error=None): + """ + Recursively delete this directory tree. + + If *ignore_errors* is true, exceptions raised from scanning the tree + and removing files and directories are ignored. Otherwise, if + *on_error* is set, it will be called to handle the error. If neither + *ignore_errors* nor *on_error* are set, exceptions are propagated to + the caller. + """ + if on_error is None: + onexc = None + else: + def onexc(function, path, excinfo): + on_error(excinfo) + shutil.rmtree(self, ignore_errors=ignore_errors, onexc=onexc) + rmtree.avoids_symlink_attacks = shutil.rmtree.avoids_symlink_attacks + def rename(self, target): """ Rename this path to the target path. diff --git a/Lib/shutil.py b/Lib/shutil.py index c9b4da34b1e19b..c523da63898d7e 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -774,6 +774,7 @@ def onexc(*args): exc_info = type(exc), exc, exc.__traceback__ return onerror(func, path, exc_info) + path = os.fspath(path) if _use_fd_functions: # While the unsafe rmtree works fine on bytes, the fd based does not. if isinstance(path, bytes): diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 4fd2aac4a62139..4414e7603f840b 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -764,25 +764,6 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) self.assertEqual(expected_name, link.group(follow_symlinks=False)) - def test_unlink(self): - p = self.cls(self.base) / 'fileA' - p.unlink() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - - def test_unlink_missing_ok(self): - p = self.cls(self.base) / 'fileAAA' - self.assertFileNotFound(p.unlink) - p.unlink(missing_ok=True) - - def test_rmdir(self): - p = self.cls(self.base) / 'dirA' - for q in p.iterdir(): - q.unlink() - p.rmdir() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - @os_helper.skip_unless_hardlink def test_hardlink_to(self): P = self.cls(self.base) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index aadecbc142cca6..ea28a71305a074 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1478,7 +1478,7 @@ def iterdir(self): if path in self._files: raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) elif path in self._directories: - return (self / name for name in self._directories[path]) + return iter([self / name for name in self._directories[path]]) else: raise FileNotFoundError(errno.ENOENT, "File not found", path) @@ -1499,6 +1499,37 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): self.parent.mkdir(parents=True, exist_ok=True) self.mkdir(mode, parents=False, exist_ok=exist_ok) + def unlink(self, missing_ok=False): + path_obj = self.parent.resolve(strict=True) / self.name + path = str(path_obj) + name = path_obj.name + parent = str(path_obj.parent) + if path in self._directories: + raise IsADirectoryError(errno.EISDIR, "Is a directory", path) + elif path in self._files: + self._directories[parent].remove(name) + del self._files[path] + elif path in self._symlinks: + self._directories[parent].remove(name) + del self._symlinks[path] + elif not missing_ok: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + + def rmdir(self): + path_obj = self.parent.resolve(strict=True) / self.name + path = str(path_obj) + if path in self._files or path in self._symlinks: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) + elif path not in self._directories: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + elif self._directories[path]: + raise OSError(errno.ENOTEMPTY, "Directory not empty", path) + else: + name = path_obj.name + parent = str(path_obj.parent) + self._directories[parent].remove(name) + del self._directories[path] + class DummyPathTest(DummyPurePathTest): """Tests for PathBase methods that use stat(), open() and iterdir().""" @@ -2465,6 +2496,89 @@ def test_walk_symlink_location(self): else: self.fail("symlink not found") + def test_unlink(self): + p = self.cls(self.base) / 'fileA' + p.unlink() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + + def test_unlink_missing_ok(self): + p = self.cls(self.base) / 'fileAAA' + self.assertFileNotFound(p.unlink) + p.unlink(missing_ok=True) + + def test_rmdir(self): + p = self.cls(self.base) / 'dirA' + for q in p.iterdir(): + q.unlink() + p.rmdir() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + + def test_rmtree(self): + base = self.cls(self.base) + base.joinpath('dirA').rmtree() + self.assertRaises(FileNotFoundError, base.joinpath('dirA').stat) + self.assertRaises(FileNotFoundError, base.joinpath('dirA', 'linkC').lstat) + base.joinpath('dirB').rmtree() + self.assertRaises(FileNotFoundError, base.joinpath('dirB').stat) + self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'fileB').stat) + self.assertRaises(FileNotFoundError, base.joinpath('dirB', 'linkD').lstat) + base.joinpath('dirC').rmtree() + self.assertRaises(FileNotFoundError, base.joinpath('dirC').stat) + self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'dirD').stat) + self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'dirD', 'fileD').stat) + self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'fileC').stat) + self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'novel.txt').stat) + + def test_rmtree_errors(self): + base = self.cls(self.base) + + # missing file + p = base / 'nonexist' + self.assertRaises(FileNotFoundError, p.rmtree) + p.rmtree(ignore_errors=True) + + # existing file + p = base / 'fileA' + self.assertRaises(NotADirectoryError, p.rmtree) + self.assertTrue(p.exists()) + p.rmtree(ignore_errors=True) + self.assertTrue(p.exists()) + + def test_rmtree_on_error(self): + errors = [] + base = self.cls(self.base) + p = base / 'fileA' + p.rmtree(on_error=errors.append) + self.assertIsInstance(errors[0], NotADirectoryError) + self.assertEqual(errors[0].filename, str(p)) + + @needs_symlinks + def test_rmtree_symlink(self): + base = self.cls(self.base) + link = base / 'linkB' + target = link.resolve() + self.assertRaises(OSError, link.rmtree) + self.assertTrue(link.exists(follow_symlinks=False)) + self.assertTrue(target.exists()) + errors = [] + link.rmtree(on_error=errors.append) + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0], OSError) + + @needs_symlinks + def test_rmtree_inner_symlink(self): + base = self.cls(self.base) + folder = base / 'dirA' + link = folder / 'linkC' + target = link.resolve() + + folder.rmtree() + self.assertRaises(FileNotFoundError, folder.stat) + self.assertRaises(FileNotFoundError, link.lstat) + target.stat() + class DummyPathWithSymlinks(DummyPath): __slots__ = () diff --git a/Misc/NEWS.d/next/Library/2024-05-15-01-21-44.gh-issue-73991.bNDqQN.rst b/Misc/NEWS.d/next/Library/2024-05-15-01-21-44.gh-issue-73991.bNDqQN.rst new file mode 100644 index 00000000000000..9aa7a7dba666af --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-15-01-21-44.gh-issue-73991.bNDqQN.rst @@ -0,0 +1 @@ +Add :meth:`pathlib.Path.rmtree`, which recursively removes a directory. From 62f7eae672ca5b56866f6aa4f3a9dfa9f078389c Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 27 May 2024 14:38:23 +0100 Subject: [PATCH 02/14] Use `os.walk()` and `os.fwalk()` rather than `shutil.rmtree()` --- Lib/os.py | 18 ++++++++-- Lib/pathlib/_abc.py | 6 +++- Lib/pathlib/_local.py | 77 +++++++++++++++++++++++++++++++++---------- 3 files changed, 79 insertions(+), 22 deletions(-) diff --git a/Lib/os.py b/Lib/os.py index 7661ce68ca3be2..7e8c89146d641e 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -281,6 +281,11 @@ def renames(old, new): __all__.extend(["makedirs", "removedirs", "renames"]) +# Private sentinel that can be passed to os.walk() to classify all symlinks as +# files, and walk into every path classified as a directory (potentially after +# user modification in topdown mode). Used by pathlib.Path.walk(). +_walk_symlinks_as_files = object() + def walk(top, topdown=True, onerror=None, followlinks=False): """Directory tree generator. @@ -382,7 +387,10 @@ def walk(top, topdown=True, onerror=None, followlinks=False): break try: - is_dir = entry.is_dir() + if followlinks is _walk_symlinks_as_files: + is_dir = entry.is_dir(follow_symlinks=False) and not entry.is_junction() + else: + is_dir = entry.is_dir() except OSError: # If is_dir() raises an OSError, consider the entry not to # be a directory, same behaviour as os.path.isdir(). @@ -489,7 +497,11 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks): # necessary, it can be adapted to only require O(1) FDs, see issue # #13734. - scandir_it = scandir(topfd) + try: + scandir_it = scandir(topfd) + except OSError as error: + error.filename = toppath + raise dirs = [] nondirs = [] entries = None if topdown or follow_symlinks else [] @@ -498,7 +510,7 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks): if isbytes: name = fsencode(name) try: - if entry.is_dir(): + if entry.is_dir(follow_symlinks=follow_symlinks is not _walk_symlinks_as_files): dirs.append(name) if entries is not None: entries.append(entry) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f4c0682dccf80f..c12601b8d45693 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -825,7 +825,7 @@ def on_error(error): raise try: - if self.is_symlink(): + if self.is_symlink() or self.is_junction(): raise OSError("Cannot call rmtree on a symbolic link") except OSError as error: on_error(error) @@ -837,10 +837,14 @@ def on_error(error): filepath = dirpath / filename try: filepath.unlink() + except FileNotFoundError: + pass except OSError as error: on_error(error) try: dirpath.rmdir() + except FileNotFoundError: + pass except OSError as error: on_error(error) rmtree.avoids_symlink_attacks = False diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 2cfd1c8ab325a5..f90aea44cbc9c3 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -3,7 +3,6 @@ import operator import os import posixpath -import shutil import sys import warnings from glob import _StringGlobber @@ -639,7 +638,9 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) root_dir = str(self) - results = self._globber.walk(root_dir, top_down, on_error, follow_symlinks) + if not follow_symlinks: + follow_symlinks = os._walk_symlinks_as_files + results = os.walk(root_dir, top_down, on_error, follow_symlinks) for path_str, dirnames, filenames in results: if root_dir == '.': path_str = path_str[2:] @@ -768,23 +769,63 @@ def rmdir(self): """ os.rmdir(self) - def rmtree(self, ignore_errors=False, on_error=None): - """ - Recursively delete this directory tree. + _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= + os.supports_dir_fd and + os.scandir in os.supports_fd and + os.stat in os.supports_follow_symlinks) - If *ignore_errors* is true, exceptions raised from scanning the tree - and removing files and directories are ignored. Otherwise, if - *on_error* is set, it will be called to handle the error. If neither - *ignore_errors* nor *on_error* are set, exceptions are propagated to - the caller. - """ - if on_error is None: - onexc = None - else: - def onexc(function, path, excinfo): - on_error(excinfo) - shutil.rmtree(self, ignore_errors=ignore_errors, onexc=onexc) - rmtree.avoids_symlink_attacks = shutil.rmtree.avoids_symlink_attacks + if _use_fd_functions: + def rmtree(self, ignore_errors=False, on_error=None): + """ + Recursively delete this directory tree. + + If *ignore_errors* is true, exceptions raised from scanning the tree + and removing files and directories are ignored. Otherwise, if + *on_error* is set, it will be called to handle the error. If neither + *ignore_errors* nor *on_error* are set, exceptions are propagated to + the caller. + """ + path = os.fspath(self) + if ignore_errors: + def on_error(error): + pass + elif on_error is None: + def on_error(error): + raise + try: + orig_st = os.lstat(path) + walker = os.fwalk( + path, + topdown=False, + onerror=on_error, + follow_symlinks=os._walk_symlinks_as_files) + for dirpath, dirnames, filenames, fd in walker: + if dirpath == path and not os.path.samestat(orig_st, os.fstat(fd)): + try: + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as error: + on_error(error) + return + for filename in filenames: + try: + os.unlink(filename, dir_fd=fd) + except FileNotFoundError: + pass + except OSError as error: + error.filename = os.path.join(dirpath, filename) + on_error(error) + for dirname in dirnames: + try: + os.rmdir(dirname, dir_fd=fd) + except FileNotFoundError: + pass + except OSError as error: + error.filename = os.path.join(dirpath, dirname) + on_error(error) + os.rmdir(path) + except OSError as error: + on_error(error) + rmtree.avoids_symlink_attacks = True def rename(self, target): """ From 4c73a92be2958c55c99dd25956963e1e838d4ac3 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 27 May 2024 15:02:05 +0100 Subject: [PATCH 03/14] Undo shutil change --- Doc/library/pathlib.rst | 2 -- Lib/shutil.py | 1 - 2 files changed, 3 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 5dd80a96878d77..61d0c4ecbd3aab 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1513,8 +1513,6 @@ call fails (for example because the path doesn't exist). it to stop. Note that the filename is available as the ``filename`` attribute of the exception object. - This method calls :func:`shutil.rmtree` internally. - .. attribute:: Path.rmtree.avoids_symlink_attacks Indicates whether the current platform and implementation provides a diff --git a/Lib/shutil.py b/Lib/shutil.py index c523da63898d7e..c9b4da34b1e19b 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -774,7 +774,6 @@ def onexc(*args): exc_info = type(exc), exc, exc.__traceback__ return onerror(func, path, exc_info) - path = os.fspath(path) if _use_fd_functions: # While the unsafe rmtree works fine on bytes, the fd based does not. if isinstance(path, bytes): From 98fa2e247e5e08ec2455fac8438959444cb6b22c Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 27 May 2024 15:34:11 +0100 Subject: [PATCH 04/14] Line up implementations --- Lib/pathlib/_abc.py | 32 ++++++++++++++++---------------- Lib/pathlib/_local.py | 13 ++++--------- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index c12601b8d45693..9dba6a213f96c5 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -823,30 +823,30 @@ def on_error(error): elif on_error is None: def on_error(error): raise - try: if self.is_symlink() or self.is_junction(): raise OSError("Cannot call rmtree on a symbolic link") - except OSError as error: - on_error(error) - return - - walker = self.walk(top_down=False, follow_symlinks=False, on_error=on_error) - for dirpath, _, filenames in walker: - for filename in filenames: - filepath = dirpath / filename + results = self.walk( + top_down=False, + on_error=on_error, + follow_symlinks=False) + for dirpath, _, filenames in results: + for filename in filenames: + filepath = dirpath / filename + try: + filepath.unlink() + except FileNotFoundError: + pass + except OSError as error: + on_error(error) try: - filepath.unlink() + dirpath.rmdir() except FileNotFoundError: pass except OSError as error: on_error(error) - try: - dirpath.rmdir() - except FileNotFoundError: - pass - except OSError as error: - on_error(error) + except OSError as error: + on_error(error) rmtree.avoids_symlink_attacks = False def owner(self, *, follow_symlinks=True): diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index f90aea44cbc9c3..91b443b6eb6451 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -793,19 +793,14 @@ def on_error(error): def on_error(error): raise try: - orig_st = os.lstat(path) - walker = os.fwalk( + if os.path.islink(path): + raise OSError("Cannot call rmtree on a symbolic link") + results = os.fwalk( path, topdown=False, onerror=on_error, follow_symlinks=os._walk_symlinks_as_files) - for dirpath, dirnames, filenames, fd in walker: - if dirpath == path and not os.path.samestat(orig_st, os.fstat(fd)): - try: - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as error: - on_error(error) - return + for dirpath, dirnames, filenames, fd in results: for filename in filenames: try: os.unlink(filename, dir_fd=fd) From b504019e43a2cf03d115c7f730b10b37d7f4d4fa Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 27 May 2024 18:14:27 +0100 Subject: [PATCH 05/14] Tests --- Lib/os.py | 8 +- Lib/pathlib/_abc.py | 1 + Lib/pathlib/_local.py | 1 + Lib/test/test_pathlib/test_pathlib.py | 250 ++++++++++++++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 91 +++++--- 5 files changed, 320 insertions(+), 31 deletions(-) diff --git a/Lib/os.py b/Lib/os.py index 7e8c89146d641e..f51680be289763 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -547,7 +547,13 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks): yield from _fwalk(dirfd, dirpath, isbytes, topdown, onerror, follow_symlinks) finally: - close(dirfd) + try: + close(dirfd) + except OSError as err: + err.filename = path.join(toppath, name) + if onerror is not None: + onerror(err) + continue if not topdown: yield toppath, dirs, nondirs, topfd diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 252000ee8d9737..2513c5b1337d65 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -842,6 +842,7 @@ def on_error(error): except OSError as error: on_error(error) except OSError as error: + error.filename = str(self) on_error(error) rmtree.avoids_symlink_attacks = False diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 79bf6874535331..f6bf3df00a7537 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -853,6 +853,7 @@ def on_error(error): on_error(error) os.rmdir(path) except OSError as error: + error.filename = path on_error(error) rmtree.avoids_symlink_attacks = True diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 89af1f7581764f..fad7aa3bbde2ea 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -16,6 +16,7 @@ from test.support import import_helper from test.support import is_emscripten, is_wasi from test.support import infinite_recursion +from test.support import swap_attr from test.support import os_helper from test.support.os_helper import TESTFN, FakePath from test.test_pathlib import test_pathlib_abc @@ -31,6 +32,10 @@ if hasattr(os, 'geteuid'): root_in_posix = (os.geteuid() == 0) +rmtree_use_fd_functions = ( + {os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and + os.listdir in os.supports_fd and os.stat in os.supports_follow_symlinks) + # # Tests for the pure classes. # @@ -764,6 +769,251 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) self.assertEqual(expected_name, link.group(follow_symlinks=False)) + def test_rmtree_uses_safe_fd_version_if_available(self): + if rmtree_use_fd_functions: + self.assertTrue(self.cls.rmtree.avoids_symlink_attacks) + d = self.cls(self.base, 'a') + d.mkdir() + try: + real_fwalk = os.fwalk + + class Called(Exception): + pass + + def _raiser(*args, **kwargs): + raise Called + + os.fwalk = _raiser + self.assertRaises(Called, d.rmtree) + finally: + os.fwalk = real_fwalk + else: + self.assertFalse(self.cls.rmtree.avoids_symlink_attacks) + + @unittest.skipIf(sys.platform[:6] == 'cygwin', + "This test can't be run on Cygwin (issue #1071513).") + @os_helper.skip_if_dac_override + @os_helper.skip_unless_working_chmod + def test_rmtree_unwritable(self): + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + child_file_path = tmp / 'a' + child_dir_path = tmp / 'b' + child_file_path.write_text("") + child_dir_path.mkdir() + old_dir_mode = tmp.stat().st_mode + old_child_file_mode = child_file_path.stat().st_mode + old_child_dir_mode = child_dir_path.stat().st_mode + # Make unwritable. + new_mode = stat.S_IREAD | stat.S_IEXEC + try: + child_file_path.chmod(new_mode) + child_dir_path.chmod(new_mode) + tmp.chmod(new_mode) + + errors = [] + tmp.rmtree(on_error=errors.append) + # Test whether onerror has actually been called. + print(errors) + self.assertEqual(len(errors), 3) + finally: + tmp.chmod(old_dir_mode) + child_file_path.chmod(old_child_file_mode) + child_dir_path.chmod(old_child_dir_mode) + + @needs_windows + def test_rmtree_inner_junction(self): + import _winapi + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir2 = dir1 / 'dir2' + dir3 = tmp / 'dir3' + for d in dir1, dir2, dir3: + d.mkdir() + file1 = tmp / 'file1' + file1.write_text('foo') + link1 = dir1 / 'link1' + _winapi.CreateJunction(dir2, link1) + link2 = dir1 / 'link2' + _winapi.CreateJunction(dir3, link2) + link3 = dir1 / 'link3' + _winapi.CreateJunction(file1, link3) + # make sure junctions are removed but not followed + dir1.rmtree() + self.assertFalse(dir1.exists()) + self.assertTrue(dir3.exists()) + self.assertTrue(file1.exists()) + + @needs_windows + def test_rmtree_outer_junction(self): + import _winapi + tmp = self.cls(self.base, 'rmtree') + try: + src = tmp / 'cheese' + dst = tmp / 'shop' + src.mkdir() + spam = src / 'spam' + spam.write_text('') + _winapi.CreateJunction(src, dst) + self.assertRaises(OSError, dst.rmtree) + dst.rmtree(ignore_errors=True) + finally: + tmp.rmtree(ignore_errors=True) + + @needs_windows + def test_rmtree_outer_junction_on_error(self): + import _winapi + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + dir_ = tmp / 'dir' + dir_.mkdir() + link = tmp / 'link' + _winapi.CreateJunction(dir_, link) + self.addCleanup(os_helper.unlink, link) + self.assertRaises(OSError, link.rmtree) + self.assertTrue(dir_.exists()) + self.assertTrue(link.exists(follow_symlinks=False)) + errors = [] + + def on_error(error): + errors.append(error) + + link.rmtree(on_error=on_error) + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0], OSError) + self.assertEqual(errors[0].filename, str(link)) + + @unittest.skipUnless(rmtree_use_fd_functions, "requires safe rmtree") + def test_rmtree_fails_on_close(self): + # Test that the error handler is called for failed os.close() and that + # os.close() is only called once for a file descriptor. + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir1.mkdir() + dir2 = dir1 / 'dir2' + dir2.mkdir() + + def close(fd): + orig_close(fd) + nonlocal close_count + close_count += 1 + raise OSError + + close_count = 0 + with swap_attr(os, 'close', close) as orig_close: + with self.assertRaises(OSError): + dir1.rmtree() + self.assertTrue(dir2.is_dir()) + self.assertEqual(close_count, 2) + + close_count = 0 + errors = [] + + with swap_attr(os, 'close', close) as orig_close: + dir1.rmtree(on_error=errors.append) + print(errors) + self.assertEqual(len(errors), 2) + self.assertEqual(errors[0].filename, str(dir2)) + self.assertEqual(errors[1].filename, str(dir1)) + self.assertEqual(close_count, 2) + + @unittest.skipUnless(hasattr(os, "mkfifo"), 'requires os.mkfifo()') + @unittest.skipIf(sys.platform == "vxworks", + "fifo requires special path on VxWorks") + def test_rmtree_on_named_pipe(self): + p = self.cls(self.base, 'pipe') + os.mkfifo(p) + try: + with self.assertRaises(NotADirectoryError): + p.rmtree() + self.assertTrue(p.exists()) + finally: + p.unlink() + + p = self.cls(self.base, 'dir') + p.mkdir() + os.mkfifo(p / 'mypipe') + p.rmtree() + self.assertFalse(p.exists()) + + @unittest.skipIf(sys.platform[:6] == 'cygwin', + "This test can't be run on Cygwin (issue #1071513).") + @os_helper.skip_if_dac_override + @os_helper.skip_unless_working_chmod + def test_rmtree_deleted_race_condition(self): + # bpo-37260 + # + # Test that a file or a directory deleted after it is enumerated + # by scandir() but before unlink() or rmdr() is called doesn't + # generate any errors. + def on_error(exc): + if not isinstance(exc, PermissionError): + raise + # Make the parent and the children writeable. + for p, mode in zip(paths, old_modes): + os.chmod(p, mode) + # Remove other dirs except one. + keep = next(p for p in dirs if p != exc.filename) + for p in dirs: + if p != keep: + os.rmdir(p) + # Remove other files except one. + keep = next(p for p in files if p != exc.filename) + for p in files: + if p != keep: + os.unlink(p) + + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + paths = [tmp] + [tmp / f'child{i}' for i in range(6)] + dirs = paths[1::2] + files = paths[2::2] + for path in dirs: + path.mkdir() + for path in files: + path.write_text('') + + old_modes = [path.stat().st_mode for path in paths] + + # Make the parent and the children non-writeable. + new_mode = stat.S_IREAD | stat.S_IEXEC + for path in reversed(paths): + path.chmod(new_mode) + + try: + tmp.rmtree(on_error=on_error) + except: + # Test failed, so cleanup artifacts. + for path, mode in zip(paths, old_modes): + try: + path.chmod(mode) + except OSError: + pass + tmp.rmtree() + raise + + def test_rmtree_does_not_choke_on_failing_lstat(self): + try: + orig_lstat = os.lstat + + def raiser(fn, *args, **kwargs): + if fn != TESTFN: + raise OSError() + else: + return orig_lstat(fn) + + os.lstat = raiser + + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + foo = tmp / 'foo' + foo.write_text('') + tmp.rmtree() + finally: + os.lstat = orig_lstat + @os_helper.skip_unless_hardlink def test_hardlink_to(self): P = self.cls(self.base) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 47189e9b0dcc75..1cba49eac98ecc 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -2550,52 +2550,83 @@ def test_rmtree(self): self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'novel.txt').stat) def test_rmtree_errors(self): - base = self.cls(self.base) - - # missing file - p = base / 'nonexist' - self.assertRaises(FileNotFoundError, p.rmtree) - p.rmtree(ignore_errors=True) + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + # filename is guaranteed not to exist + filename = tmp / 'foo' + self.assertRaises(FileNotFoundError, filename.rmtree) + # test that ignore_errors option is honored + filename.rmtree(ignore_errors=True) # existing file - p = base / 'fileA' - self.assertRaises(NotADirectoryError, p.rmtree) - self.assertTrue(p.exists()) - p.rmtree(ignore_errors=True) - self.assertTrue(p.exists()) + filename = tmp / "tstfile" + filename.write_text("") + with self.assertRaises(NotADirectoryError) as cm: + filename.rmtree() + self.assertEqual(cm.exception.filename, str(filename)) + self.assertTrue(filename.exists()) + # test that ignore_errors option is honored + filename.rmtree(ignore_errors=True) + self.assertTrue(filename.exists()) def test_rmtree_on_error(self): + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + filename = tmp / "tstfile" + filename.write_text("") errors = [] - base = self.cls(self.base) - p = base / 'fileA' - p.rmtree(on_error=errors.append) + + def on_error(error): + errors.append(error) + + filename.rmtree(on_error=on_error) + self.assertEqual(len(errors), 1) self.assertIsInstance(errors[0], NotADirectoryError) - self.assertEqual(errors[0].filename, str(p)) + self.assertEqual(errors[0].filename, str(filename)) @needs_symlinks - def test_rmtree_symlink(self): - base = self.cls(self.base) - link = base / 'linkB' - target = link.resolve() + def test_rmtree_outer_symlink(self): + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + dir_ = tmp / 'dir' + dir_.mkdir() + link = tmp / 'link' + link.symlink_to(dir_) self.assertRaises(OSError, link.rmtree) + self.assertTrue(dir_.exists()) self.assertTrue(link.exists(follow_symlinks=False)) - self.assertTrue(target.exists()) errors = [] - link.rmtree(on_error=errors.append) + + def on_error(error): + errors.append(error) + + link.rmtree(on_error=on_error) self.assertEqual(len(errors), 1) self.assertIsInstance(errors[0], OSError) + self.assertEqual(errors[0].filename, str(link)) @needs_symlinks def test_rmtree_inner_symlink(self): - base = self.cls(self.base) - folder = base / 'dirA' - link = folder / 'linkC' - target = link.resolve() - - folder.rmtree() - self.assertRaises(FileNotFoundError, folder.stat) - self.assertRaises(FileNotFoundError, link.lstat) - target.stat() + tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir2 = dir1 / 'dir2' + dir3 = tmp / 'dir3' + for d in dir1, dir2, dir3: + d.mkdir() + file1 = tmp / 'file1' + file1.write_text('foo') + link1 = dir1 / 'link1' + link1.symlink_to(dir2) + link2 = dir1 / 'link2' + link2.symlink_to(dir3) + link3 = dir1 / 'link3' + link3.symlink_to(file1) + # make sure symlinks are removed but not followed + dir1.rmtree() + self.assertFalse(dir1.exists()) + self.assertTrue(dir3.exists()) + self.assertTrue(file1.exists()) class DummyPathWithSymlinks(DummyPath): From ab7b0863a82908cf16eadebe2f50ee07b09a44bd Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 18 Jun 2024 23:30:27 +0100 Subject: [PATCH 06/14] Use implementation from shutil --- Doc/whatsnew/3.14.rst | 8 +- Lib/pathlib/_abc.py | 23 ++-- Lib/pathlib/_local.py | 74 +++------- Lib/pathlib/_os.py | 157 ++++++++++++++++++++++ Lib/shutil.py | 154 +-------------------- Lib/test/test_pathlib/test_pathlib.py | 10 +- Lib/test/test_pathlib/test_pathlib_abc.py | 6 +- 7 files changed, 203 insertions(+), 229 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 98103582d01df6..25e0c0b1cb799d 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -106,6 +106,8 @@ pathlib * Add :meth:`pathlib.Path.copy`, which copies the content of one file to another, like :func:`shutil.copyfile`. (Contributed by Barney Gale in :gh:`73991`.) +* Add :meth:`pathlib.Path.rmtree`, which recursively removes a directory. + (Contributed by Barney Gale in :gh:`73991`.) symtable -------- @@ -118,12 +120,6 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) -pathlib -------- - -* Add :meth:`pathlib.Path.rmtree`, which recursively removes a directory. - (Contributed by Barney Gale in :gh:`73991`.) - Optimizations ============= diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2d5787f89c409d..03e40c8dcff63f 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -883,21 +883,20 @@ def on_error(error): top_down=False, on_error=on_error, follow_symlinks=False) - for dirpath, _, filenames in results: - for filename in filenames: - filepath = dirpath / filename + for dirpath, dirnames, filenames in results: + for name in filenames: + child = dirpath / name try: - filepath.unlink() - except FileNotFoundError: - pass + child.unlink() except OSError as error: on_error(error) - try: - dirpath.rmdir() - except FileNotFoundError: - pass - except OSError as error: - on_error(error) + for name in dirnames: + child = dirpath / name + try: + child.rmdir() + except OSError as error: + on_error(error) + self.rmdir() except OSError as error: error.filename = str(self) on_error(error) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 97d4278d844b50..2e0c75b7eb8efa 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -18,7 +18,7 @@ grp = None from ._abc import UnsupportedOperation, PurePathBase, PathBase -from ._os import copyfile +from ._os import copyfile, rmtree as rmtree_impl __all__ = [ @@ -819,59 +819,27 @@ def rmdir(self): """ os.rmdir(self) - _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= - os.supports_dir_fd and - os.scandir in os.supports_fd and - os.stat in os.supports_follow_symlinks) - - if _use_fd_functions: - def rmtree(self, ignore_errors=False, on_error=None): - """ - Recursively delete this directory tree. + def rmtree(self, ignore_errors=False, on_error=None): + """ + Recursively delete this directory tree. - If *ignore_errors* is true, exceptions raised from scanning the tree - and removing files and directories are ignored. Otherwise, if - *on_error* is set, it will be called to handle the error. If neither - *ignore_errors* nor *on_error* are set, exceptions are propagated to - the caller. - """ - path = os.fspath(self) - if ignore_errors: - def on_error(error): - pass - elif on_error is None: - def on_error(error): - raise - try: - if os.path.islink(path): - raise OSError("Cannot call rmtree on a symbolic link") - results = os.fwalk( - path, - topdown=False, - onerror=on_error, - follow_symlinks=os._walk_symlinks_as_files) - for dirpath, dirnames, filenames, fd in results: - for filename in filenames: - try: - os.unlink(filename, dir_fd=fd) - except FileNotFoundError: - pass - except OSError as error: - error.filename = os.path.join(dirpath, filename) - on_error(error) - for dirname in dirnames: - try: - os.rmdir(dirname, dir_fd=fd) - except FileNotFoundError: - pass - except OSError as error: - error.filename = os.path.join(dirpath, dirname) - on_error(error) - os.rmdir(path) - except OSError as error: - error.filename = path - on_error(error) - rmtree.avoids_symlink_attacks = True + If *ignore_errors* is true, exceptions raised from scanning the tree + and removing files and directories are ignored. Otherwise, if + *on_error* is set, it will be called to handle the error. If neither + *ignore_errors* nor *on_error* are set, exceptions are propagated to + the caller. + """ + if ignore_errors: + def onexc(func, filename, err): + pass + elif on_error: + def onexc(func, filename, err): + on_error(err) + else: + def onexc(func, filename, err): + raise err + rmtree_impl(str(self), None, onexc) + rmtree.avoids_symlink_attacks = rmtree_impl.avoids_symlink_attacks def rename(self, target): """ diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index 1771d54e4167c1..32278d4a511ec3 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -4,6 +4,7 @@ from errno import EBADF, EOPNOTSUPP, ETXTBSY, EXDEV import os +import stat import sys try: import fcntl @@ -136,3 +137,159 @@ def copyfileobj(source_f, target_f): write_target = target_f.write while buf := read_source(1024 * 1024): write_target(buf) + +if ({os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and + os.scandir in os.supports_fd and os.stat in os.supports_follow_symlinks): + + def _rmtree_safe_fd_step(stack, onexc): + # Each stack item has four elements: + # * func: The first operation to perform: os.lstat, os.close or os.rmdir. + # Walking a directory starts with an os.lstat() to detect symlinks; in + # this case, func is updated before subsequent operations and passed to + # onexc() if an error occurs. + # * dirfd: Open file descriptor, or None if we're processing the top-level + # directory given to rmtree() and the user didn't supply dir_fd. + # * path: Path of file to operate upon. This is passed to onexc() if an + # error occurs. + # * orig_entry: os.DirEntry, or None if we're processing the top-level + # directory given to rmtree(). We used the cached stat() of the entry to + # save a call to os.lstat() when walking subdirectories. + func, dirfd, path, orig_entry = stack.pop() + name = path if orig_entry is None else orig_entry.name + try: + if func is os.close: + os.close(dirfd) + return + if func is os.rmdir: + os.rmdir(name, dir_fd=dirfd) + return + + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + assert func is os.lstat + if orig_entry is None: + orig_st = os.lstat(name, dir_fd=dirfd) + else: + orig_st = orig_entry.stat(follow_symlinks=False) + + func = os.open # For error reporting. + topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd) + + func = os.path.islink # For error reporting. + try: + if not os.path.samestat(orig_st, os.fstat(topfd)): + # Symlinks to directories are forbidden, see GH-46010. + raise OSError("Cannot call rmtree on a symbolic link") + stack.append((os.rmdir, dirfd, path, orig_entry)) + finally: + stack.append((os.close, topfd, path, orig_entry)) + + func = os.scandir # For error reporting. + with os.scandir(topfd) as scandir_it: + entries = list(scandir_it) + for entry in entries: + fullname = os.path.join(path, entry.name) + try: + if entry.is_dir(follow_symlinks=False): + # Traverse into sub-directory. + stack.append((os.lstat, topfd, fullname, entry)) + continue + except FileNotFoundError: + continue + except OSError: + pass + try: + os.unlink(entry.name, dir_fd=topfd) + except FileNotFoundError: + continue + except OSError as err: + onexc(os.unlink, fullname, err) + except FileNotFoundError as err: + if orig_entry is None or func is os.close: + err.filename = path + onexc(func, path, err) + except OSError as err: + err.filename = path + onexc(func, path, err) + + # Version using fd-based APIs to protect against races + def rmtree(path, dir_fd, onexc): + # While the unsafe rmtree works fine on bytes, the fd based does not. + if isinstance(path, bytes): + path = os.fsdecode(path) + stack = [(os.lstat, dir_fd, path, None)] + try: + while stack: + _rmtree_safe_fd_step(stack, onexc) + finally: + # Close any file descriptors still on the stack. + while stack: + func, fd, path, entry = stack.pop() + if func is not os.close: + continue + try: + os.close(fd) + except OSError as err: + onexc(os.close, path, err) + + rmtree.avoids_symlink_attacks = True + +else: + if hasattr(os.stat_result, 'st_file_attributes'): + def _rmtree_islink(st): + return (stat.S_ISLNK(st.st_mode) or + (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT + and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) + else: + def _rmtree_islink(st): + return stat.S_ISLNK(st.st_mode) + + # version vulnerable to race conditions + def rmtree(path, dir_fd, onexc): + if dir_fd is not None: + raise NotImplementedError("dir_fd unavailable on this platform") + try: + st = os.lstat(path) + except OSError as err: + onexc(os.lstat, path, err) + return + try: + if _rmtree_islink(st): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as err: + onexc(os.path.islink, path, err) + # can't continue even if onexc hook returns + return + + def onerror(err): + if not isinstance(err, FileNotFoundError): + onexc(os.scandir, err.filename, err) + + results = os.walk(path, topdown=False, onerror=onerror, + followlinks=os._walk_symlinks_as_files) + for dirpath, dirnames, filenames in results: + for name in dirnames: + fullname = os.path.join(dirpath, name) + try: + os.rmdir(fullname) + except FileNotFoundError: + continue + except OSError as err: + onexc(os.rmdir, fullname, err) + for name in filenames: + fullname = os.path.join(dirpath, name) + try: + os.unlink(fullname) + except FileNotFoundError: + continue + except OSError as err: + onexc(os.unlink, fullname, err) + try: + os.rmdir(path) + except FileNotFoundError: + pass + except OSError as err: + onexc(os.rmdir, path, err) + + rmtree.avoids_symlink_attacks = False diff --git a/Lib/shutil.py b/Lib/shutil.py index 0235f6bae32f14..d9d4531c06b48f 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,6 +10,7 @@ import fnmatch import collections import errno +from pathlib._os import rmtree as _rmtree_impl try: import zlib @@ -595,157 +596,6 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) -if hasattr(os.stat_result, 'st_file_attributes'): - def _rmtree_islink(st): - return (stat.S_ISLNK(st.st_mode) or - (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT - and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) -else: - def _rmtree_islink(st): - return stat.S_ISLNK(st.st_mode) - -# version vulnerable to race conditions -def _rmtree_unsafe(path, dir_fd, onexc): - if dir_fd is not None: - raise NotImplementedError("dir_fd unavailable on this platform") - try: - st = os.lstat(path) - except OSError as err: - onexc(os.lstat, path, err) - return - try: - if _rmtree_islink(st): - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) - # can't continue even if onexc hook returns - return - def onerror(err): - if not isinstance(err, FileNotFoundError): - onexc(os.scandir, err.filename, err) - results = os.walk(path, topdown=False, onerror=onerror, followlinks=os._walk_symlinks_as_files) - for dirpath, dirnames, filenames in results: - for name in dirnames: - fullname = os.path.join(dirpath, name) - try: - os.rmdir(fullname) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.rmdir, fullname, err) - for name in filenames: - fullname = os.path.join(dirpath, name) - try: - os.unlink(fullname) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.unlink, fullname, err) - try: - os.rmdir(path) - except FileNotFoundError: - pass - except OSError as err: - onexc(os.rmdir, path, err) - -# Version using fd-based APIs to protect against races -def _rmtree_safe_fd(path, dir_fd, onexc): - # While the unsafe rmtree works fine on bytes, the fd based does not. - if isinstance(path, bytes): - path = os.fsdecode(path) - stack = [(os.lstat, dir_fd, path, None)] - try: - while stack: - _rmtree_safe_fd_step(stack, onexc) - finally: - # Close any file descriptors still on the stack. - while stack: - func, fd, path, entry = stack.pop() - if func is not os.close: - continue - try: - os.close(fd) - except OSError as err: - onexc(os.close, path, err) - -def _rmtree_safe_fd_step(stack, onexc): - # Each stack item has four elements: - # * func: The first operation to perform: os.lstat, os.close or os.rmdir. - # Walking a directory starts with an os.lstat() to detect symlinks; in - # this case, func is updated before subsequent operations and passed to - # onexc() if an error occurs. - # * dirfd: Open file descriptor, or None if we're processing the top-level - # directory given to rmtree() and the user didn't supply dir_fd. - # * path: Path of file to operate upon. This is passed to onexc() if an - # error occurs. - # * orig_entry: os.DirEntry, or None if we're processing the top-level - # directory given to rmtree(). We used the cached stat() of the entry to - # save a call to os.lstat() when walking subdirectories. - func, dirfd, path, orig_entry = stack.pop() - name = path if orig_entry is None else orig_entry.name - try: - if func is os.close: - os.close(dirfd) - return - if func is os.rmdir: - os.rmdir(name, dir_fd=dirfd) - return - - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - assert func is os.lstat - if orig_entry is None: - orig_st = os.lstat(name, dir_fd=dirfd) - else: - orig_st = orig_entry.stat(follow_symlinks=False) - - func = os.open # For error reporting. - topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd) - - func = os.path.islink # For error reporting. - try: - if not os.path.samestat(orig_st, os.fstat(topfd)): - # Symlinks to directories are forbidden, see GH-46010. - raise OSError("Cannot call rmtree on a symbolic link") - stack.append((os.rmdir, dirfd, path, orig_entry)) - finally: - stack.append((os.close, topfd, path, orig_entry)) - - func = os.scandir # For error reporting. - with os.scandir(topfd) as scandir_it: - entries = list(scandir_it) - for entry in entries: - fullname = os.path.join(path, entry.name) - try: - if entry.is_dir(follow_symlinks=False): - # Traverse into sub-directory. - stack.append((os.lstat, topfd, fullname, entry)) - continue - except FileNotFoundError: - continue - except OSError: - pass - try: - os.unlink(entry.name, dir_fd=topfd) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.unlink, fullname, err) - except FileNotFoundError as err: - if orig_entry is None or func is os.close: - err.filename = path - onexc(func, path, err) - except OSError as err: - err.filename = path - onexc(func, path, err) - -_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= - os.supports_dir_fd and - os.scandir in os.supports_fd and - os.stat in os.supports_follow_symlinks) -_rmtree_impl = _rmtree_safe_fd if _use_fd_functions else _rmtree_unsafe - def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. @@ -792,7 +642,7 @@ def onexc(*args): # Allow introspection of whether or not the hardening against symlink # attacks is supported on the current platform -rmtree.avoids_symlink_attacks = _use_fd_functions +rmtree.avoids_symlink_attacks = _rmtree_impl.avoids_symlink_attacks def _basename(path): """A basename() variant which first strips the trailing slash, if present. diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index fad7aa3bbde2ea..0c7f7301c394cd 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -775,7 +775,7 @@ def test_rmtree_uses_safe_fd_version_if_available(self): d = self.cls(self.base, 'a') d.mkdir() try: - real_fwalk = os.fwalk + real_open = os.open class Called(Exception): pass @@ -783,10 +783,10 @@ class Called(Exception): def _raiser(*args, **kwargs): raise Called - os.fwalk = _raiser + os.open = _raiser self.assertRaises(Called, d.rmtree) finally: - os.fwalk = real_fwalk + os.open = real_open else: self.assertFalse(self.cls.rmtree.avoids_symlink_attacks) @@ -997,16 +997,16 @@ def on_error(exc): def test_rmtree_does_not_choke_on_failing_lstat(self): try: orig_lstat = os.lstat + tmp = self.cls(self.base, 'rmtree') def raiser(fn, *args, **kwargs): - if fn != TESTFN: + if fn != str(tmp): raise OSError() else: return orig_lstat(fn) os.lstat = raiser - tmp = self.cls(self.base, 'rmtree') tmp.mkdir() foo = tmp / 'foo' foo.write_text('') diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 7248ea4ddcf69d..ff7cfc0274f730 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -2497,9 +2497,13 @@ def on_error(error): errors.append(error) filename.rmtree(on_error=on_error) - self.assertEqual(len(errors), 1) + self.assertEqual(len(errors), 2) + # First from scandir() self.assertIsInstance(errors[0], NotADirectoryError) self.assertEqual(errors[0].filename, str(filename)) + # Then from munlink() + self.assertIsInstance(errors[1], NotADirectoryError) + self.assertEqual(errors[1].filename, str(filename)) @needs_symlinks def test_rmtree_outer_symlink(self): From f88fc12dbb0b84a465564cb4fe7a39073b0fbb76 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Jun 2024 00:14:06 +0100 Subject: [PATCH 07/14] Fix some Windows tests. --- Lib/pathlib/_local.py | 1 + Lib/test/test_pathlib/test_pathlib.py | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 2e0c75b7eb8efa..240034455cceb2 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -834,6 +834,7 @@ def onexc(func, filename, err): pass elif on_error: def onexc(func, filename, err): + err.filename = filename on_error(err) else: def onexc(func, filename, err): diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 0c7f7301c394cd..f9da95c813438f 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -834,11 +834,11 @@ def test_rmtree_inner_junction(self): file1 = tmp / 'file1' file1.write_text('foo') link1 = dir1 / 'link1' - _winapi.CreateJunction(dir2, link1) + _winapi.CreateJunction(str(dir2), str(link1)) link2 = dir1 / 'link2' - _winapi.CreateJunction(dir3, link2) + _winapi.CreateJunction(str(dir3), str(link2)) link3 = dir1 / 'link3' - _winapi.CreateJunction(file1, link3) + _winapi.CreateJunction(str(file1), str(link3)) # make sure junctions are removed but not followed dir1.rmtree() self.assertFalse(dir1.exists()) @@ -849,13 +849,14 @@ def test_rmtree_inner_junction(self): def test_rmtree_outer_junction(self): import _winapi tmp = self.cls(self.base, 'rmtree') + tmp.mkdir() try: src = tmp / 'cheese' dst = tmp / 'shop' src.mkdir() spam = src / 'spam' spam.write_text('') - _winapi.CreateJunction(src, dst) + _winapi.CreateJunction(str(src), str(dst)) self.assertRaises(OSError, dst.rmtree) dst.rmtree(ignore_errors=True) finally: @@ -869,7 +870,7 @@ def test_rmtree_outer_junction_on_error(self): dir_ = tmp / 'dir' dir_.mkdir() link = tmp / 'link' - _winapi.CreateJunction(dir_, link) + _winapi.CreateJunction(str(dir_), str(link)) self.addCleanup(os_helper.unlink, link) self.assertRaises(OSError, link.rmtree) self.assertTrue(dir_.exists()) From 5a0fc076b6a1a661ed817a0ab01e47526a555806 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Jun 2024 00:57:34 +0100 Subject: [PATCH 08/14] More Windows test tweaks --- Lib/test/test_pathlib/test_pathlib.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index f9da95c813438f..90e49f7c7291d3 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -871,19 +871,21 @@ def test_rmtree_outer_junction_on_error(self): dir_.mkdir() link = tmp / 'link' _winapi.CreateJunction(str(dir_), str(link)) - self.addCleanup(os_helper.unlink, link) - self.assertRaises(OSError, link.rmtree) - self.assertTrue(dir_.exists()) - self.assertTrue(link.exists(follow_symlinks=False)) - errors = [] + try: + self.assertRaises(OSError, link.rmtree) + self.assertTrue(dir_.exists()) + self.assertTrue(link.exists(follow_symlinks=False)) + errors = [] - def on_error(error): - errors.append(error) + def on_error(error): + errors.append(error) - link.rmtree(on_error=on_error) - self.assertEqual(len(errors), 1) - self.assertIsInstance(errors[0], OSError) - self.assertEqual(errors[0].filename, str(link)) + link.rmtree(on_error=on_error) + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0], OSError) + self.assertEqual(errors[0].filename, str(link)) + finally: + os.unlink(str(link)) @unittest.skipUnless(rmtree_use_fd_functions, "requires safe rmtree") def test_rmtree_fails_on_close(self): From 2559e6a460b69e80b1600a4f773811b97a9698b8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Jun 2024 01:18:05 +0100 Subject: [PATCH 09/14] This is confusing :/ --- Lib/test/test_pathlib/test_pathlib.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 90e49f7c7291d3..cd4fbf015be198 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -952,21 +952,22 @@ def test_rmtree_deleted_race_condition(self): # by scandir() but before unlink() or rmdr() is called doesn't # generate any errors. def on_error(exc): + assert exc.filename if not isinstance(exc, PermissionError): raise # Make the parent and the children writeable. for p, mode in zip(paths, old_modes): - os.chmod(p, mode) + p.chmod(mode) # Remove other dirs except one. - keep = next(p for p in dirs if p != exc.filename) + keep = next(p for p in dirs if str(p) != exc.filename) for p in dirs: if p != keep: - os.rmdir(p) + p.rmdir() # Remove other files except one. - keep = next(p for p in files if p != exc.filename) + keep = next(p for p in files if str(p) != exc.filename) for p in files: if p != keep: - os.unlink(p) + p.unlink() tmp = self.cls(self.base, 'rmtree') tmp.mkdir() From bcff0539ca22cceead6c574c42f7420579f604b0 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 24 Jun 2024 18:58:36 +0100 Subject: [PATCH 10/14] WIP --- Lib/pathlib/_abc.py | 1 - Lib/pathlib/_local.py | 14 +-- Lib/pathlib/_os.py | 156 -------------------------- Lib/shutil.py | 154 ++++++++++++++++++++++++- Lib/test/test_pathlib/test_pathlib.py | 3 - 5 files changed, 157 insertions(+), 171 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b460faea4f0afb..b727071736ebd6 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -935,7 +935,6 @@ def on_error(error): except OSError as error: error.filename = str(self) on_error(error) - rmtree.avoids_symlink_attacks = False def owner(self, *, follow_symlinks=True): """ diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 5fb341420c18de..a9ceb1d7b80114 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -18,7 +18,7 @@ grp = None from ._abc import UnsupportedOperation, PurePathBase, PathBase -from ._os import copyfile, rmtree as rmtree_impl +from ._os import copyfile __all__ = [ @@ -831,18 +831,14 @@ def rmtree(self, ignore_errors=False, on_error=None): *ignore_errors* nor *on_error* are set, exceptions are propagated to the caller. """ - if ignore_errors: - def onexc(func, filename, err): - pass - elif on_error: + if on_error: def onexc(func, filename, err): err.filename = filename on_error(err) else: - def onexc(func, filename, err): - raise err - rmtree_impl(str(self), None, onexc) - rmtree.avoids_symlink_attacks = rmtree_impl.avoids_symlink_attacks + onexc = None + import shutil + shutil.rmtree(str(self), ignore_errors, onexc=onexc) def rename(self, target): """ diff --git a/Lib/pathlib/_os.py b/Lib/pathlib/_os.py index d273e28f4d4f50..bbb019b6534503 100644 --- a/Lib/pathlib/_os.py +++ b/Lib/pathlib/_os.py @@ -157,159 +157,3 @@ def copyfileobj(source_f, target_f): write_target = target_f.write while buf := read_source(1024 * 1024): write_target(buf) - -if ({os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and - os.scandir in os.supports_fd and os.stat in os.supports_follow_symlinks): - - def _rmtree_safe_fd_step(stack, onexc): - # Each stack item has four elements: - # * func: The first operation to perform: os.lstat, os.close or os.rmdir. - # Walking a directory starts with an os.lstat() to detect symlinks; in - # this case, func is updated before subsequent operations and passed to - # onexc() if an error occurs. - # * dirfd: Open file descriptor, or None if we're processing the top-level - # directory given to rmtree() and the user didn't supply dir_fd. - # * path: Path of file to operate upon. This is passed to onexc() if an - # error occurs. - # * orig_entry: os.DirEntry, or None if we're processing the top-level - # directory given to rmtree(). We used the cached stat() of the entry to - # save a call to os.lstat() when walking subdirectories. - func, dirfd, path, orig_entry = stack.pop() - name = path if orig_entry is None else orig_entry.name - try: - if func is os.close: - os.close(dirfd) - return - if func is os.rmdir: - os.rmdir(name, dir_fd=dirfd) - return - - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - assert func is os.lstat - if orig_entry is None: - orig_st = os.lstat(name, dir_fd=dirfd) - else: - orig_st = orig_entry.stat(follow_symlinks=False) - - func = os.open # For error reporting. - topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd) - - func = os.path.islink # For error reporting. - try: - if not os.path.samestat(orig_st, os.fstat(topfd)): - # Symlinks to directories are forbidden, see GH-46010. - raise OSError("Cannot call rmtree on a symbolic link") - stack.append((os.rmdir, dirfd, path, orig_entry)) - finally: - stack.append((os.close, topfd, path, orig_entry)) - - func = os.scandir # For error reporting. - with os.scandir(topfd) as scandir_it: - entries = list(scandir_it) - for entry in entries: - fullname = os.path.join(path, entry.name) - try: - if entry.is_dir(follow_symlinks=False): - # Traverse into sub-directory. - stack.append((os.lstat, topfd, fullname, entry)) - continue - except FileNotFoundError: - continue - except OSError: - pass - try: - os.unlink(entry.name, dir_fd=topfd) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.unlink, fullname, err) - except FileNotFoundError as err: - if orig_entry is None or func is os.close: - err.filename = path - onexc(func, path, err) - except OSError as err: - err.filename = path - onexc(func, path, err) - - # Version using fd-based APIs to protect against races - def rmtree(path, dir_fd, onexc): - # While the unsafe rmtree works fine on bytes, the fd based does not. - if isinstance(path, bytes): - path = os.fsdecode(path) - stack = [(os.lstat, dir_fd, path, None)] - try: - while stack: - _rmtree_safe_fd_step(stack, onexc) - finally: - # Close any file descriptors still on the stack. - while stack: - func, fd, path, entry = stack.pop() - if func is not os.close: - continue - try: - os.close(fd) - except OSError as err: - onexc(os.close, path, err) - - rmtree.avoids_symlink_attacks = True - -else: - if hasattr(os.stat_result, 'st_file_attributes'): - def _rmtree_islink(st): - return (stat.S_ISLNK(st.st_mode) or - (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT - and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) - else: - def _rmtree_islink(st): - return stat.S_ISLNK(st.st_mode) - - # version vulnerable to race conditions - def rmtree(path, dir_fd, onexc): - if dir_fd is not None: - raise NotImplementedError("dir_fd unavailable on this platform") - try: - st = os.lstat(path) - except OSError as err: - onexc(os.lstat, path, err) - return - try: - if _rmtree_islink(st): - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) - # can't continue even if onexc hook returns - return - - def onerror(err): - if not isinstance(err, FileNotFoundError): - onexc(os.scandir, err.filename, err) - - results = os.walk(path, topdown=False, onerror=onerror, - followlinks=os._walk_symlinks_as_files) - for dirpath, dirnames, filenames in results: - for name in dirnames: - fullname = os.path.join(dirpath, name) - try: - os.rmdir(fullname) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.rmdir, fullname, err) - for name in filenames: - fullname = os.path.join(dirpath, name) - try: - os.unlink(fullname) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.unlink, fullname, err) - try: - os.rmdir(path) - except FileNotFoundError: - pass - except OSError as err: - onexc(os.rmdir, path, err) - - rmtree.avoids_symlink_attacks = False diff --git a/Lib/shutil.py b/Lib/shutil.py index d9d4531c06b48f..0235f6bae32f14 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,7 +10,6 @@ import fnmatch import collections import errno -from pathlib._os import rmtree as _rmtree_impl try: import zlib @@ -596,6 +595,157 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=ignore_dangling_symlinks, dirs_exist_ok=dirs_exist_ok) +if hasattr(os.stat_result, 'st_file_attributes'): + def _rmtree_islink(st): + return (stat.S_ISLNK(st.st_mode) or + (st.st_file_attributes & stat.FILE_ATTRIBUTE_REPARSE_POINT + and st.st_reparse_tag == stat.IO_REPARSE_TAG_MOUNT_POINT)) +else: + def _rmtree_islink(st): + return stat.S_ISLNK(st.st_mode) + +# version vulnerable to race conditions +def _rmtree_unsafe(path, dir_fd, onexc): + if dir_fd is not None: + raise NotImplementedError("dir_fd unavailable on this platform") + try: + st = os.lstat(path) + except OSError as err: + onexc(os.lstat, path, err) + return + try: + if _rmtree_islink(st): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError as err: + onexc(os.path.islink, path, err) + # can't continue even if onexc hook returns + return + def onerror(err): + if not isinstance(err, FileNotFoundError): + onexc(os.scandir, err.filename, err) + results = os.walk(path, topdown=False, onerror=onerror, followlinks=os._walk_symlinks_as_files) + for dirpath, dirnames, filenames in results: + for name in dirnames: + fullname = os.path.join(dirpath, name) + try: + os.rmdir(fullname) + except FileNotFoundError: + continue + except OSError as err: + onexc(os.rmdir, fullname, err) + for name in filenames: + fullname = os.path.join(dirpath, name) + try: + os.unlink(fullname) + except FileNotFoundError: + continue + except OSError as err: + onexc(os.unlink, fullname, err) + try: + os.rmdir(path) + except FileNotFoundError: + pass + except OSError as err: + onexc(os.rmdir, path, err) + +# Version using fd-based APIs to protect against races +def _rmtree_safe_fd(path, dir_fd, onexc): + # While the unsafe rmtree works fine on bytes, the fd based does not. + if isinstance(path, bytes): + path = os.fsdecode(path) + stack = [(os.lstat, dir_fd, path, None)] + try: + while stack: + _rmtree_safe_fd_step(stack, onexc) + finally: + # Close any file descriptors still on the stack. + while stack: + func, fd, path, entry = stack.pop() + if func is not os.close: + continue + try: + os.close(fd) + except OSError as err: + onexc(os.close, path, err) + +def _rmtree_safe_fd_step(stack, onexc): + # Each stack item has four elements: + # * func: The first operation to perform: os.lstat, os.close or os.rmdir. + # Walking a directory starts with an os.lstat() to detect symlinks; in + # this case, func is updated before subsequent operations and passed to + # onexc() if an error occurs. + # * dirfd: Open file descriptor, or None if we're processing the top-level + # directory given to rmtree() and the user didn't supply dir_fd. + # * path: Path of file to operate upon. This is passed to onexc() if an + # error occurs. + # * orig_entry: os.DirEntry, or None if we're processing the top-level + # directory given to rmtree(). We used the cached stat() of the entry to + # save a call to os.lstat() when walking subdirectories. + func, dirfd, path, orig_entry = stack.pop() + name = path if orig_entry is None else orig_entry.name + try: + if func is os.close: + os.close(dirfd) + return + if func is os.rmdir: + os.rmdir(name, dir_fd=dirfd) + return + + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + assert func is os.lstat + if orig_entry is None: + orig_st = os.lstat(name, dir_fd=dirfd) + else: + orig_st = orig_entry.stat(follow_symlinks=False) + + func = os.open # For error reporting. + topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd) + + func = os.path.islink # For error reporting. + try: + if not os.path.samestat(orig_st, os.fstat(topfd)): + # Symlinks to directories are forbidden, see GH-46010. + raise OSError("Cannot call rmtree on a symbolic link") + stack.append((os.rmdir, dirfd, path, orig_entry)) + finally: + stack.append((os.close, topfd, path, orig_entry)) + + func = os.scandir # For error reporting. + with os.scandir(topfd) as scandir_it: + entries = list(scandir_it) + for entry in entries: + fullname = os.path.join(path, entry.name) + try: + if entry.is_dir(follow_symlinks=False): + # Traverse into sub-directory. + stack.append((os.lstat, topfd, fullname, entry)) + continue + except FileNotFoundError: + continue + except OSError: + pass + try: + os.unlink(entry.name, dir_fd=topfd) + except FileNotFoundError: + continue + except OSError as err: + onexc(os.unlink, fullname, err) + except FileNotFoundError as err: + if orig_entry is None or func is os.close: + err.filename = path + onexc(func, path, err) + except OSError as err: + err.filename = path + onexc(func, path, err) + +_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= + os.supports_dir_fd and + os.scandir in os.supports_fd and + os.stat in os.supports_follow_symlinks) +_rmtree_impl = _rmtree_safe_fd if _use_fd_functions else _rmtree_unsafe + def rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None): """Recursively delete a directory tree. @@ -642,7 +792,7 @@ def onexc(*args): # Allow introspection of whether or not the hardening against symlink # attacks is supported on the current platform -rmtree.avoids_symlink_attacks = _rmtree_impl.avoids_symlink_attacks +rmtree.avoids_symlink_attacks = _use_fd_functions def _basename(path): """A basename() variant which first strips the trailing slash, if present. diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index ef78ffe49221f4..d6ca7ba9c7362b 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -784,7 +784,6 @@ def test_group_no_follow_symlinks(self): def test_rmtree_uses_safe_fd_version_if_available(self): if rmtree_use_fd_functions: - self.assertTrue(self.cls.rmtree.avoids_symlink_attacks) d = self.cls(self.base, 'a') d.mkdir() try: @@ -800,8 +799,6 @@ def _raiser(*args, **kwargs): self.assertRaises(Called, d.rmtree) finally: os.open = real_open - else: - self.assertFalse(self.cls.rmtree.avoids_symlink_attacks) @unittest.skipIf(sys.platform[:6] == 'cygwin', "This test can't be run on Cygwin (issue #1071513).") From d8c4291f30bb3c16436e53947edba67181fdc3fc Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 27 Jun 2024 18:58:38 +0100 Subject: [PATCH 11/14] Fix docs --- Doc/library/pathlib.rst | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 884f18ee097c75..1fee89c2901aeb 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1560,9 +1560,7 @@ Copying, renaming and deleting other platforms, the :func:`~Path.rmtree` implementation is susceptible to a symlink attack: given proper timing and circumstances, attackers can manipulate symlinks on the filesystem to delete files they wouldn't - be able to access otherwise. Applications can use the - :data:`Path.rmtree.avoids_symlink_attacks` function attribute to - determine which case applies. + be able to access otherwise. If the optional argument *on_error* is specified, it should be a callable; it will be called with one argument, an :exc:`OSError` instance. The @@ -1570,13 +1568,6 @@ Copying, renaming and deleting it to stop. Note that the filename is available as the ``filename`` attribute of the exception object. - .. attribute:: Path.rmtree.avoids_symlink_attacks - - Indicates whether the current platform and implementation provides a - symlink attack resistant version of :meth:`~Path.rmtree`. Currently - this is only true for platforms supporting fd-based directory access - functions. - .. versionadded:: 3.14 From a9c8d4f39271513df65d579a0173c263c03e9254 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 27 Jun 2024 19:09:39 +0100 Subject: [PATCH 12/14] Minor tweaks --- Lib/pathlib/_abc.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b727071736ebd6..e1861711e29c7c 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -906,35 +906,33 @@ def rmtree(self, ignore_errors=False, on_error=None): the caller. """ if ignore_errors: - def on_error(error): + def on_error(err): pass elif on_error is None: - def on_error(error): - raise + def on_error(err): + raise err try: if self.is_symlink() or self.is_junction(): raise OSError("Cannot call rmtree on a symbolic link") results = self.walk( - top_down=False, on_error=on_error, + top_down=False, follow_symlinks=False) for dirpath, dirnames, filenames in results: for name in filenames: - child = dirpath / name try: - child.unlink() - except OSError as error: - on_error(error) + dirpath.joinpath(name).unlink() + except OSError as err: + on_error(err) for name in dirnames: - child = dirpath / name try: - child.rmdir() - except OSError as error: - on_error(error) + dirpath.joinpath(name).rmdir() + except OSError as err: + on_error(err) self.rmdir() - except OSError as error: - error.filename = str(self) - on_error(error) + except OSError as err: + err.filename = str(self) + on_error(err) def owner(self, *, follow_symlinks=True): """ From 4bcc143401cfe24b452d104dd82759418a15ef36 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 4 Jul 2024 19:33:31 +0100 Subject: [PATCH 13/14] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Doc/library/pathlib.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 1fee89c2901aeb..8d83dd720028a7 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1545,7 +1545,7 @@ Copying, renaming and deleting .. method:: Path.rmtree(ignore_errors=False, on_error=None) - Delete this entire directory tree. The path must not refer to a symlink. + Recursively delete this entire directory tree. The path must not refer to a symlink. If *ignore_errors* is true, errors resulting from failed removals will be ignored. If *ignore_errors* is false or omitted, and a function is given to @@ -1555,17 +1555,17 @@ Copying, renaming and deleting .. note:: - On platforms that support the necessary fd-based functions a symlink - attack resistant version of :meth:`~Path.rmtree` is used by default. On + On platforms that support the necessary fd-based functions, a symlink + attack-resistant version of :meth:`~Path.rmtree` is used by default. On other platforms, the :func:`~Path.rmtree` implementation is susceptible to a symlink attack: given proper timing and circumstances, attackers - can manipulate symlinks on the filesystem to delete files they wouldn't + can manipulate symlinks on the filesystem to delete files they would not be able to access otherwise. If the optional argument *on_error* is specified, it should be a callable; - it will be called with one argument, an :exc:`OSError` instance. The + it will be called with one argument of type :exc:`OSError`. The callable can handle the error to continue the deletion process or re-raise - it to stop. Note that the filename is available as the ``filename`` + it to stop. Note that the filename is available as the :attr:`~OSError.filename` attribute of the exception object. .. versionadded:: 3.14 From ab0f447e41db07a65bce3a300d6479669568a1fc Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 4 Jul 2024 19:48:29 +0100 Subject: [PATCH 14/14] Address review feedback. --- Doc/whatsnew/3.14.rst | 16 +++++++++------- Lib/pathlib/_abc.py | 6 ++++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7a615ae72f3662..f701da8ba70310 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -103,13 +103,15 @@ os pathlib ------- -* Add :meth:`pathlib.Path.copy`, which copies the content of one file to - another, like :func:`shutil.copyfile`. - (Contributed by Barney Gale in :gh:`73991`.) -* Add :meth:`pathlib.Path.copytree`, which copies one directory tree to - another. - (Contributed by Barney Gale in :gh:`73991`.) -* Add :meth:`pathlib.Path.rmtree`, which recursively removes a directory. +* Add methods to :class:`pathlib.Path` to recursively copy or remove files: + + * :meth:`~pathlib.Path.copy` copies the content of one file to another, like + :func:`shutil.copyfile`. + * :meth:`~pathlib.Path.copytree` copies one directory tree to another, like + :func:`shutil.copytree`. + * :meth:`~pathlib.Path.rmtree` recursively removes a directory tree, like + :func:`shutil.rmtree`. + (Contributed by Barney Gale in :gh:`73991`.) symtable diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index e1861711e29c7c..b20c078badf681 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -912,11 +912,13 @@ def on_error(err): def on_error(err): raise err try: - if self.is_symlink() or self.is_junction(): + if self.is_symlink(): raise OSError("Cannot call rmtree on a symbolic link") + elif self.is_junction(): + raise OSError("Cannot call rmtree on a junction") results = self.walk( on_error=on_error, - top_down=False, + top_down=False, # Bottom-up so we rmdir() empty directories. follow_symlinks=False) for dirpath, dirnames, filenames in results: for name in filenames: