diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 8e91936680fab8..24ad3c40c22ca0 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -96,7 +96,7 @@ Pure path objects provide path-handling operations which don't actually access a filesystem. There are three ways to access these classes, which we also call *flavours*: -.. class:: PurePath(*pathsegments) +.. class:: PurePath(*pathsegments, blueprint=None) A generic class that represents the system's path flavour (instantiating it creates either a :class:`PurePosixPath` or a :class:`PureWindowsPath`):: @@ -150,13 +150,39 @@ we also call *flavours*: to ``PurePosixPath('bar')``, which is wrong if ``foo`` is a symbolic link to another directory) + The optional *blueprint* argument may provide another path object. It is + supplied whenever a new path object is created from an existing one, such + as in :attr:`parent` or :meth:`relative_to`. Subclasses may use this to + pass information between path objects. For example:: + + from pathlib import PurePosixPath + + class MyPath(PurePosixPath): + def __init__(self, *pathsegments, blueprint=None, session_id=None): + super().__init__(*pathsegments, blueprint=blueprint) + if blueprint: + self.session_id = blueprint.session_id + else: + self.session_id = session_id + + etc = MyPath('/etc', session_id=42) + hosts = etc / 'hosts' + print(hosts.session_id) # 42 + + .. note:: + The classes provided in this module ignore the *blueprint* argument. + It is there purely as a hook for user-defined subclasses. + + .. versionadded:: 3.12 + The *blueprint* argument. + Pure path objects implement the :class:`os.PathLike` interface, allowing them to be used anywhere the interface is accepted. .. versionchanged:: 3.6 Added support for the :class:`os.PathLike` interface. -.. class:: PurePosixPath(*pathsegments) +.. 
class:: PurePosixPath(*pathsegments, blueprint=None) A subclass of :class:`PurePath`, this path flavour represents non-Windows filesystem paths:: @@ -164,9 +190,9 @@ we also call *flavours*: >>> PurePosixPath('/etc') PurePosixPath('/etc') - *pathsegments* is specified similarly to :class:`PurePath`. + *pathsegments* and *blueprint* are specified similarly to :class:`PurePath`. -.. class:: PureWindowsPath(*pathsegments) +.. class:: PureWindowsPath(*pathsegments, blueprint=None) A subclass of :class:`PurePath`, this path flavour represents Windows filesystem paths, including `UNC paths`_:: @@ -176,7 +202,7 @@ we also call *flavours*: >>> PureWindowsPath('//server/share/file') PureWindowsPath('//server/share/file') - *pathsegments* is specified similarly to :class:`PurePath`. + *pathsegments* and *blueprint* are specified similarly to :class:`PurePath`. .. _unc paths: https://en.wikipedia.org/wiki/Path_(computing)#UNC @@ -530,10 +556,10 @@ Pure paths provide the following methods and properties: unintended effects. -.. method:: PurePath.joinpath(*other) +.. method:: PurePath.joinpath(*pathsegments) Calling this method is equivalent to combining the path with each of - the *other* arguments in turn:: + the given *pathsegments* in turn:: >>> PurePosixPath('/etc').joinpath('passwd') PurePosixPath('/etc/passwd') @@ -690,7 +716,7 @@ Concrete paths are subclasses of the pure path classes. In addition to operations provided by the latter, they also provide methods to do system calls on path objects. There are three ways to instantiate concrete paths: -.. class:: Path(*pathsegments) +.. class:: Path(*pathsegments, blueprint=None) A subclass of :class:`PurePath`, this class represents concrete paths of the system's path flavour (instantiating it creates either a @@ -699,9 +725,9 @@ calls on path objects. There are three ways to instantiate concrete paths: >>> Path('setup.py') PosixPath('setup.py') - *pathsegments* is specified similarly to :class:`PurePath`. 
+ *pathsegments* and *blueprint* are specified similarly to :class:`PurePath`. -.. class:: PosixPath(*pathsegments) +.. class:: PosixPath(*pathsegments, blueprint=None) A subclass of :class:`Path` and :class:`PurePosixPath`, this class represents concrete non-Windows filesystem paths:: @@ -709,9 +735,9 @@ calls on path objects. There are three ways to instantiate concrete paths: >>> PosixPath('/etc') PosixPath('/etc') - *pathsegments* is specified similarly to :class:`PurePath`. + *pathsegments* and *blueprint* are specified similarly to :class:`PurePath`. -.. class:: WindowsPath(*pathsegments) +.. class:: WindowsPath(*pathsegments, blueprint=None) A subclass of :class:`Path` and :class:`PureWindowsPath`, this class represents concrete Windows filesystem paths:: @@ -719,7 +745,7 @@ calls on path objects. There are three ways to instantiate concrete paths: >>> WindowsPath('c:/Program Files/') WindowsPath('c:/Program Files') - *pathsegments* is specified similarly to :class:`PurePath`. + *pathsegments* and *blueprint* are specified similarly to :class:`PurePath`. You can only instantiate the class flavour that corresponds to your system (allowing system calls on non-compatible path flavours could lead to diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index a3fce7ccacf7c1..36831f879e8983 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -331,6 +331,9 @@ inspect pathlib ------- +* Add support for subclassing :class:`pathlib.PurePath` and + :class:`~pathlib.Path`, plus their Posix- and Windows-specific variants. + * Add :meth:`~pathlib.Path.walk` for walking the directory trees and generating all file or directory names within them, similar to :func:`os.walk`. (Contributed by Stanislav Zmiev in :gh:`90385`.) 
diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 8eb08949fa9b43..fd95974e6c83bd 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -210,11 +210,10 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase): class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" - __slots__ = ('_pathcls', '_drv', '_root', '_tail') + __slots__ = ('_path', '_drv', '_root', '_tail') def __init__(self, path): - # We don't store the instance to avoid reference cycles - self._pathcls = type(path) + self._path = path self._drv = path.drive self._root = path.root self._tail = path._tail @@ -230,11 +229,11 @@ def __getitem__(self, idx): raise IndexError(idx) if idx < 0: idx += len(self) - return self._pathcls._from_parsed_parts(self._drv, self._root, - self._tail[:-idx - 1]) + return self._path._from_parsed_parts(self._drv, self._root, + self._tail[:-idx - 1]) def __repr__(self): - return "<{}.parents>".format(self._pathcls.__name__) + return "<{}.parents>".format(type(self._path).__name__) class PurePath(object): @@ -299,7 +298,7 @@ def __reduce__(self): # when pickling related paths. return (self.__class__, self.parts) - def __init__(self, *args): + def __init__(self, *args, blueprint=None): paths = [] for arg in args: if isinstance(arg, PurePath): @@ -348,15 +347,14 @@ def _load_parts(self): self._root = root self._tail_cached = tail - @classmethod - def _from_parsed_parts(cls, drv, root, tail): - path = cls._format_parsed_parts(drv, root, tail) - self = cls(path) - self._str = path or '.' - self._drv = drv - self._root = root - self._tail_cached = tail - return self + def _from_parsed_parts(self, drv, root, tail): + path_str = self._format_parsed_parts(drv, root, tail) + path = type(self)(path_str, blueprint=self) + path._str = path_str or '.' 
+ path._drv = drv + path._root = root + path._tail_cached = tail + return path @classmethod def _format_parsed_parts(cls, drv, root, tail): @@ -591,7 +589,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): warnings._deprecated("pathlib.PurePath.relative_to(*args)", msg, remove=(3, 14)) path_cls = type(self) - other = path_cls(other, *_deprecated) + other = path_cls(other, *_deprecated, blueprint=self) for step, path in enumerate([other] + list(other.parents)): if self.is_relative_to(path): break @@ -600,7 +598,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): if step and not walk_up: raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") parts = ['..'] * step + self._tail[len(path._tail):] - return path_cls(*parts) + return path_cls(*parts, blueprint=self) def is_relative_to(self, other, /, *_deprecated): """Return True if the path is relative to another path or False. @@ -611,7 +609,7 @@ def is_relative_to(self, other, /, *_deprecated): "scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath.is_relative_to(*args)", msg, remove=(3, 14)) - other = type(self)(other, *_deprecated) + other = type(self)(other, *_deprecated, blueprint=self) return other == self or other in self.parents @property @@ -623,13 +621,13 @@ def parts(self): else: return tuple(self._tail) - def joinpath(self, *args): + def joinpath(self, *pathsegments): """Combine this path with one or several arguments, and return a new path representing either a subpath (if all arguments are relative paths) or a totally different path (if one of the arguments is anchored). 
""" - return self.__class__(self, *args) + return type(self)(self, *pathsegments, blueprint=self) def __truediv__(self, key): try: @@ -639,7 +637,7 @@ def __truediv__(self, key): def __rtruediv__(self, key): try: - return type(self)(key, self) + return type(self)(key, self, blueprint=self) except TypeError: return NotImplemented @@ -656,6 +654,8 @@ def parent(self): @property def parents(self): """A sequence of this path's logical parents.""" + # The value of this property should not be cached on the path object, + # as doing so would introduce a reference cycle. return _PathParents(self) def is_absolute(self): @@ -686,7 +686,7 @@ def match(self, path_pattern): """ Return True if this path matches the given pattern. """ - pat = type(self)(path_pattern) + pat = type(self)(path_pattern, blueprint=self) if not pat.parts: raise ValueError("empty pattern") pat_parts = pat._parts_normcase @@ -740,12 +740,12 @@ class Path(PurePath): """ __slots__ = () - def __init__(self, *args, **kwargs): + def __init__(self, *args, blueprint=None, **kwargs): if kwargs: msg = ("support for supplying keyword arguments to pathlib.PurePath " "is deprecated and scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) - super().__init__(*args) + super().__init__(*args, blueprint=blueprint) def __new__(cls, *args, **kwargs): if cls is Path: @@ -761,7 +761,7 @@ def _make_child_relpath(self, name): path_str = f'{path_str}{name}' else: path_str = name - path = type(self)(path_str) + path = type(self)(path_str, blueprint=self) path._str = path_str path._drv = self.drive path._root = self.root @@ -811,7 +811,7 @@ def samefile(self, other_path): try: other_st = other_path.stat() except AttributeError: - other_st = self.__class__(other_path).stat() + other_st = type(self)(other_path, blueprint=self).stat() return self._flavour.samestat(st, other_st) def iterdir(self): @@ -873,7 +873,7 @@ def absolute(self): cwd = 
self._flavour.abspath(self.drive) else: cwd = os.getcwd() - return type(self)(cwd, self) + return type(self)(cwd, self, blueprint=self) def resolve(self, strict=False): """ @@ -891,7 +891,7 @@ def check_eloop(e): except OSError as e: check_eloop(e) raise - p = type(self)(s) + p = type(self)(s, blueprint=self) # In non-strict mode, realpath() doesn't raise on symlink loops. # Ensure we get an exception by calling stat() @@ -981,7 +981,7 @@ def readlink(self): """ if not hasattr(os, "readlink"): raise NotImplementedError("os.readlink() not available on this system") - return type(self)(os.readlink(self)) + return type(self)(os.readlink(self), blueprint=self) def touch(self, mode=0o666, exist_ok=True): """ @@ -1070,7 +1070,7 @@ def rename(self, target): Returns the new Path instance pointing to the target path. """ os.rename(self, target) - return self.__class__(target) + return type(self)(target, blueprint=self) def replace(self, target): """ @@ -1083,7 +1083,7 @@ def replace(self, target): Returns the new Path instance pointing to the target path. 
""" os.replace(self, target) - return self.__class__(target) + return type(self)(target, blueprint=self) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 8b5b61a818bbbc..5bf64563afa7d3 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -29,11 +29,12 @@ # class _BasePurePathSubclass(object): - init_called = False - - def __init__(self, *args): - super().__init__(*args) - self.init_called = True + def __init__(self, *args, blueprint=None, session_id=None): + super().__init__(*args, blueprint=blueprint) + if blueprint: + self.session_id = blueprint.session_id + else: + self.session_id = session_id class _BasePurePathTest(object): @@ -121,20 +122,21 @@ def test_str_subclass_common(self): self._check_str_subclass('a/b.txt') self._check_str_subclass('/a/b.txt') - def test_init_called_common(self): + def test_blueprint_common(self): class P(_BasePurePathSubclass, self.cls): pass - p = P('foo', 'bar') - self.assertTrue((p / 'foo').init_called) - self.assertTrue(('foo' / p).init_called) - self.assertTrue(p.joinpath('foo').init_called) - self.assertTrue(p.with_name('foo').init_called) - self.assertTrue(p.with_stem('foo').init_called) - self.assertTrue(p.with_suffix('.foo').init_called) - self.assertTrue(p.relative_to('foo').init_called) - self.assertTrue(p.parent.init_called) + p = P('foo', 'bar', session_id=42) + self.assertEqual(42, P(blueprint=p).session_id) + self.assertEqual(42, (p / 'foo').session_id) + self.assertEqual(42, ('foo' / p).session_id) + self.assertEqual(42, p.joinpath('foo').session_id) + self.assertEqual(42, p.with_name('foo').session_id) + self.assertEqual(42, p.with_stem('foo').session_id) + self.assertEqual(42, p.with_suffix('.foo').session_id) + self.assertEqual(42, p.relative_to('foo').session_id) + self.assertEqual(42, p.parent.session_id) for parent in p.parents: - self.assertTrue(parent.init_called) + self.assertEqual(42, parent.session_id) def 
_get_drive_root_parts(self, parts): path = self.cls(*parts) @@ -1647,6 +1649,27 @@ def test_home(self): env['HOME'] = os.path.join(BASE, 'home') self._test_home(self.cls.home()) + def test_blueprint(self): + class P(_BasePurePathSubclass, self.cls): + pass + p = P(BASE, session_id=42) + self.assertEqual(42, P(blueprint=p).session_id) + self.assertEqual(42, p.absolute().session_id) + self.assertEqual(42, p.resolve().session_id) + self.assertEqual(42, P('~', blueprint=p).expanduser().session_id) + self.assertEqual(42, (p / 'fileA').rename(p / 'fileB').session_id) + self.assertEqual(42, (p / 'fileB').replace(p / 'fileA').session_id) + if os_helper.can_symlink(): + self.assertEqual(42, (p / 'linkA').readlink().session_id) + for path in p.iterdir(): + self.assertEqual(42, path.session_id) + for path in p.glob('*'): + self.assertEqual(42, path.session_id) + for path in p.rglob('*'): + self.assertEqual(42, path.session_id) + for dirpath, dirnames, filenames in p.walk(): + self.assertEqual(42, dirpath.session_id) + def test_samefile(self): fileA_path = os.path.join(BASE, 'fileA') fileB_path = os.path.join(BASE, 'dirB', 'fileB') diff --git a/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst b/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst new file mode 100644 index 00000000000000..23d3056795d57f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-04-03-22-02-35.gh-issue-100479.kNBjQm.rst @@ -0,0 +1,4 @@ +Add optional *blueprint* argument to :class:`pathlib.PurePath` and +:class:`~pathlib.Path`. This argument is supplied whenever a derivative path +is created, such as from :attr:`pathlib.PurePath.parent`. Subclasses may use this +to pass information to derivative paths. Patch by Barney Gale.