From b428069b063d7641d721f1cf399c086f7e3c9729 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 27 Jan 2023 01:06:24 +0000 Subject: [PATCH 01/13] GH-101362 - Optimise pathlib by deferring path normalisation `PurePath` now normalises and splits paths only when necessary, e.g. when `.name` or `.parent` is accessed. The result is cached. This speeds up path object construction by around 4x. `PurePath.__fspath__()` now returns an unnormalised path, which should be transparent to filesystem APIs (else pathlib's normalisation is broken!). This extends the earlier performance improvement to most impure `Path` methods, and also speeds up pickling, `p.joinpath('bar')` and `p / 'bar'`. This also fixes GH-76846 and GH-85281 by unifying path constructors and adding an `__init__()` method. --- Lib/pathlib.py | 170 +++++++++++++++++++-------------------- Lib/test/test_pathlib.py | 4 +- 2 files changed, 85 insertions(+), 89 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 17659bcd3e2d7f..b1a9e1904bfbb0 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -216,8 +216,8 @@ class _PathParents(Sequence): def __init__(self, path): # We don't store the instance to avoid reference cycles self._pathcls = type(path) - self._drv = path._drv - self._root = path._root + self._drv = path.drive + self._root = path.root self._parts = path._parts def __len__(self): @@ -251,12 +251,12 @@ class PurePath(object): directly, regardless of your system. """ __slots__ = ( - '_drv', '_root', '_parts', + '_fspath', '_drv', '_root', '_parts_cached', '_str', '_hash', '_parts_tuple', '_parts_normcase_cached', ) _flavour = os.path - def __new__(cls, *args): + def __new__(cls, *args, **kwargs): """Construct a PurePath from one or several strings and or existing PurePath objects. The strings and path objects are combined so as to yield a canonicalized path, which is incorporated into the @@ -264,20 +264,32 @@ def __new__(cls, *args): """ if cls is PurePath: cls = PureWindowsPath if os.name == 'nt' else PurePosixPath - return cls._from_parts(args) + return super().__new__(cls) def __reduce__(self): - # Using the parts tuple helps share interned path parts - # when pickling related paths. - return (self.__class__, tuple(self._parts)) + return (self.__class__, (self._fspath,)) + + def __init__(self, *args): + if not args: + path = '' + elif len(args) == 1: + path = os.fspath(args[0]) + else: + path = self._flavour.join(*args) + + if not isinstance(path, str): + raise TypeError( + "argument should be a str object or an os.PathLike " + "object returning str, not %r" + % type(path)) + self._fspath = path @classmethod - def _parse_parts(cls, parts): - if not parts: + def _parse_path(cls, path): + if not path: return '', '', [] sep = cls._flavour.sep altsep = cls._flavour.altsep - path = cls._flavour.join(*parts) if altsep: path = path.replace(altsep, sep) drv, root, rel = cls._flavour.splitroot(path) @@ -288,43 +300,18 @@ def _parse_parts(cls, parts): parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed - @classmethod - def _parse_args(cls, args): - # This is useful when you don't want to create an instance, just - # canonicalize some constructor arguments. - parts = [] - for a in args: - if isinstance(a, PurePath): - parts += a._parts - else: - a = os.fspath(a) - if isinstance(a, str): - # Force-cast str subclasses to str (issue #21127) - parts.append(str(a)) - else: - raise TypeError( - "argument should be a str object or an os.PathLike " - "object returning str, not %r" - % type(a)) - return cls._parse_parts(parts) - - @classmethod - def _from_parts(cls, args): - # We need to call _parse_args on the instance, so as to get the - # right flavour. - self = object.__new__(cls) - drv, root, parts = self._parse_args(args) + def _load_parts(self): + drv, root, parts = self._parse_path(self._fspath) self._drv = drv self._root = root - self._parts = parts - return self + self._parts_cached = parts @classmethod def _from_parsed_parts(cls, drv, root, parts): - self = object.__new__(cls) + self = cls(cls._format_parsed_parts(drv, root, parts)) self._drv = drv self._root = root - self._parts = parts + self._parts_cached = parts return self @classmethod @@ -340,12 +327,12 @@ def __str__(self): try: return self._str except AttributeError: - self._str = self._format_parsed_parts(self._drv, self._root, + self._str = self._format_parsed_parts(self.drive, self.root, self._parts) or '.' return self._str def __fspath__(self): - return str(self) + return self._fspath or '.' def as_posix(self): """Return the string representation of the path with forward (/) @@ -356,7 +343,7 @@ def as_posix(self): def __bytes__(self): """Return the bytes representation of the path. This is only recommended to use under Unix.""" - return os.fsencode(self) + return os.fsencode(str(self)) def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) @@ -366,7 +353,7 @@ def as_uri(self): if not self.is_absolute(): raise ValueError("relative path can't be expressed as a file URI") - drive = self._drv + drive = self.drive if len(drive) == 2 and drive[1] == ':': # It's a path on a local drive => 'file:///c:/a/b' prefix = 'file:///' + drive @@ -422,23 +409,43 @@ def __ge__(self, other): return NotImplemented return self._parts_normcase >= other._parts_normcase - drive = property(attrgetter('_drv'), - doc="""The drive prefix (letter or UNC path), if any.""") + @property + def drive(self): + """The drive prefix (letter or UNC path), if any.""" + try: + return self._drv + except AttributeError: + self._load_parts() + return self._drv - root = property(attrgetter('_root'), - doc="""The root of the path, if any.""") + @property + def root(self): + """The root of the path, if any.""" + try: + return self._root + except AttributeError: + self._load_parts() + return self._root + + @property + def _parts(self): + try: + return self._parts_cached + except AttributeError: + self._load_parts() + return self._parts_cached @property def anchor(self): """The concatenation of the drive and root, or ''.""" - anchor = self._drv + self._root + anchor = self.drive + self.root return anchor @property def name(self): """The final path component, if any.""" parts = self._parts - if len(parts) == (1 if (self._drv or self._root) else 0): + if len(parts) == (1 if (self.drive or self.root) else 0): return '' return parts[-1] @@ -487,7 +494,7 @@ def with_name(self, name): drv, root, tail = f.splitroot(name) if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail): raise ValueError("Invalid name %r" % (name)) - return self._from_parsed_parts(self._drv, self._root, + return self._from_parsed_parts(self.drive, self.root, self._parts[:-1] + [name]) def with_stem(self, stem): @@ -512,7 +519,7 @@ def with_suffix(self, suffix): name = name + suffix else: name = name[:-len(old_suffix)] + suffix - return self._from_parsed_parts(self._drv, self._root, + return self._from_parsed_parts(self.drive, self.root, self._parts[:-1] + [name]) def relative_to(self, other, /, *_deprecated, walk_up=False): @@ -571,22 +578,7 @@ def joinpath(self, *args): paths) or a totally different path (if one of the arguments is anchored). """ - drv1, root1, parts1 = self._drv, self._root, self._parts - drv2, root2, parts2 = self._parse_args(args) - if root2: - if not drv2 and drv1: - return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:]) - else: - return self._from_parsed_parts(drv2, root2, parts2) - elif drv2: - if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1): - # Same drive => second path is relative to the first. - return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:]) - else: - return self._from_parsed_parts(drv2, root2, parts2) - else: - # Second path is non-anchored (common case). - return self._from_parsed_parts(drv1, root1, parts1 + parts2) + return type(self)(self, *args) def __truediv__(self, key): try: @@ -596,15 +588,15 @@ def __truediv__(self, key): def __rtruediv__(self, key): try: - return self._from_parts([key] + self._parts) + return type(self)(key, self) except TypeError: return NotImplemented @property def parent(self): """The logical parent of the path.""" - drv = self._drv - root = self._root + drv = self.drive + root = self.root parts = self._parts if len(parts) == 1 and (drv or root): return self @@ -620,7 +612,7 @@ def is_absolute(self): a drive).""" # ntpath.isabs() is defective - see GH-44626 . if self._flavour is ntpath: - return bool(self._drv and self._root) + return bool(self.drive and self.root) return self._flavour.isabs(self) def is_reserved(self): @@ -644,12 +636,12 @@ def match(self, path_pattern): Return True if this path matches the given pattern. """ path_pattern = self._flavour.normcase(path_pattern) - drv, root, pat_parts = self._parse_parts((path_pattern,)) + drv, root, pat_parts = self._parse_path(path_pattern) if not pat_parts: raise ValueError("empty pattern") - elif drv and drv != self._flavour.normcase(self._drv): + elif drv and drv != self._flavour.normcase(self.drive): return False - elif root and root != self._root: + elif root and root != self.root: return False parts = self._parts_normcase if drv or root: @@ -702,24 +694,26 @@ class Path(PurePath): """ __slots__ = () - def __new__(cls, *args, **kwargs): + def __init__(self, *args, **kwargs): if kwargs: msg = ("support for supplying keyword arguments to pathlib.PurePath " "is deprecated and scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) + super().__init__(*args) + + def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath - self = cls._from_parts(args) - if self._flavour is not os.path: + elif cls._flavour is not os.path: raise NotImplementedError("cannot instantiate %r on your system" % (cls.__name__,)) - return self + return super().__new__(cls) def _make_child_relpath(self, part): # This is an optimization used for dir walking. `part` must be # a single part relative to this path. parts = self._parts + [part] - return self._from_parsed_parts(self._drv, self._root, parts) + return self._from_parsed_parts(self.drive, self.root, parts) def __enter__(self): # In previous versions of pathlib, __exit__() marked this path as @@ -789,7 +783,7 @@ def glob(self, pattern): sys.audit("pathlib.Path.glob", self, pattern) if not pattern: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - drv, root, pattern_parts = self._parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") if pattern[-1] in (self._flavour.sep, self._flavour.altsep): @@ -804,7 +798,7 @@ def rglob(self, pattern): this subtree. """ sys.audit("pathlib.Path.rglob", self, pattern) - drv, root, pattern_parts = self._parse_parts((pattern,)) + drv, root, pattern_parts = self._parse_path(pattern) if drv or root: raise NotImplementedError("Non-relative patterns are unsupported") if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): @@ -821,7 +815,7 @@ def absolute(self): """ if self.is_absolute(): return self - return self._from_parts([os.getcwd()] + self._parts) + return type(self)(os.getcwd(), *self._parts) def resolve(self, strict=False): """ @@ -839,7 +833,7 @@ def check_eloop(e): except OSError as e: check_eloop(e) raise - p = self._from_parts((s,)) + p = type(self)(s) # In non-strict mode, realpath() doesn't raise on symlink loops. # Ensure we get an exception by calling stat() @@ -929,7 +923,7 @@ def readlink(self): """ if not hasattr(os, "readlink"): raise NotImplementedError("os.readlink() not available on this system") - return self._from_parts((os.readlink(self),)) + return type(self)(os.readlink(self)) def touch(self, mode=0o666, exist_ok=True): """ @@ -1198,12 +1192,12 @@ def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) """ - if (not (self._drv or self._root) and + if (not (self.drive or self.root) and self._parts and self._parts[0][:1] == '~'): homedir = self._flavour.expanduser(self._parts[0]) if homedir[:1] == "~": raise RuntimeError("Could not determine home directory.") - return self._from_parts([homedir] + self._parts[1:]) + return type(self)(homedir, *self._parts[1:]) return self diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index a596795b44f0fa..725164dcc17966 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -26,7 +26,9 @@ class _BaseFlavourTest(object): def _check_parse_parts(self, arg, expected): - f = self.cls._parse_parts + def f(parts): + path = self.flavour.join(*parts) if parts else '' + return self.cls._parse_path(path) sep = self.flavour.sep altsep = self.flavour.altsep actual = f([x.replace('/', sep) for x in arg]) From e49e7197c18543f8b71c197b20eda7c1858dd1c2 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 4 Feb 2023 16:30:14 +0000 Subject: [PATCH 02/13] Restore str force-casting behaviour; reduce diff a little. --- Lib/pathlib.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b1a9e1904bfbb0..5c8dfb06c4d223 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -269,21 +269,6 @@ def __new__(cls, *args, **kwargs): def __reduce__(self): return (self.__class__, (self._fspath,)) - def __init__(self, *args): - if not args: - path = '' - elif len(args) == 1: - path = os.fspath(args[0]) - else: - path = self._flavour.join(*args) - - if not isinstance(path, str): - raise TypeError( - "argument should be a str object or an os.PathLike " - "object returning str, not %r" - % type(path)) - self._fspath = path - @classmethod def _parse_path(cls, path): if not path: @@ -300,6 +285,28 @@ def _parse_path(cls, path): parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed + def __init__(self, *args): + parts = [] + for a in args: + if isinstance(a, PurePath): + parts += a._parts + else: + a = os.fspath(a) + if isinstance(a, str): + # Force-cast str subclasses to str (issue #21127) + parts.append(str(a)) + else: + raise TypeError( + "argument should be a str object or an os.PathLike " + "object returning str, not %r" + % type(a)) + if not parts: + self._fspath = '' + elif len(parts) == 1: + self._fspath = os.fspath(parts[0]) + else: + self._fspath = self._flavour.join(*parts) + def _load_parts(self): drv, root, parts = self._parse_path(self._fspath) self._drv = drv From a931986fa440c7d8f30fdd62135983bde2b8925a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 4 Feb 2023 16:44:40 +0000 Subject: [PATCH 03/13] Fix pathlib usage error in importlib --- Lib/importlib/metadata/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 40ab1a1aaac328..7e79b28754fe3b 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -748,7 +748,10 @@ def read_text(self, filename): NotADirectoryError, PermissionError, ): - return self._path.joinpath(filename).read_text(encoding='utf-8') + path = self._path + if filename: + path /= filename + return path.read_text(encoding='utf-8') read_text.__doc__ = Distribution.read_text.__doc__ From 38f70bf2f785ac37bc970b7789b0f29047144af5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 4 Feb 2023 16:58:58 +0000 Subject: [PATCH 04/13] Improve initialiser performance. --- Lib/pathlib.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 5c8dfb06c4d223..befc26f22cb78b 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -286,26 +286,21 @@ def _parse_path(cls, path): return drv, root, parsed def __init__(self, *args): - parts = [] - for a in args: - if isinstance(a, PurePath): - parts += a._parts + if args: + if len(args) == 1: + path = os.fspath(args[0]) else: - a = os.fspath(a) - if isinstance(a, str): - # Force-cast str subclasses to str (issue #21127) - parts.append(str(a)) - else: - raise TypeError( - "argument should be a str object or an os.PathLike " - "object returning str, not %r" - % type(a)) - if not parts: - self._fspath = '' - elif len(parts) == 1: - self._fspath = os.fspath(parts[0]) + path = self._flavour.join(*args) + if isinstance(path, str): + # Force-cast str subclasses to str (issue #21127) + self._fspath = str(path) + else: + raise TypeError( + "argument should be a str object or an os.PathLike " + "object returning str, not %r" + % type(path)) else: - self._fspath = self._flavour.join(*parts) + self._fspath = '' def _load_parts(self): drv, root, parts = self._parse_path(self._fspath) From 10844a64085292c249aad7dbc423de6bd35caa30 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 4 Feb 2023 17:55:49 +0000 Subject: [PATCH 05/13] Add NEWS blurb --- .../Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst diff --git a/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst new file mode 100644 index 00000000000000..0aca6df0030805 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst @@ -0,0 +1,4 @@ +Speed up construction and joining of :class:`pathlib.PurePath` objects by +deferring path parsing and normalization until needed. Some path object +operations are 2-4x faster as a result of this change. An unnormalized path +is now returned by :func:`os.fspath` when a pathlib path object is given. From d5231b666939b3edbd97bd79759619462046fb21 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 4 Feb 2023 21:51:22 +0000 Subject: [PATCH 06/13] Store '_fspath' as non-empty string. --- Lib/pathlib.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index befc26f22cb78b..340ec87f09352e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -293,14 +293,14 @@ def __init__(self, *args): path = self._flavour.join(*args) if isinstance(path, str): # Force-cast str subclasses to str (issue #21127) - self._fspath = str(path) + self._fspath = str(path) or '.' else: raise TypeError( "argument should be a str object or an os.PathLike " "object returning str, not %r" % type(path)) else: - self._fspath = '' + self._fspath = '.' def _load_parts(self): drv, root, parts = self._parse_path(self._fspath) @@ -310,7 +310,9 @@ def _load_parts(self): @classmethod def _from_parsed_parts(cls, drv, root, parts): - self = cls(cls._format_parsed_parts(drv, root, parts)) + path = cls._format_parsed_parts(drv, root, parts) + self = cls(path) + self._str = path self._drv = drv self._root = root self._parts_cached = parts @@ -320,8 +322,10 @@ def _from_parsed_parts(cls, drv, root, parts): def _format_parsed_parts(cls, drv, root, parts): if drv or root: return drv + root + cls._flavour.sep.join(parts[1:]) - else: + elif parts: return cls._flavour.sep.join(parts) + else: + return '.' def __str__(self): """Return the string representation of the path, suitable for @@ -330,11 +334,11 @@ def __str__(self): return self._str except AttributeError: self._str = self._format_parsed_parts(self.drive, self.root, - self._parts) or '.' + self._parts) return self._str def __fspath__(self): - return self._fspath or '.' + return self._fspath def as_posix(self): """Return the string representation of the path with forward (/) From cc2c7111d73b3b33008fe3d848d99655a249bd76 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 6 Mar 2023 02:03:36 +0000 Subject: [PATCH 07/13] Undo addition of `__init__()` and change to `_from_parsed_parts()` --- Lib/pathlib.py | 78 ++++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 26babb4032d609..b7ed0bf60b5428 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -256,7 +256,7 @@ class PurePath(object): ) _flavour = os.path - def __new__(cls, *args, **kwargs): + def __new__(cls, *args): """Construct a PurePath from one or several strings and or existing PurePath objects. The strings and path objects are combined so as to yield a canonicalized path, which is incorporated into the @@ -264,11 +264,29 @@ def __new__(cls, *args, **kwargs): """ if cls is PurePath: cls = PureWindowsPath if os.name == 'nt' else PurePosixPath - return super().__new__(cls) + return cls._from_parts(args) def __reduce__(self): return (self.__class__, (self._fspath,)) + @classmethod + def _join_parts(cls, parts): + if not parts: + return '' + elif len(parts) == 1: + path = os.fspath(parts[0]) + else: + path = cls._flavour.join(*parts) + if isinstance(path, str): + # Force-cast str subclasses to str (issue #21127) + path = str(path) + else: + raise TypeError( + "argument should be a str or an os.PathLike " + "object where __fspath__ returns a str, " + f"not {type(path).__name__!r}") + return path + @classmethod def _parse_path(cls, path): if not path: @@ -285,22 +303,11 @@ def _parse_path(cls, path): parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed - def __init__(self, *args): - if args: - if len(args) == 1: - path = os.fspath(args[0]) - else: - path = self._flavour.join(*args) - if isinstance(path, str): - # Force-cast str subclasses to str (issue #21127) - self._fspath = str(path) or '.' - else: - raise TypeError( - "argument should be a str or an os.PathLike " - "object where __fspath__ returns a str, " - f"not {type(path).__name__!r}") - else: - self._fspath = '.' + @classmethod + def _from_parts(cls, args): + self = object.__new__(cls) + self._fspath = cls._join_parts(args) + return self def _load_parts(self): drv, root, parts = self._parse_path(self._fspath) @@ -310,9 +317,7 @@ def _load_parts(self): @classmethod def _from_parsed_parts(cls, drv, root, parts): - path = cls._format_parsed_parts(drv, root, parts) - self = cls(path) - self._str = path + self = object.__new__(cls) self._drv = drv self._root = root self._parts_cached = parts @@ -322,10 +327,8 @@ def _from_parsed_parts(cls, drv, root, parts): def _format_parsed_parts(cls, drv, root, parts): if drv or root: return drv + root + cls._flavour.sep.join(parts[1:]) - elif parts: - return cls._flavour.sep.join(parts) else: - return '.' + return cls._flavour.sep.join(parts) def __str__(self): """Return the string representation of the path, suitable for @@ -334,11 +337,15 @@ def __str__(self): return self._str except AttributeError: self._str = self._format_parsed_parts(self.drive, self.root, - self._parts) + self._parts) or '.' return self._str def __fspath__(self): - return self._fspath + try: + return self._fspath or '.' + except AttributeError: + self._fspath = str(self) + return self._fspath def as_posix(self): """Return the string representation of the path with forward (/) @@ -584,7 +591,7 @@ def joinpath(self, *args): paths) or a totally different path (if one of the arguments is anchored). """ - return type(self)(self, *args) + return self._from_parts((self,) + args) def __truediv__(self, key): try: @@ -594,7 +601,7 @@ def __truediv__(self, key): def __rtruediv__(self, key): try: - return type(self)(key, self) + return self._from_parts([key] + self._parts) except TypeError: return NotImplemented @@ -695,17 +702,14 @@ class Path(PurePath): """ __slots__ = () - def __init__(self, *args, **kwargs): + def __new__(cls, *args, **kwargs): if kwargs: msg = ("support for supplying keyword arguments to pathlib.PurePath " "is deprecated and scheduled for removal in Python {remove}") warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14)) - super().__init__(*args) - - def __new__(cls, *args, **kwargs): if cls is Path: cls = WindowsPath if os.name == 'nt' else PosixPath - return super().__new__(cls) + return cls._from_parts(args) def _make_child_relpath(self, part): # This is an optimization used for dir walking. `part` must be @@ -818,7 +822,7 @@ def absolute(self): cwd = self._flavour.abspath(self.drive) else: cwd = os.getcwd() - return type(self)(cwd, *self._parts) + return self._from_parts([cwd] + self._parts) def resolve(self, strict=False): """ @@ -836,7 +840,7 @@ def check_eloop(e): except OSError as e: check_eloop(e) raise - p = type(self)(s) + p = self._from_parts((s,)) # In non-strict mode, realpath() doesn't raise on symlink loops. # Ensure we get an exception by calling stat() @@ -926,7 +930,7 @@ def readlink(self): """ if not hasattr(os, "readlink"): raise NotImplementedError("os.readlink() not available on this system") - return type(self)(os.readlink(self)) + return self._from_parts((os.readlink(self),)) def touch(self, mode=0o666, exist_ok=True): """ @@ -1200,7 +1204,7 @@ def expanduser(self): homedir = self._flavour.expanduser(self._parts[0]) if homedir[:1] == "~": raise RuntimeError("Could not determine home directory.") - return type(self)(homedir, *self._parts[1:]) + return self._from_parts([homedir] + self._parts[1:]) return self From cbf0fcd2890f3a0ce66bca91b8027d3432212f03 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 6 Mar 2023 02:27:17 +0000 Subject: [PATCH 08/13] Fix pickling of paths created via walking. --- Lib/pathlib.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b7ed0bf60b5428..47bdc283351049 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -267,7 +267,9 @@ def __new__(cls, *args): return cls._from_parts(args) def __reduce__(self): - return (self.__class__, (self._fspath,)) + # Using the parts tuple helps share interned path parts + # when pickling related paths. + return (self.__class__, self.parts) @classmethod def _join_parts(cls, parts): From eb7087f7a9f44823071559ba363c8cd0579a1981 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 6 Mar 2023 02:36:27 +0000 Subject: [PATCH 09/13] Simplify patch slightly --- Lib/pathlib.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 47bdc283351049..1de4ab69e7e47f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -272,9 +272,9 @@ def __reduce__(self): return (self.__class__, self.parts) @classmethod - def _join_parts(cls, parts): + def _from_parts(cls, parts): if not parts: - return '' + path = '' elif len(parts) == 1: path = os.fspath(parts[0]) else: @@ -287,7 +287,9 @@ def _join_parts(cls, parts): "argument should be a str or an os.PathLike " "object where __fspath__ returns a str, " f"not {type(path).__name__!r}") - return path + self = object.__new__(cls) + self._fspath = path + return self @classmethod def _parse_path(cls, path): @@ -305,12 +307,6 @@ def _parse_path(cls, path): parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed - @classmethod - def _from_parts(cls, args): - self = object.__new__(cls) - self._fspath = cls._join_parts(args) - return self - def _load_parts(self): drv, root, parts = self._parse_path(self._fspath) self._drv = drv From 3b53e277b7d4eda2e87f6c51951dd0e6eebd3483 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 11 Mar 2023 21:55:26 +0000 Subject: [PATCH 10/13] Remove unused import --- Lib/pathlib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1de4ab69e7e47f..ad3c375ea6ecdb 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -16,7 +16,6 @@ import warnings from _collections_abc import Sequence from errno import ENOENT, ENOTDIR, EBADF, ELOOP -from operator import attrgetter from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes From d9a6080f78c2ffa1e4bc0f6c2d3171c36719c94e Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 11 Mar 2023 22:16:28 +0000 Subject: [PATCH 11/13] Fix dodgy merge, add comment about _from_parsed_parts() --- Lib/pathlib.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c1953b529394f0..2d53b081b53f5a 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -342,6 +342,7 @@ def __fspath__(self): try: return self._fspath or '.' except AttributeError: + # The _from_parsed_parts() constructor does not set _fspath. self._fspath = str(self) return self._fspath @@ -1202,7 +1203,7 @@ def expanduser(self): homedir = self._flavour.expanduser(self._parts[0]) if homedir[:1] == "~": raise RuntimeError("Could not determine home directory.") - drv, root, parts = self._parse_parts((homedir,)) + drv, root, parts = self._parse_path(homedir) return self._from_parsed_parts(drv, root, parts + self._parts[1:]) return self From 9650dca6ee972f8bb53198bfc7860a1e6dc510b0 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 17 Mar 2023 15:43:28 +0000 Subject: [PATCH 12/13] Stop returning unnormalised path from `__fspath__()` --- Lib/importlib/metadata/__init__.py | 5 +---- Lib/pathlib.py | 9 ++------- .../2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst | 3 +-- 3 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 7e79b28754fe3b..40ab1a1aaac328 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -748,10 +748,7 @@ def read_text(self, filename): NotADirectoryError, PermissionError, ): - path = self._path - if filename: - path /= filename - return path.read_text(encoding='utf-8') + return self._path.joinpath(filename).read_text(encoding='utf-8') read_text.__doc__ = Distribution.read_text.__doc__ diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 2d53b081b53f5a..dd11bd60d36be3 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -339,12 +339,7 @@ def __str__(self): return self._str def __fspath__(self): - try: - return self._fspath or '.' - except AttributeError: - # The _from_parsed_parts() constructor does not set _fspath. - self._fspath = str(self) - return self._fspath + return str(self) def as_posix(self): """Return the string representation of the path with forward (/) @@ -355,7 +350,7 @@ def as_posix(self): def __bytes__(self): """Return the bytes representation of the path. This is only recommended to use under Unix.""" - return os.fsencode(str(self)) + return os.fsencode(self) def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) diff --git a/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst index 0aca6df0030805..1d11c5a1443ae7 100644 --- a/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst +++ b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst @@ -1,4 +1,3 @@ Speed up construction and joining of :class:`pathlib.PurePath` objects by deferring path parsing and normalization until needed. Some path object -operations are 2-4x faster as a result of this change. An unnormalized path -is now returned by :func:`os.fspath` when a pathlib path object is given. +operations are 2-4x faster as a result of this change. From 97995bd2c3ea3c31bb420fab61d55b4ff0332bd0 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 17 Mar 2023 18:05:03 +0000 Subject: [PATCH 13/13] Undo change to `joinpath()`, fix news blurb. --- Lib/pathlib.py | 17 ++++++++++++++++- ...23-02-04-17-47-08.gh-issue-101362.uZjk9-.rst | 5 ++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index dd11bd60d36be3..d45b122a0fa305 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -585,7 +585,22 @@ def joinpath(self, *args): paths) or a totally different path (if one of the arguments is anchored). """ - return self._from_parts((self,) + args) + drv1, root1, parts1 = self._drv, self._root, self._parts + drv2, root2, parts2 = self._parse_parts(args) + if root2: + if not drv2 and drv1: + return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:]) + else: + return self._from_parsed_parts(drv2, root2, parts2) + elif drv2: + if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1): + # Same drive => second path is relative to the first. + return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:]) + else: + return self._from_parsed_parts(drv2, root2, parts2) + else: + # Second path is non-anchored (common case). + return self._from_parsed_parts(drv1, root1, parts1 + parts2) def __truediv__(self, key): try: diff --git a/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst index 1d11c5a1443ae7..cbcab09342d592 100644 --- a/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst +++ b/Misc/NEWS.d/next/Library/2023-02-04-17-47-08.gh-issue-101362.uZjk9-.rst @@ -1,3 +1,2 @@ -Speed up construction and joining of :class:`pathlib.PurePath` objects by -deferring path parsing and normalization until needed. Some path object -operations are 2-4x faster as a result of this change. +Speed up construction of :class:`pathlib.PurePath` objects by deferring +path parsing and normalization.