Skip to content

Commit 11c3020

Browse files
barneygale and zooba authored
GH-76846, GH-85281: Call __new__() and __init__() on pathlib subclasses (GH-102789)
Fix an issue where `__new__()` and `__init__()` were not called on subclasses of `pathlib.PurePath` and `Path` in some circumstances. Paths are now normalized on-demand. This speeds up path construction, `p.joinpath(q)`, and `p / q`.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
1 parent 2a72125 commit 11c3020

File tree

3 files changed

+107
-68
lines changed

3 files changed

+107
-68
lines changed

Lib/pathlib.py

+78 -67
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
import warnings
1717
from _collections_abc import Sequence
1818
from errno import ENOENT, ENOTDIR, EBADF, ELOOP
19-
from operator import attrgetter
2019
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
2120
from urllib.parse import quote_from_bytes as urlquote_from_bytes
2221

@@ -216,8 +215,8 @@ class _PathParents(Sequence):
216215
def __init__(self, path):
217216
# We don't store the instance to avoid reference cycles
218217
self._pathcls = type(path)
219-
self._drv = path._drv
220-
self._root = path._root
218+
self._drv = path.drive
219+
self._root = path.root
221220
self._parts = path._parts
222221

223222
def __len__(self):
@@ -251,36 +250,33 @@ class PurePath(object):
251250
directly, regardless of your system.
252251
"""
253252
__slots__ = (
254-
'_drv', '_root', '_parts',
253+
'_raw_path', '_drv', '_root', '_parts_cached',
255254
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
256255
)
257256
_flavour = os.path
258257

259-
def __new__(cls, *args):
258+
def __new__(cls, *args, **kwargs):
260259
"""Construct a PurePath from one or several strings and or existing
261260
PurePath objects. The strings and path objects are combined so as
262261
to yield a canonicalized path, which is incorporated into the
263262
new PurePath object.
264263
"""
265264
if cls is PurePath:
266265
cls = PureWindowsPath if os.name == 'nt' else PurePosixPath
267-
return cls._from_parts(args)
266+
return object.__new__(cls)
268267

269268
def __reduce__(self):
270269
# Using the parts tuple helps share interned path parts
271270
# when pickling related paths.
272-
return (self.__class__, tuple(self._parts))
271+
return (self.__class__, self.parts)
273272

274-
@classmethod
275-
def _parse_parts(cls, parts):
276-
if not parts:
277-
return '', '', []
278-
elif len(parts) == 1:
279-
path = os.fspath(parts[0])
273+
def __init__(self, *args):
274+
if not args:
275+
path = ''
276+
elif len(args) == 1:
277+
path = os.fspath(args[0])
280278
else:
281-
path = cls._flavour.join(*parts)
282-
sep = cls._flavour.sep
283-
altsep = cls._flavour.altsep
279+
path = self._flavour.join(*args)
284280
if isinstance(path, str):
285281
# Force-cast str subclasses to str (issue #21127)
286282
path = str(path)
@@ -289,6 +285,14 @@ def _parse_parts(cls, parts):
289285
"argument should be a str or an os.PathLike "
290286
"object where __fspath__ returns a str, "
291287
f"not {type(path).__name__!r}")
288+
self._raw_path = path
289+
290+
@classmethod
291+
def _parse_path(cls, path):
292+
if not path:
293+
return '', '', []
294+
sep = cls._flavour.sep
295+
altsep = cls._flavour.altsep
292296
if altsep:
293297
path = path.replace(altsep, sep)
294298
drv, root, rel = cls._flavour.splitroot(path)
@@ -299,21 +303,20 @@ def _parse_parts(cls, parts):
299303
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
300304
return drv, root, parsed
301305

302-
@classmethod
303-
def _from_parts(cls, args):
304-
self = object.__new__(cls)
305-
drv, root, parts = self._parse_parts(args)
306+
def _load_parts(self):
307+
drv, root, parts = self._parse_path(self._raw_path)
306308
self._drv = drv
307309
self._root = root
308-
self._parts = parts
309-
return self
310+
self._parts_cached = parts
310311

311312
@classmethod
312313
def _from_parsed_parts(cls, drv, root, parts):
313-
self = object.__new__(cls)
314+
path = cls._format_parsed_parts(drv, root, parts)
315+
self = cls(path)
316+
self._str = path or '.'
314317
self._drv = drv
315318
self._root = root
316-
self._parts = parts
319+
self._parts_cached = parts
317320
return self
318321

319322
@classmethod
@@ -330,7 +333,7 @@ def __str__(self):
330333
try:
331334
return self._str
332335
except AttributeError:
333-
self._str = self._format_parsed_parts(self._drv, self._root,
336+
self._str = self._format_parsed_parts(self.drive, self.root,
334337
self._parts) or '.'
335338
return self._str
336339

@@ -356,7 +359,7 @@ def as_uri(self):
356359
if not self.is_absolute():
357360
raise ValueError("relative path can't be expressed as a file URI")
358361

359-
drive = self._drv
362+
drive = self.drive
360363
if len(drive) == 2 and drive[1] == ':':
361364
# It's a path on a local drive => 'file:///c:/a/b'
362365
prefix = 'file:///' + drive
@@ -412,23 +415,43 @@ def __ge__(self, other):
412415
return NotImplemented
413416
return self._parts_normcase >= other._parts_normcase
414417

415-
drive = property(attrgetter('_drv'),
416-
doc="""The drive prefix (letter or UNC path), if any.""")
418+
@property
419+
def drive(self):
420+
"""The drive prefix (letter or UNC path), if any."""
421+
try:
422+
return self._drv
423+
except AttributeError:
424+
self._load_parts()
425+
return self._drv
426+
427+
@property
428+
def root(self):
429+
"""The root of the path, if any."""
430+
try:
431+
return self._root
432+
except AttributeError:
433+
self._load_parts()
434+
return self._root
417435

418-
root = property(attrgetter('_root'),
419-
doc="""The root of the path, if any.""")
436+
@property
437+
def _parts(self):
438+
try:
439+
return self._parts_cached
440+
except AttributeError:
441+
self._load_parts()
442+
return self._parts_cached
420443

421444
@property
422445
def anchor(self):
423446
"""The concatenation of the drive and root, or ''."""
424-
anchor = self._drv + self._root
447+
anchor = self.drive + self.root
425448
return anchor
426449

427450
@property
428451
def name(self):
429452
"""The final path component, if any."""
430453
parts = self._parts
431-
if len(parts) == (1 if (self._drv or self._root) else 0):
454+
if len(parts) == (1 if (self.drive or self.root) else 0):
432455
return ''
433456
return parts[-1]
434457

@@ -477,7 +500,7 @@ def with_name(self, name):
477500
drv, root, tail = f.splitroot(name)
478501
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
479502
raise ValueError("Invalid name %r" % (name))
480-
return self._from_parsed_parts(self._drv, self._root,
503+
return self._from_parsed_parts(self.drive, self.root,
481504
self._parts[:-1] + [name])
482505

483506
def with_stem(self, stem):
@@ -502,7 +525,7 @@ def with_suffix(self, suffix):
502525
name = name + suffix
503526
else:
504527
name = name[:-len(old_suffix)] + suffix
505-
return self._from_parsed_parts(self._drv, self._root,
528+
return self._from_parsed_parts(self.drive, self.root,
506529
self._parts[:-1] + [name])
507530

508531
def relative_to(self, other, /, *_deprecated, walk_up=False):
@@ -561,22 +584,7 @@ def joinpath(self, *args):
561584
paths) or a totally different path (if one of the arguments is
562585
anchored).
563586
"""
564-
drv1, root1, parts1 = self._drv, self._root, self._parts
565-
drv2, root2, parts2 = self._parse_parts(args)
566-
if root2:
567-
if not drv2 and drv1:
568-
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
569-
else:
570-
return self._from_parsed_parts(drv2, root2, parts2)
571-
elif drv2:
572-
if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
573-
# Same drive => second path is relative to the first.
574-
return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
575-
else:
576-
return self._from_parsed_parts(drv2, root2, parts2)
577-
else:
578-
# Second path is non-anchored (common case).
579-
return self._from_parsed_parts(drv1, root1, parts1 + parts2)
587+
return self.__class__(self._raw_path, *args)
580588

581589
def __truediv__(self, key):
582590
try:
@@ -586,15 +594,15 @@ def __truediv__(self, key):
586594

587595
def __rtruediv__(self, key):
588596
try:
589-
return self._from_parts([key] + self._parts)
597+
return type(self)(key, self._raw_path)
590598
except TypeError:
591599
return NotImplemented
592600

593601
@property
594602
def parent(self):
595603
"""The logical parent of the path."""
596-
drv = self._drv
597-
root = self._root
604+
drv = self.drive
605+
root = self.root
598606
parts = self._parts
599607
if len(parts) == 1 and (drv or root):
600608
return self
@@ -610,7 +618,7 @@ def is_absolute(self):
610618
a drive)."""
611619
# ntpath.isabs() is defective - see GH-44626 .
612620
if self._flavour is ntpath:
613-
return bool(self._drv and self._root)
621+
return bool(self.drive and self.root)
614622
return self._flavour.isabs(self)
615623

616624
def is_reserved(self):
@@ -634,7 +642,7 @@ def match(self, path_pattern):
634642
Return True if this path matches the given pattern.
635643
"""
636644
path_pattern = self._flavour.normcase(path_pattern)
637-
drv, root, pat_parts = self._parse_parts((path_pattern,))
645+
drv, root, pat_parts = self._parse_path(path_pattern)
638646
if not pat_parts:
639647
raise ValueError("empty pattern")
640648
parts = self._parts_normcase
@@ -687,20 +695,23 @@ class Path(PurePath):
687695
"""
688696
__slots__ = ()
689697

690-
def __new__(cls, *args, **kwargs):
698+
def __init__(self, *args, **kwargs):
691699
if kwargs:
692700
msg = ("support for supplying keyword arguments to pathlib.PurePath "
693701
"is deprecated and scheduled for removal in Python {remove}")
694702
warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14))
703+
super().__init__(*args)
704+
705+
def __new__(cls, *args, **kwargs):
695706
if cls is Path:
696707
cls = WindowsPath if os.name == 'nt' else PosixPath
697-
return cls._from_parts(args)
708+
return object.__new__(cls)
698709

699710
def _make_child_relpath(self, part):
700711
# This is an optimization used for dir walking. `part` must be
701712
# a single part relative to this path.
702713
parts = self._parts + [part]
703-
return self._from_parsed_parts(self._drv, self._root, parts)
714+
return self._from_parsed_parts(self.drive, self.root, parts)
704715

705716
def __enter__(self):
706717
# In previous versions of pathlib, __exit__() marked this path as
@@ -770,7 +781,7 @@ def glob(self, pattern):
770781
sys.audit("pathlib.Path.glob", self, pattern)
771782
if not pattern:
772783
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
773-
drv, root, pattern_parts = self._parse_parts((pattern,))
784+
drv, root, pattern_parts = self._parse_path(pattern)
774785
if drv or root:
775786
raise NotImplementedError("Non-relative patterns are unsupported")
776787
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
@@ -785,7 +796,7 @@ def rglob(self, pattern):
785796
this subtree.
786797
"""
787798
sys.audit("pathlib.Path.rglob", self, pattern)
788-
drv, root, pattern_parts = self._parse_parts((pattern,))
799+
drv, root, pattern_parts = self._parse_path(pattern)
789800
if drv or root:
790801
raise NotImplementedError("Non-relative patterns are unsupported")
791802
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
@@ -802,12 +813,12 @@ def absolute(self):
802813
"""
803814
if self.is_absolute():
804815
return self
805-
elif self._drv:
816+
elif self.drive:
806817
# There is a CWD on each drive-letter drive.
807-
cwd = self._flavour.abspath(self._drv)
818+
cwd = self._flavour.abspath(self.drive)
808819
else:
809820
cwd = os.getcwd()
810-
return self._from_parts([cwd] + self._parts)
821+
return type(self)(cwd, self._raw_path)
811822

812823
def resolve(self, strict=False):
813824
"""
@@ -825,7 +836,7 @@ def check_eloop(e):
825836
except OSError as e:
826837
check_eloop(e)
827838
raise
828-
p = self._from_parts((s,))
839+
p = type(self)(s)
829840

830841
# In non-strict mode, realpath() doesn't raise on symlink loops.
831842
# Ensure we get an exception by calling stat()
@@ -915,7 +926,7 @@ def readlink(self):
915926
"""
916927
if not hasattr(os, "readlink"):
917928
raise NotImplementedError("os.readlink() not available on this system")
918-
return self._from_parts((os.readlink(self),))
929+
return type(self)(os.readlink(self))
919930

920931
def touch(self, mode=0o666, exist_ok=True):
921932
"""
@@ -1184,12 +1195,12 @@ def expanduser(self):
11841195
""" Return a new path with expanded ~ and ~user constructs
11851196
(as returned by os.path.expanduser)
11861197
"""
1187-
if (not (self._drv or self._root) and
1198+
if (not (self.drive or self.root) and
11881199
self._parts and self._parts[0][:1] == '~'):
11891200
homedir = self._flavour.expanduser(self._parts[0])
11901201
if homedir[:1] == "~":
11911202
raise RuntimeError("Could not determine home directory.")
1192-
drv, root, parts = self._parse_parts((homedir,))
1203+
drv, root, parts = self._parse_path(homedir)
11931204
return self._from_parsed_parts(drv, root, parts + self._parts[1:])
11941205

11951206
return self

Lib/test/test_pathlib.py

+26 -1
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,9 @@
2727
class _BaseFlavourTest(object):
2828

2929
def _check_parse_parts(self, arg, expected):
30-
f = self.cls._parse_parts
30+
def f(parts):
31+
path = self.cls(*parts)._raw_path
32+
return self.cls._parse_path(path)
3133
sep = self.flavour.sep
3234
altsep = self.flavour.altsep
3335
actual = f([x.replace('/', sep) for x in arg])
@@ -136,6 +138,14 @@ def test_parse_parts(self):
136138
# Tests for the pure classes.
137139
#
138140

141+
class _BasePurePathSubclass(object):
142+
init_called = False
143+
144+
def __init__(self, *args):
145+
super().__init__(*args)
146+
self.init_called = True
147+
148+
139149
class _BasePurePathTest(object):
140150

141151
# Keys are canonical paths, values are list of tuples of arguments
@@ -221,6 +231,21 @@ def test_str_subclass_common(self):
221231
self._check_str_subclass('a/b.txt')
222232
self._check_str_subclass('/a/b.txt')
223233

234+
def test_init_called_common(self):
235+
class P(_BasePurePathSubclass, self.cls):
236+
pass
237+
p = P('foo', 'bar')
238+
self.assertTrue((p / 'foo').init_called)
239+
self.assertTrue(('foo' / p).init_called)
240+
self.assertTrue(p.joinpath('foo').init_called)
241+
self.assertTrue(p.with_name('foo').init_called)
242+
self.assertTrue(p.with_stem('foo').init_called)
243+
self.assertTrue(p.with_suffix('.foo').init_called)
244+
self.assertTrue(p.relative_to('foo').init_called)
245+
self.assertTrue(p.parent.init_called)
246+
for parent in p.parents:
247+
self.assertTrue(parent.init_called)
248+
224249
def test_join_common(self):
225250
P = self.cls
226251
p = P('a/b')

0 commit comments

Comments
 (0)