Skip to content

Commit 59dcdb1

Browse files
committed
pythonGH-77609: Support following symlinks in pathlib.Path.glob()
Add a keyword-only *follow_symlinks* parameter to `pathlib.Path.glob()` and `rglob()`, defaulting to false. When set to true, symlinks to directories are followed as if they were directories. Previously these methods had two inconsistencies: they followed symlinks except when evaluating "`**`" wildcards, and on Windows they returned paths in filesystem casing except when evaluating non-wildcard tokens. Both problems are solved here, which will allow us to address pythonGH-102613 and pythonGH-81079 in future commits.
1 parent bb396ee commit 59dcdb1

File tree

4 files changed

+85
-79
lines changed

4 files changed

+85
-79
lines changed

Doc/library/pathlib.rst

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -852,7 +852,7 @@ call fails (for example because the path doesn't exist).
852852
.. versionadded:: 3.5
853853

854854

855-
.. method:: Path.glob(pattern)
855+
.. method:: Path.glob(pattern, *, follow_symlinks=False)
856856

857857
Glob the given relative *pattern* in the directory represented by this path,
858858
yielding all matching files (of any kind)::
@@ -873,6 +873,9 @@ call fails (for example because the path doesn't exist).
873873
PosixPath('setup.py'),
874874
PosixPath('test_pathlib.py')]
875875

876+
By default, :meth:`Path.glob` does not follow symlinks. Set
877+
*follow_symlinks* to true to visit symlinks to directories.
878+
876879
.. note::
877880
Using the "``**``" pattern in large directory trees may consume
878881
an inordinate amount of time.
@@ -883,6 +886,10 @@ call fails (for example because the path doesn't exist).
883886
Return only directories if *pattern* ends with a pathname component
884887
separator (:data:`~os.sep` or :data:`~os.altsep`).
885888

889+
.. versionchanged:: 3.12
890+
The *follow_symlinks* parameter was added. In previous versions,
891+
symlinks were followed except when expanding "``**``" wildcards.
892+
886893
.. method:: Path.group()
887894

888895
Return the name of the group owning the file. :exc:`KeyError` is raised
@@ -1268,7 +1275,7 @@ call fails (for example because the path doesn't exist).
12681275
.. versionadded:: 3.6
12691276
The *strict* argument (pre-3.6 behavior is strict).
12701277

1271-
.. method:: Path.rglob(pattern)
1278+
.. method:: Path.rglob(pattern, *, follow_symlinks=False)
12721279

12731280
Glob the given relative *pattern* recursively. This is like calling
12741281
:meth:`Path.glob` with "``**/``" added in front of the *pattern*, where
@@ -1281,12 +1288,19 @@ call fails (for example because the path doesn't exist).
12811288
PosixPath('setup.py'),
12821289
PosixPath('test_pathlib.py')]
12831290

1291+
By default, :meth:`Path.rglob` does not follow symlinks. Set
1292+
*follow_symlinks* to true to visit symlinks to directories.
1293+
12841294
.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
12851295

12861296
.. versionchanged:: 3.11
12871297
Return only directories if *pattern* ends with a pathname component
12881298
separator (:data:`~os.sep` or :data:`~os.altsep`).
12891299

1300+
.. versionchanged:: 3.12
1301+
The *follow_symlinks* parameter was added. In previous versions,
1302+
symlinks were followed except when expanding "``**``" wildcards.
1303+
12901304
.. method:: Path.rmdir()
12911305

12921306
Remove this directory. The directory must be empty.

Lib/pathlib.py

Lines changed: 18 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,6 @@ def _ignore_error(exception):
5454
return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or
5555
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
5656

57-
58-
def _is_wildcard_pattern(pat):
59-
# Whether this pattern needs actual matching using fnmatch, or can
60-
# be looked up directly as a file.
61-
return "*" in pat or "?" in pat or "[" in pat
62-
6357
#
6458
# Globbing helpers
6559
#
@@ -74,10 +68,8 @@ def _make_selector(pattern_parts, flavour):
7468
cls = _RecursiveWildcardSelector
7569
elif '**' in pat:
7670
raise ValueError("Invalid pattern: '**' can only be an entire path component")
77-
elif _is_wildcard_pattern(pat):
78-
cls = _WildcardSelector
7971
else:
80-
cls = _PreciseSelector
72+
cls = _WildcardSelector
8173
return cls(pat, child_parts, flavour)
8274

8375

@@ -94,48 +86,28 @@ def __init__(self, child_parts, flavour):
9486
self.successor = _TerminatingSelector()
9587
self.dironly = False
9688

97-
def select_from(self, parent_path):
89+
def select_from(self, parent_path, follow_symlinks):
9890
"""Iterate over all child paths of `parent_path` matched by this
9991
selector. This can contain parent_path itself."""
10092
path_cls = type(parent_path)
101-
is_dir = path_cls.is_dir
102-
exists = path_cls.exists
10393
scandir = path_cls._scandir
10494
normcase = path_cls._flavour.normcase
105-
if not is_dir(parent_path):
106-
return iter([])
107-
return self._select_from(parent_path, is_dir, exists, scandir, normcase)
95+
return self._select_from(parent_path, follow_symlinks, scandir, normcase)
10896

10997

11098
class _TerminatingSelector:
11199

112-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
100+
def _select_from(self, parent_path, follow_symlinks, scandir, normcase):
113101
yield parent_path
114102

115103

116-
class _PreciseSelector(_Selector):
117-
118-
def __init__(self, name, child_parts, flavour):
119-
self.name = name
120-
_Selector.__init__(self, child_parts, flavour)
121-
122-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
123-
try:
124-
path = parent_path._make_child_relpath(self.name)
125-
if (is_dir if self.dironly else exists)(path):
126-
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
127-
yield p
128-
except PermissionError:
129-
return
130-
131-
132104
class _WildcardSelector(_Selector):
133105

134106
def __init__(self, pat, child_parts, flavour):
135107
self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
136108
_Selector.__init__(self, child_parts, flavour)
137109

138-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
110+
def _select_from(self, parent_path, follow_symlinks, scandir, normcase):
139111
try:
140112
# We must close the scandir() object before proceeding to
141113
# avoid exhausting file descriptors when globbing deep trees.
@@ -147,7 +119,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
147119
# "entry.is_dir()" can raise PermissionError
148120
# in some cases (see bpo-38894), which is not
149121
# among the errors ignored by _ignore_error()
150-
if not entry.is_dir():
122+
if not entry.is_dir(follow_symlinks=follow_symlinks):
151123
continue
152124
except OSError as e:
153125
if not _ignore_error(e):
@@ -156,7 +128,7 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
156128
name = entry.name
157129
if self.match(normcase(name)):
158130
path = parent_path._make_child_relpath(name)
159-
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
131+
for p in self.successor._select_from(path, follow_symlinks, scandir, normcase):
160132
yield p
161133
except PermissionError:
162134
return
@@ -167,7 +139,7 @@ class _RecursiveWildcardSelector(_Selector):
167139
def __init__(self, pat, child_parts, flavour):
168140
_Selector.__init__(self, child_parts, flavour)
169141

170-
def _iterate_directories(self, parent_path, is_dir, scandir):
142+
def _iterate_directories(self, parent_path, follow_symlinks, scandir):
171143
yield parent_path
172144
try:
173145
# We must close the scandir() object before proceeding to
@@ -177,24 +149,24 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
177149
for entry in entries:
178150
entry_is_dir = False
179151
try:
180-
entry_is_dir = entry.is_dir()
152+
entry_is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
181153
except OSError as e:
182154
if not _ignore_error(e):
183155
raise
184-
if entry_is_dir and not entry.is_symlink():
156+
if entry_is_dir:
185157
path = parent_path._make_child_relpath(entry.name)
186-
for p in self._iterate_directories(path, is_dir, scandir):
158+
for p in self._iterate_directories(path, follow_symlinks, scandir):
187159
yield p
188160
except PermissionError:
189161
return
190162

191-
def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
163+
def _select_from(self, parent_path, follow_symlinks, scandir, normcase):
192164
try:
193165
yielded = set()
194166
try:
195167
successor_select = self.successor._select_from
196-
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
197-
for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
168+
for starting_point in self._iterate_directories(parent_path, follow_symlinks, scandir):
169+
for p in successor_select(starting_point, follow_symlinks, scandir, normcase):
198170
if p not in yielded:
199171
yield p
200172
yielded.add(p)
@@ -763,7 +735,7 @@ def _scandir(self):
763735
# includes scandir(), which is used to implement glob().
764736
return os.scandir(self)
765737

766-
def glob(self, pattern):
738+
def glob(self, pattern, *, follow_symlinks=False):
767739
"""Iterate over this subtree and yield all existing files (of any
768740
kind, including directories) matching the given relative pattern.
769741
"""
@@ -776,10 +748,10 @@ def glob(self, pattern):
776748
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
777749
pattern_parts.append('')
778750
selector = _make_selector(tuple(pattern_parts), self._flavour)
779-
for p in selector.select_from(self):
751+
for p in selector.select_from(self, follow_symlinks):
780752
yield p
781753

782-
def rglob(self, pattern):
754+
def rglob(self, pattern, *, follow_symlinks=False):
783755
"""Recursively yield all existing files (of any kind, including
784756
directories) matching the given relative pattern, anywhere in
785757
this subtree.
@@ -791,7 +763,7 @@ def rglob(self, pattern):
791763
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
792764
pattern_parts.append('')
793765
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
794-
for p in selector.select_from(self):
766+
for p in selector.select_from(self, follow_symlinks):
795767
yield p
796768

797769
def absolute(self):

Lib/test/test_pathlib.py

Lines changed: 49 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,22 +1760,25 @@ def _check(glob, expected):
17601760
_check(p.glob("dir*/file*"), ["dirB/fileB", "dirC/fileC"])
17611761
if not os_helper.can_symlink():
17621762
_check(p.glob("*A"), ['dirA', 'fileA'])
1763-
else:
1764-
_check(p.glob("*A"), ['dirA', 'fileA', 'linkA'])
1765-
if not os_helper.can_symlink():
17661763
_check(p.glob("*B/*"), ['dirB/fileB'])
17671764
else:
1768-
_check(p.glob("*B/*"), ['dirB/fileB', 'dirB/linkD',
1769-
'linkB/fileB', 'linkB/linkD'])
1770-
if not os_helper.can_symlink():
1771-
_check(p.glob("*/fileB"), ['dirB/fileB'])
1772-
else:
1773-
_check(p.glob("*/fileB"), ['dirB/fileB', 'linkB/fileB'])
1765+
_check(p.glob("*A"), ['dirA', 'fileA', 'linkA'])
1766+
_check(p.glob("*B/*"), ['dirB/fileB', 'dirB/linkD'])
1767+
_check(p.glob("*/fileB"), ['dirB/fileB'])
1768+
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE"])
17741769

1775-
if not os_helper.can_symlink():
1776-
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE"])
1777-
else:
1778-
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])
1770+
@os_helper.skip_unless_symlink
1771+
def test_glob_follow_symlinks_common(self):
1772+
def _check(path, glob, expected):
1773+
self.assertEqual(set(path.glob(glob, follow_symlinks=True)), { P(BASE, q) for q in expected })
1774+
P = self.cls
1775+
p = P(BASE)
1776+
_check(p, "fileB", [])
1777+
_check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
1778+
_check(p, "*A", ['dirA', 'fileA', 'linkA'])
1779+
_check(p, "*B/*", ['dirB/fileB', 'dirB/linkD', 'linkB/fileB', 'linkB/linkD'])
1780+
_check(p, "*/fileB", ['dirB/fileB', 'linkB/fileB'])
1781+
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])
17791782

17801783
def test_rglob_common(self):
17811784
def _check(glob, expected):
@@ -1787,22 +1790,10 @@ def _check(glob, expected):
17871790
_check(it, ["fileA"])
17881791
_check(p.rglob("fileB"), ["dirB/fileB"])
17891792
_check(p.rglob("*/fileA"), [])
1790-
if not os_helper.can_symlink():
1791-
_check(p.rglob("*/fileB"), ["dirB/fileB"])
1792-
else:
1793-
_check(p.rglob("*/fileB"), ["dirB/fileB", "dirB/linkD/fileB",
1794-
"linkB/fileB", "dirA/linkC/fileB"])
1793+
_check(p.rglob("*/fileB"), ["dirB/fileB"])
17951794
_check(p.rglob("file*"), ["fileA", "dirB/fileB",
17961795
"dirC/fileC", "dirC/dirD/fileD"])
1797-
if not os_helper.can_symlink():
1798-
_check(p.rglob("*/"), [
1799-
"dirA", "dirB", "dirC", "dirC/dirD", "dirE",
1800-
])
1801-
else:
1802-
_check(p.rglob("*/"), [
1803-
"dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC",
1804-
"dirC/dirD", "dirE", "linkB",
1805-
])
1796+
_check(p.rglob("*/"), ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
18061797
_check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"])
18071798

18081799
p = P(BASE, "dirC")
@@ -1816,6 +1807,33 @@ def _check(glob, expected):
18161807
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
18171808
_check(p.rglob("*.*"), ["dirC/novel.txt"])
18181809

1810+
@os_helper.skip_unless_symlink
1811+
def test_rglob_follow_symlinks_common(self):
1812+
def _check(path, glob, expected):
1813+
actual = {path for path in path.rglob(glob, follow_symlinks=True)
1814+
if 'linkD' not in path.parts} # exclude symlink loop.
1815+
self.assertEqual(actual, { P(BASE, q) for q in expected })
1816+
P = self.cls
1817+
p = P(BASE)
1818+
_check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
1819+
_check(p, "*/fileA", [])
1820+
_check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
1821+
_check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB",
1822+
"dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"])
1823+
_check(p, "*/", ["dirA", "dirA/linkC", "dirB", "dirC", "dirC/dirD", "dirE", "linkB"])
1824+
_check(p, "", ["", "dirA", "dirA/linkC", "dirB", "dirC", "dirE", "dirC/dirD", "linkB"])
1825+
1826+
p = P(BASE, "dirC")
1827+
_check(p, "*", ["dirC/fileC", "dirC/novel.txt",
1828+
"dirC/dirD", "dirC/dirD/fileD"])
1829+
_check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
1830+
_check(p, "*/*", ["dirC/dirD/fileD"])
1831+
_check(p, "*/", ["dirC/dirD"])
1832+
_check(p, "", ["dirC", "dirC/dirD"])
1833+
# gh-91616, a re module regression
1834+
_check(p, "*.txt", ["dirC/novel.txt"])
1835+
_check(p, "*.*", ["dirC/novel.txt"])
1836+
18191837
@os_helper.skip_unless_symlink
18201838
def test_rglob_symlink_loop(self):
18211839
# Don't get fooled by symlink loops (Issue #26012).
@@ -1856,8 +1874,8 @@ def test_glob_dotdot(self):
18561874
# ".." is not special in globs.
18571875
P = self.cls
18581876
p = P(BASE)
1859-
self.assertEqual(set(p.glob("..")), { P(BASE, "..") })
1860-
self.assertEqual(set(p.glob("dirA/../file*")), { P(BASE, "dirA/../fileA") })
1877+
self.assertEqual(set(p.glob("..")), set())
1878+
self.assertEqual(set(p.glob("dirA/../file*")), set())
18611879
self.assertEqual(set(p.glob("../xyzzy")), set())
18621880

18631881
@os_helper.skip_unless_symlink
@@ -3053,15 +3071,15 @@ def test_glob(self):
30533071
self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") })
30543072
self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") })
30553073
self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") })
3056-
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"})
3074+
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
30573075
self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})
30583076

30593077
def test_rglob(self):
30603078
P = self.cls
30613079
p = P(BASE, "dirC")
30623080
self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
30633081
self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") })
3064-
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"})
3082+
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})
30653083

30663084
def test_expanduser(self):
30673085
P = self.cls
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add *follow_symlinks* argument to :meth:`pathlib.Path.glob` and
2+
:meth:`~pathlib.Path.rglob`, defaulting to false.

0 commit comments

Comments
 (0)