Skip to content

Commit b2758ff

Browse files
jaracowarsaw
authored and committed
bpo-36832: add zipfile.Path (#13153)
* bpo-36832: add zipfile.Path * bpo-36832: add documentation for zipfile.Path * 📜🤖 Added by blurb_it. * Remove module reference from blurb. * Sort the imports * Update docstrings and docs per recommendations. * Rely on test.support.temp_dir * Signal that 'root' is the parameter. * Correct spelling of 'mod' * Convert docstring to comment for brevity. * Fix more errors in the docs
1 parent 70b8054 commit b2758ff

File tree

4 files changed

+320
-8
lines changed

4 files changed

+320
-8
lines changed

Doc/library/zipfile.rst

+67
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,15 @@ The module defines the following items:
5252
:ref:`zipfile-objects` for constructor details.
5353

5454

55+
.. class:: Path
56+
:noindex:
57+
58+
A pathlib-compatible wrapper for zip files. See section
59+
:ref:`path-objects` for details.
60+
61+
.. versionadded:: 3.8
62+
63+
5564
.. class:: PyZipFile
5665
:noindex:
5766

@@ -456,6 +465,64 @@ The following data attributes are also available:
456465
truncated.
457466

458467

468+
.. _path-objects:
469+
470+
Path Objects
471+
------------
472+
473+
.. class:: Path(root, at='')
474+
475+
Construct a Path object from a ``root`` zipfile (which may be a
476+
:class:`ZipFile` instance or ``file`` suitable for passing to
477+
the :class:`ZipFile` constructor).
478+
479+
``at`` specifies the location of this Path within the zipfile,
480+
e.g. 'dir/file.txt', 'dir/', or ''. Defaults to the empty string,
481+
indicating the root.
482+
483+
Path objects expose the following features of :class:`pathlib.Path`
484+
objects:
485+
486+
Path objects are traversable using the ``/`` operator.
487+
488+
.. attribute:: Path.name
489+
490+
The final path component.
491+
492+
.. method:: Path.open(*, **)
493+
494+
Invoke :meth:`ZipFile.open` on the current path. Accepts
495+
the same arguments as :meth:`ZipFile.open`.
496+
497+
.. method:: Path.iterdir()
498+
499+
Enumerate the children of the current directory.
500+
501+
.. method:: Path.is_dir()
502+
503+
Return ``True`` if the current context references a directory.
504+
505+
.. method:: Path.is_file()
506+
507+
Return ``True`` if the current context references a file.
508+
509+
.. method:: Path.exists()
510+
511+
Return ``True`` if the current context references a file or
512+
directory in the zip file.
513+
514+
.. method:: Path.read_text(*, **)
515+
516+
Read the current file as unicode text. Positional and
517+
keyword arguments are passed through to
518+
:class:`io.TextIOWrapper` (except ``buffer``, which is
519+
implied by the context).
520+
521+
.. method:: Path.read_bytes()
522+
523+
Read the current file as bytes.
524+
525+
459526
.. _pyzipfile-objects:
460527

461528
PyZipFile Objects

Lib/test/test_zipfile.py

+113-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
import contextlib
2+
import importlib.util
23
import io
34
import os
4-
import importlib.util
55
import pathlib
66
import posixpath
7-
import time
7+
import shutil
88
import struct
9-
import zipfile
9+
import tempfile
10+
import time
1011
import unittest
12+
import zipfile
1113

1214

1315
from tempfile import TemporaryFile
@@ -2392,5 +2394,113 @@ def test_extract_command(self):
23922394
with open(path, 'rb') as f:
23932395
self.assertEqual(f.read(), zf.read(zi))
23942396

2397+
2398+
# Poor man's technique to consume a (smallish) iterable.
2399+
consume = tuple
2400+
2401+
2402+
def add_dirs(zipfile):
    """
    Given a writable zipfile, inject directory entries for
    any directories implied by the presence of children.

    Every missing ancestor directory (not only the immediate parent)
    is written, and each one is written exactly once even when several
    members share it.  Returns the same zipfile object that was passed in.
    """
    names = zipfile.namelist()
    # Track what is already in the archive, including entries added below,
    # so siblings sharing a parent do not trigger duplicate writes.
    present = set(names)
    for name in names:
        # Strip trailing slashes so the walk always moves strictly upward
        # and terminates even for names that begin with "/".
        parent = posixpath.dirname(name.rstrip("/")).rstrip("/")
        while parent:
            entry = parent + "/"
            if entry not in present:
                present.add(entry)
                zipfile.writestr(entry, b"")
            parent = posixpath.dirname(parent).rstrip("/")
    return zipfile
2414+
2415+
2416+
def build_abcde_files():
    """
    Return an in-memory, writable ZipFile named "abcde.zip" that
    holds three file members laid out like this:

    .
    ├── a.txt
    └── b
        ├── c.txt
        └── d
            └── e.txt

    Only the three files are written; no directory entries are added.
    """
    members = (
        ("a.txt", b"content of a"),
        ("b/c.txt", b"content of c"),
        ("b/d/e.txt", b"content of e"),
    )
    archive = zipfile.ZipFile(io.BytesIO(), "w")
    for arcname, payload in members:
        archive.writestr(arcname, payload)
    # The archive is backed by a buffer, so give it a name explicitly.
    archive.filename = "abcde.zip"
    return archive
2434+
2435+
2436+
class TestPath(unittest.TestCase):
    """
    Tests for zipfile.Path, run against the sample "abcde" archive both
    without and with explicit directory entries.
    """

    def setUp(self):
        # Context managers entered on this stack are released at test end.
        self.fixtures = contextlib.ExitStack()
        self.addCleanup(self.fixtures.close)

    def zipfile_abcde(self):
        """
        Yield the sample archive twice: first as built (directories only
        implied by member names), then with directory entries injected.
        """
        with self.subTest():
            yield build_abcde_files()
        with self.subTest():
            yield add_dirs(build_abcde_files())

    def zipfile_ondisk(self):
        """
        Yield a filesystem path to each sample archive, written into a
        temporary directory that lives until the test's fixtures close.
        """
        tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir()))
        for zipfile_abcde in self.zipfile_abcde():
            # Keep a reference to the backing buffer before closing the
            # archive; closing finalizes the data held in that buffer.
            buffer = zipfile_abcde.fp
            zipfile_abcde.close()
            path = tmpdir / zipfile_abcde.filename
            with path.open("wb") as strm:
                strm.write(buffer.getvalue())
            yield path

    def test_iterdir_istype(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            self.assertTrue(root.is_dir())
            a, b = root.iterdir()
            self.assertTrue(a.is_file())
            self.assertTrue(b.is_dir())
            c, d = b.iterdir()
            self.assertTrue(c.is_file())
            e, = d.iterdir()
            self.assertTrue(e.is_file())

    def test_open(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            a, _ = root.iterdir()
            with a.open() as strm:
                data = strm.read()
            self.assertEqual(data, b"content of a")

    def test_read(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            a, _ = root.iterdir()
            self.assertEqual(a.read_text(), "content of a")
            self.assertEqual(a.read_bytes(), b"content of a")

    def test_traverse_truediv(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            # Use the real member name so the check exercises an entry
            # that actually exists in the archive.
            a = root / "a.txt"
            self.assertTrue(a.is_file())
            e = root / "b" / "d" / "e.txt"
            self.assertEqual(e.read_text(), "content of e")

    def test_pathlike_construction(self):
        """
        zipfile.Path should be constructable from a path-like object
        """
        for zipfile_ondisk in self.zipfile_ondisk():
            pathlike = pathlib.Path(str(zipfile_ondisk))
            root = zipfile.Path(pathlike)
            # The archive was opened from disk by Path; close it before
            # the temporary directory is removed (cleanups run LIFO).
            self.addCleanup(root.root.close)

    def test_traverse_pathlike(self):
        for zipfile_abcde in self.zipfile_abcde():
            root = zipfile.Path(zipfile_abcde)
            # Only checks that joining with a path-like does not raise.
            root / pathlib.Path("a")
2504+
23952505
if __name__ == "__main__":
23962506
unittest.main()

Lib/zipfile.py

+139-5
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,18 @@
33
44
XXX references to utf-8 need further investigation.
55
"""
6+
import binascii
7+
import functools
8+
import importlib.util
69
import io
710
import os
8-
import importlib.util
9-
import sys
10-
import time
11-
import stat
11+
import posixpath
1212
import shutil
13+
import stat
1314
import struct
14-
import binascii
15+
import sys
1516
import threading
17+
import time
1618

1719
try:
1820
import zlib # We may need its compression method
@@ -2102,6 +2104,138 @@ def _compile(file, optimize=-1):
21022104
return (fname, archivename)
21032105

21042106

2107+
class Path:
    """
    A pathlib-compatible interface for zip files.

    Consider a zip file with this structure::

        .
        ├── a.txt
        └── b
            ├── c.txt
            └── d
                └── e.txt

    >>> data = io.BytesIO()
    >>> zf = ZipFile(data, 'w')
    >>> zf.writestr('a.txt', 'content of a')
    >>> zf.writestr('b/c.txt', 'content of c')
    >>> zf.writestr('b/d/e.txt', 'content of e')
    >>> zf.filename = 'abcde.zip'

    Path accepts the zipfile object itself or a filename

    >>> root = Path(zf)

    From there, several path operations are available.

    Directory iteration (including the zip file itself):

    >>> a, b = root.iterdir()
    >>> a
    Path('abcde.zip', 'a.txt')
    >>> b
    Path('abcde.zip', 'b/')

    name property:

    >>> b.name
    'b'

    join with divide operator:

    >>> c = b / 'c.txt'
    >>> c
    Path('abcde.zip', 'b/c.txt')
    >>> c.name
    'c.txt'

    Read text:

    >>> c.read_text()
    'content of c'

    existence:

    >>> c.exists()
    True
    >>> (b / 'missing.txt').exists()
    False

    Coercion to string:

    >>> str(c)
    'abcde.zip/b/c.txt'
    """

    __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})"

    def __init__(self, root, at=""):
        """
        root is a ZipFile instance, or anything accepted by the ZipFile
        constructor; at is the location inside the archive ('' is the root).
        """
        self.root = root if isinstance(root, ZipFile) else ZipFile(root)
        self.at = at

    @property
    def open(self):
        """A callable that invokes ZipFile.open on this path's entry."""
        return functools.partial(self.root.open, self.at)

    @property
    def name(self):
        """The final path component, without any trailing slash."""
        return posixpath.basename(self.at.rstrip("/"))

    def read_text(self, *args, **kwargs):
        """
        Read the entry as text; arguments are passed to io.TextIOWrapper
        after the stream itself.
        """
        with self.open() as strm:
            return io.TextIOWrapper(strm, *args, **kwargs).read()

    def read_bytes(self):
        """Read the entry's contents as bytes."""
        with self.open() as strm:
            return strm.read()

    def _is_child(self, path):
        # A direct child is an entry whose parent directory is this path.
        return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/")

    def _next(self, at):
        # Build another Path over the same archive at a different location.
        return Path(self.root, at)

    def is_dir(self):
        """Return True for the archive root or a location ending in '/'."""
        return not self.at or self.at.endswith("/")

    def is_file(self):
        """
        Return True when this path is not a directory.  Existence of the
        entry is not checked.
        """
        return not self.is_dir()

    def exists(self):
        """
        Return True if this location is among the archive's names,
        including directories implied by member names.
        """
        return self.at in self._names()

    def iterdir(self):
        """
        Iterate over the direct children of this directory.

        Raises ValueError if this path is not a directory.
        """
        if not self.is_dir():
            raise ValueError("Can't listdir a file")
        subs = map(self._next, self._names())
        return filter(self._is_child, subs)

    def __str__(self):
        return posixpath.join(self.root.filename, self.at)

    def __repr__(self):
        return self.__repr.format(self=self)

    def __truediv__(self, add):
        """Return the Path for the child named by add (str or path-like)."""
        joined = posixpath.join(self.at, add)
        joined_dir = posixpath.join(self.at, add, "")
        names = self._names()
        # Use the directory spelling (trailing slash) only when the plain
        # name is absent and the directory form is known to the archive.
        if joined not in names and joined_dir in names:
            return self._next(joined_dir)
        return self._next(joined)

    @staticmethod
    def _add_implied_dirs(names):
        """
        Return names followed by a 'dir/' entry for every ancestor
        directory that is implied by a member name but not listed.

        Each implied directory appears once, even when several members
        share it, and all ancestors are included, not just the
        immediate parent.
        """
        known = set(names)
        implied = []
        for name in names:
            # Strip trailing slashes at every step so the walk moves
            # strictly upward and terminates for names starting with '/'.
            parent = posixpath.dirname(name.rstrip("/")).rstrip("/")
            while parent:
                entry = parent + "/"
                if entry not in known:
                    known.add(entry)
                    implied.append(entry)
                parent = posixpath.dirname(parent).rstrip("/")
        return names + implied

    def _names(self):
        # All archive member names plus any implied directory entries.
        return self._add_implied_dirs(self.root.namelist())
2237+
2238+
21052239
def main(args=None):
21062240
import argparse
21072241

Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Introducing ``zipfile.Path``, a pathlib-compatible wrapper for traversing zip files.

0 commit comments

Comments
(0)