Skip to content

Commit 22748a8

Browse files
bpo-41316: Make tarfile follow specs for FNAME (GH-21511)
tarfile writes the full path to the FNAME field of the GZIP header instead of just the basename if the user specified an absolute path. Some archive viewers may process the file incorrectly. It also creates a security issue, because anyone can learn the directory structure of the system, the username, or other personal information. RFC 1952 says about FNAME: "This is the original name of the file being compressed, with any directory components removed." So tarfile must remove directory names from FNAME and write only the basename of the file. Automerge-Triggered-By: @jaraco
1 parent fd4cafd commit 22748a8

File tree

4 files changed

+17
-1
lines changed

4 files changed

+17
-1
lines changed

Lib/tarfile.py

+2
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ def _init_write_gz(self):
420420
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
421421
if self.name.endswith(".gz"):
422422
self.name = self.name[:-3]
423+
# Honor "directory components removed" from RFC1952
424+
self.name = os.path.basename(self.name)
423425
# RFC1952 says we must use ISO-8859-1 for the FNAME field.
424426
self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
425427

Lib/test/test_tarfile.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -1417,12 +1417,15 @@ def write(self, data):
14171417
pax_headers={'non': 'empty'})
14181418
self.assertFalse(f.closed)
14191419

1420+
14201421
class GzipWriteTest(GzipTest, WriteTest):
14211422
pass
14221423

1424+
14231425
class Bz2WriteTest(Bz2Test, WriteTest):
14241426
pass
14251427

1428+
14261429
class LzmaWriteTest(LzmaTest, WriteTest):
14271430
pass
14281431

@@ -1465,8 +1468,17 @@ def test_file_mode(self):
14651468
finally:
14661469
os.umask(original_umask)
14671470

1471+
14681472
class GzipStreamWriteTest(GzipTest, StreamWriteTest):
1469-
pass
1473+
def test_source_directory_not_leaked(self):
1474+
"""
1475+
Ensure the source directory is not included in the tar header
1476+
per bpo-41316.
1477+
"""
1478+
tarfile.open(tmpname, self.mode).close()
1479+
payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
1480+
assert os.path.dirname(tmpname) not in payload
1481+
14701482

14711483
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
14721484
decompressor = bz2.BZ2Decompressor if bz2 else None

Misc/ACKS

+1
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ Colm Buckley
242242
Erik de Bueger
243243
Jan-Hein Bührman
244244
Lars Buitinck
245+
Artem Bulgakov
245246
Dick Bulterman
246247
Bill Bumgarner
247248
Jimmy Burgett
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix the :mod:`tarfile` module to write only the basename of the TAR file to the GZIP compression header.

0 commit comments

Comments
 (0)