Skip to content

Commit 7917170

Browse files
bpo-41316: Make tarfile follow specs for FNAME (GH-21511)
tarfile writes the full path to the FNAME field of the GZIP header instead of just the basename if the user specified an absolute path. Some archive viewers may process such a file incorrectly. It also creates a security issue, because anyone who obtains the archive can learn the directory structure of the system where it was created, along with the username or other personal information. RFC 1952 says about FNAME: "This is the original name of the file being compressed, with any directory components removed". So tarfile must remove directory names from FNAME and write only the basename of the file. Automerge-Triggered-By: @jaraco (cherry picked from commit 22748a8) Co-authored-by: Artem Bulgakov <ArtemSBulgakov@ya.ru>
1 parent 19019ec commit 7917170

File tree

4 files changed

+17
-1
lines changed

4 files changed

+17
-1
lines changed

Lib/tarfile.py

+2
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,8 @@ def _init_write_gz(self):
420420
self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
421421
if self.name.endswith(".gz"):
422422
self.name = self.name[:-3]
423+
# Honor "directory components removed" from RFC1952
424+
self.name = os.path.basename(self.name)
423425
# RFC1952 says we must use ISO-8859-1 for the FNAME field.
424426
self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
425427

Lib/test/test_tarfile.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -1416,12 +1416,15 @@ def write(self, data):
14161416
pax_headers={'non': 'empty'})
14171417
self.assertFalse(f.closed)
14181418

1419+
14191420
class GzipWriteTest(GzipTest, WriteTest):
14201421
pass
14211422

1423+
14221424
class Bz2WriteTest(Bz2Test, WriteTest):
14231425
pass
14241426

1427+
14251428
class LzmaWriteTest(LzmaTest, WriteTest):
14261429
pass
14271430

@@ -1464,8 +1467,17 @@ def test_file_mode(self):
14641467
finally:
14651468
os.umask(original_umask)
14661469

1470+
14671471
class GzipStreamWriteTest(GzipTest, StreamWriteTest):
1468-
pass
1472+
def test_source_directory_not_leaked(self):
1473+
"""
1474+
Ensure the source directory is not included in the tar header
1475+
per bpo-41316.
1476+
"""
1477+
tarfile.open(tmpname, self.mode).close()
1478+
payload = pathlib.Path(tmpname).read_text(encoding='latin-1')
1479+
assert os.path.dirname(tmpname) not in payload
1480+
14691481

14701482
class Bz2StreamWriteTest(Bz2Test, StreamWriteTest):
14711483
decompressor = bz2.BZ2Decompressor if bz2 else None

Misc/ACKS

+1
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ Colm Buckley
243243
Erik de Bueger
244244
Jan-Hein Bührman
245245
Lars Buitinck
246+
Artem Bulgakov
246247
Dick Bulterman
247248
Bill Bumgarner
248249
Jimmy Burgett
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix the :mod:`tarfile` module to write only the basename of the TAR file to the GZIP compression header.

0 commit comments

Comments
 (0)