Skip to content

Commit a909ec3

Browse files
miss-islingtonencukouvstinnerfrenzymadness
authored
[3.12] gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) (#108211)
gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) (cherry picked from commit acbd3f9) Co-authored-by: Petr Viktorin <[email protected]> Co-authored-by: Victor Stinner <[email protected]> Co-authored-by: Lumír 'Frenzy' Balhar <[email protected]>
1 parent e1b069f commit a909ec3

File tree

4 files changed

+156
-9
lines changed

4 files changed

+156
-9
lines changed

Doc/library/tarfile.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -735,6 +735,11 @@ A ``TarInfo`` object has the following public data attributes:
735735
Name of the target file name, which is only present in :class:`TarInfo` objects
736736
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
737737

738+
For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory
739+
that contains the link.
740+
For hard links (``LNKTYPE``), the *linkname* is relative to the root of
741+
the archive.
742+
738743

739744
.. attribute:: TarInfo.uid
740745
:type: int

Lib/tarfile.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,7 @@ def __init__(self, tarinfo):
742742
class AbsoluteLinkError(FilterError):
743743
def __init__(self, tarinfo):
744744
self.tarinfo = tarinfo
745-
super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path')
745+
super().__init__(f'{tarinfo.name!r} is a link to an absolute path')
746746

747747
class LinkOutsideDestinationError(FilterError):
748748
def __init__(self, tarinfo, path):
@@ -802,7 +802,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True):
802802
if member.islnk() or member.issym():
803803
if os.path.isabs(member.linkname):
804804
raise AbsoluteLinkError(member)
805-
target_path = os.path.realpath(os.path.join(dest_path, member.linkname))
805+
if member.issym():
806+
target_path = os.path.join(dest_path,
807+
os.path.dirname(name),
808+
member.linkname)
809+
else:
810+
target_path = os.path.join(dest_path,
811+
member.linkname)
812+
target_path = os.path.realpath(target_path)
806813
if os.path.commonpath([target_path, dest_path]) != dest_path:
807814
raise LinkOutsideDestinationError(member, target_path)
808815
return new_attrs

Lib/test/test_tarfile.py

Lines changed: 139 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3331,10 +3331,12 @@ def __exit__(self, *exc):
33313331
self.bio = None
33323332

33333333
def add(self, name, *, type=None, symlink_to=None, hardlink_to=None,
3334-
mode=None, **kwargs):
3334+
mode=None, size=None, **kwargs):
33353335
"""Add a member to the test archive. Call within `with`."""
33363336
name = str(name)
33373337
tarinfo = tarfile.TarInfo(name).replace(**kwargs)
3338+
if size is not None:
3339+
tarinfo.size = size
33383340
if mode:
33393341
tarinfo.mode = _filemode_to_int(mode)
33403342
if symlink_to is not None:
@@ -3410,7 +3412,8 @@ def check_context(self, tar, filter):
34103412
raise self.raised_exception
34113413
self.assertEqual(self.expected_paths, set())
34123414

3413-
def expect_file(self, name, type=None, symlink_to=None, mode=None):
3415+
def expect_file(self, name, type=None, symlink_to=None, mode=None,
3416+
size=None):
34143417
"""Check a single file. See check_context."""
34153418
if self.raised_exception:
34163419
raise self.raised_exception
@@ -3439,6 +3442,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None):
34393442
self.assertTrue(path.is_fifo())
34403443
else:
34413444
raise NotImplementedError(type)
3445+
if size is not None:
3446+
self.assertEqual(path.stat().st_size, size)
34423447
for parent in path.parents:
34433448
self.expected_paths.discard(parent)
34443449

@@ -3485,8 +3490,15 @@ def test_parent_symlink(self):
34853490
# Test interplaying symlinks
34863491
# Inspired by 'dirsymlink2a' in jwilk/traversal-archives
34873492
with ArchiveMaker() as arc:
3493+
3494+
# `current` links to `.` which is both:
3495+
# - the destination directory
3496+
# - `current` itself
34883497
arc.add('current', symlink_to='.')
3498+
3499+
# effectively points to ./../
34893500
arc.add('parent', symlink_to='current/..')
3501+
34903502
arc.add('parent/evil')
34913503

34923504
if os_helper.can_symlink():
@@ -3528,9 +3540,46 @@ def test_parent_symlink(self):
35283540
def test_parent_symlink2(self):
35293541
# Test interplaying symlinks
35303542
# Inspired by 'dirsymlink2b' in jwilk/traversal-archives
3543+
3544+
# Posix and Windows have different pathname resolution:
3545+
# either symlink or a '..' component resolve first.
3546+
# Let's see which we are on.
3547+
if os_helper.can_symlink():
3548+
testpath = os.path.join(TEMPDIR, 'resolution_test')
3549+
os.mkdir(testpath)
3550+
3551+
# testpath/current links to `.` which is all of:
3552+
# - `testpath`
3553+
# - `testpath/current`
3554+
# - `testpath/current/current`
3555+
# - etc.
3556+
os.symlink('.', os.path.join(testpath, 'current'))
3557+
3558+
# we'll test where `testpath/current/../file` ends up
3559+
with open(os.path.join(testpath, 'current', '..', 'file'), 'w'):
3560+
pass
3561+
3562+
if os.path.exists(os.path.join(testpath, 'file')):
3563+
# Windows collapses 'current\..' to '.' first, leaving
3564+
# 'testpath\file'
3565+
dotdot_resolves_early = True
3566+
elif os.path.exists(os.path.join(testpath, '..', 'file')):
3567+
# Posix resolves 'current' to '.' first, leaving
3568+
# 'testpath/../file'
3569+
dotdot_resolves_early = False
3570+
else:
3571+
raise AssertionError('Could not determine link resolution')
3572+
35313573
with ArchiveMaker() as arc:
3574+
3575+
# `current` links to `.` which is both the destination directory
3576+
# and `current` itself
35323577
arc.add('current', symlink_to='.')
3578+
3579+
# `current/parent` is also available as `./parent`,
3580+
# and effectively points to `./../`
35333581
arc.add('current/parent', symlink_to='..')
3582+
35343583
arc.add('parent/evil')
35353584

35363585
with self.check_context(arc.open(), 'fully_trusted'):
@@ -3544,6 +3593,7 @@ def test_parent_symlink2(self):
35443593

35453594
with self.check_context(arc.open(), 'tar'):
35463595
if os_helper.can_symlink():
3596+
# Fail when extracting a file outside destination
35473597
self.expect_exception(
35483598
tarfile.OutsideDestinationError,
35493599
"'parent/evil' would be extracted to "
@@ -3554,10 +3604,24 @@ def test_parent_symlink2(self):
35543604
self.expect_file('parent/evil')
35553605

35563606
with self.check_context(arc.open(), 'data'):
3557-
self.expect_exception(
3558-
tarfile.LinkOutsideDestinationError,
3559-
"""'current/parent' would link to ['"].*['"], """
3560-
+ "which is outside the destination")
3607+
if os_helper.can_symlink():
3608+
if dotdot_resolves_early:
3609+
# Fail when extracting a file outside destination
3610+
self.expect_exception(
3611+
tarfile.OutsideDestinationError,
3612+
"'parent/evil' would be extracted to "
3613+
+ """['"].*evil['"], which is outside """
3614+
+ "the destination")
3615+
else:
3616+
# Fail as soon as we have a symlink outside the destination
3617+
self.expect_exception(
3618+
tarfile.LinkOutsideDestinationError,
3619+
"'current/parent' would link to "
3620+
+ """['"].*outerdir['"], which is outside """
3621+
+ "the destination")
3622+
else:
3623+
self.expect_file('current/')
3624+
self.expect_file('parent/evil')
35613625

35623626
@symlink_test
35633627
def test_absolute_symlink(self):
@@ -3587,12 +3651,30 @@ def test_absolute_symlink(self):
35873651
with self.check_context(arc.open(), 'data'):
35883652
self.expect_exception(
35893653
tarfile.AbsoluteLinkError,
3590-
"'parent' is a symlink to an absolute path")
3654+
"'parent' is a link to an absolute path")
3655+
3656+
def test_absolute_hardlink(self):
3657+
# Test hardlink to an absolute path
3658+
# Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives
3659+
with ArchiveMaker() as arc:
3660+
arc.add('parent', hardlink_to=self.outerdir / 'foo')
3661+
3662+
with self.check_context(arc.open(), 'fully_trusted'):
3663+
self.expect_exception(KeyError, ".*foo. not found")
3664+
3665+
with self.check_context(arc.open(), 'tar'):
3666+
self.expect_exception(KeyError, ".*foo. not found")
3667+
3668+
with self.check_context(arc.open(), 'data'):
3669+
self.expect_exception(
3670+
tarfile.AbsoluteLinkError,
3671+
"'parent' is a link to an absolute path")
35913672

35923673
@symlink_test
35933674
def test_sly_relative0(self):
35943675
# Inspired by 'relative0' in jwilk/traversal-archives
35953676
with ArchiveMaker() as arc:
3677+
# points to `../../tmp/moo`
35963678
arc.add('../moo', symlink_to='..//tmp/moo')
35973679

35983680
try:
@@ -3643,6 +3725,56 @@ def test_sly_relative2(self):
36433725
+ """['"].*moo['"], which is outside the """
36443726
+ "destination")
36453727

3728+
@symlink_test
3729+
def test_deep_symlink(self):
3730+
# Test that symlinks and hardlinks inside a directory
3731+
# point to the correct file (`target` of size 3).
3732+
# If links aren't supported we get a copy of the file.
3733+
with ArchiveMaker() as arc:
3734+
arc.add('targetdir/target', size=3)
3735+
# a hardlink's linkname is relative to the archive
3736+
arc.add('linkdir/hardlink', hardlink_to=os.path.join(
3737+
'targetdir', 'target'))
3738+
# a symlink's linkname is relative to the link's directory
3739+
arc.add('linkdir/symlink', symlink_to=os.path.join(
3740+
'..', 'targetdir', 'target'))
3741+
3742+
for filter in 'tar', 'data', 'fully_trusted':
3743+
with self.check_context(arc.open(), filter):
3744+
self.expect_file('targetdir/target', size=3)
3745+
self.expect_file('linkdir/hardlink', size=3)
3746+
if os_helper.can_symlink():
3747+
self.expect_file('linkdir/symlink', size=3,
3748+
symlink_to='../targetdir/target')
3749+
else:
3750+
self.expect_file('linkdir/symlink', size=3)
3751+
3752+
@symlink_test
3753+
def test_chains(self):
3754+
# Test chaining of symlinks/hardlinks.
3755+
# Symlinks are created before the files they point to.
3756+
with ArchiveMaker() as arc:
3757+
arc.add('linkdir/symlink', symlink_to='hardlink')
3758+
arc.add('symlink2', symlink_to=os.path.join(
3759+
'linkdir', 'hardlink2'))
3760+
arc.add('targetdir/target', size=3)
3761+
arc.add('linkdir/hardlink', hardlink_to='targetdir/target')
3762+
arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink')
3763+
3764+
for filter in 'tar', 'data', 'fully_trusted':
3765+
with self.check_context(arc.open(), filter):
3766+
self.expect_file('targetdir/target', size=3)
3767+
self.expect_file('linkdir/hardlink', size=3)
3768+
self.expect_file('linkdir/hardlink2', size=3)
3769+
if os_helper.can_symlink():
3770+
self.expect_file('linkdir/symlink', size=3,
3771+
symlink_to='hardlink')
3772+
self.expect_file('symlink2', size=3,
3773+
symlink_to='linkdir/hardlink2')
3774+
else:
3775+
self.expect_file('linkdir/symlink', size=3)
3776+
self.expect_file('symlink2', size=3)
3777+
36463778
def test_modes(self):
36473779
# Test how file modes are extracted
36483780
# (Note that the modes are ignored on platforms without working chmod)
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:func:`tarfile.data_filter` now takes the location of symlinks into account
2+
when determining their target, so it will no longer reject some valid
3+
tarballs with ``LinkOutsideDestinationError``.

0 commit comments

Comments
 (0)