@@ -76,23 +76,73 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
7676
7777 # prevent traversal attempts through links
7878 if tarinfo .islnk () or tarinfo .issym ():
79- if Path (tarinfo .linkname ).is_absolute ():
80- self .record_problem (
81- tarinfo ,
82- "Absolute path as link target." ,
83- "Converted to extraction relative path." ,
84- )
85- tarinfo .linkname = f"./{ tarinfo .linkname } "
86- if not is_safe_path (
87- basedir = extract_root ,
88- path = extract_root / tarinfo .linkname ,
89- ):
90- self .record_problem (
91- tarinfo ,
79+ link_target = Path (tarinfo .linkname )
80+
81+ # Check if the link is absolute and make it relative to extract_root
82+ if link_target .is_absolute ():
83+ # Strip leading '/' to make the path relative
84+ rel_target = link_target .relative_to ("/" )
85+
86+ if Path (tarinfo .linkname ).is_absolute ():
87+ self .record_problem (
88+ tarinfo ,
89+ "Absolute path as link target." ,
90+ "Converted to extraction relative path." ,
91+ )
92+ else :
93+ # Directly use the relative link target. If it points to an unsafe path, we'll
94+ # check and fix below
95+ rel_target = link_target
96+
97+ # The symlink will point to our relative target (may be updated below if unsafe)
98+ tarinfo .linkname = rel_target
99+
100+ # Resolve the link target to an absolute path
101+ resolved_path = (extract_root / tarinfo .name ).parent / rel_target
102+
103+ # If the resolved path points outside of extract_root, we need to fix it!
104+ if not is_safe_path (extract_root , resolved_path ):
105+ logger .warning (
92106 "Traversal attempt through link path." ,
93- "Skipped." ,
107+ src = tarinfo .name ,
108+ dest = tarinfo .linkname ,
109+ basedir = extract_root ,
110+ resovled_path = resolved_path ,
94111 )
95- return
112+
113+ for drop_count in range (len (str (rel_target ).split ("/" ))):
114+ new_path = (
115+ (extract_root / tarinfo .name ).parent
116+ / Path ("/" .join (["placeholder" ] * drop_count ))
117+ / rel_target
118+ )
119+ resolved_path = new_path .resolve ()
120+ if str (resolved_path ).startswith (str (extract_root )):
121+ break
122+ else :
123+ # We didn't hit the break, we couldn't resolve the path safely
124+ self .record_problem (
125+ tarinfo ,
126+ "Traversal attempt through link path." ,
127+ "Skipped." ,
128+ )
129+ return
130+
131+ # Double check that it's safe now
132+ if not is_safe_path (extract_root , resolved_path ):
133+ self .record_problem (
134+ tarinfo ,
135+ "Traversal attempt through link path." ,
136+ "Skipped." ,
137+ )
138+ return
139+
140+ # Prepend placeholder directories before rel_target to get a valid path
141+ # within extract_root. This is the relative version of resolved_path.
142+ rel_target = Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
143+ tarinfo .linkname = rel_target
144+
145+ logger .debug ("Creating symlink" , points_to = resolved_path , name = tarinfo .name )
96146
97147 target_path = extract_root / tarinfo .name
98148 # directories are special: we can not set their metadata now + they might also be already existing
0 commit comments