@@ -76,23 +76,93 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
7676
7777 # prevent traversal attempts through links
7878 if tarinfo .islnk () or tarinfo .issym ():
79- if Path (tarinfo .linkname ).is_absolute ():
79+ rel_target = Path (tarinfo .linkname )
80+ if rel_target .is_absolute ():
81+ # If target is absolute, we'll rewrite to be relative to the symlink
82+
83+ # Strip leading '/' to make the path relative to extract directory
84+ rel_target = (extract_root / rel_target .relative_to ("/" )).relative_to (
85+ extract_root
86+ )
87+
88+ # Now we need to find the path from the symlink to the target
89+ # If the symlink is in a directory like /foo/bar and the target is at /target
90+ # we need to make rel_target ../target (relative to /foo) instead of just being /target (relative to /)
91+
92+ # Let's calculate depth of the symlink itself and the depth of the target
93+ symlink_depth = len (tarinfo .name .split ("/" ))
94+ target_depth = len (rel_target .parts )
95+
96+ # If the symlink is deeper than the target, we need to go up by the difference in depth
97+ if symlink_depth > target_depth :
98+ # We need to go up by the difference in depth
99+ rel_target = (
100+ Path (
101+ "/" .join (
102+ [".." for _ in range (symlink_depth - target_depth )]
103+ )
104+ )
105+ / rel_target
106+ )
107+
80108 self .record_problem (
81109 tarinfo ,
82110 "Absolute path as link target." ,
83111 "Converted to extraction relative path." ,
84112 )
85- tarinfo .linkname = f"./{ tarinfo .linkname } "
86- if not is_safe_path (
87- basedir = extract_root ,
88- path = extract_root / tarinfo .linkname ,
89- ):
90- self .record_problem (
91- tarinfo ,
113+
114+ # The symlink will point to our relative target (may be updated below if unsafe)
115+ tarinfo .linkname = rel_target
116+ logger .info (
117+ "Link target is relative" , linkname = tarinfo .linkname , name = tarinfo .name
118+ )
119+
120+ # Resolve the link target to an absolute path
121+ resolved_path = (extract_root / tarinfo .name ).parent / rel_target
122+
123+ # If the resolved path points outside of extract_root, we need to fix it!
124+ if not is_safe_path (extract_root , resolved_path ):
125+ logger .warning (
92126 "Traversal attempt through link path." ,
93- "Skipped." ,
127+ src = tarinfo .name ,
128+ dest = tarinfo .linkname ,
129+ basedir = extract_root ,
130+ resovled_path = resolved_path ,
94131 )
95- return
132+
133+ for drop_count in range (len (str (rel_target ).split ("/" ))):
134+ new_path = (
135+ (extract_root / tarinfo .name ).parent
136+ / Path ("/" .join (["placeholder" ] * drop_count ))
137+ / rel_target
138+ )
139+ resolved_path = new_path .resolve ()
140+ if str (resolved_path ).startswith (str (extract_root )):
141+ break
142+ else :
143+ # We didn't hit the break, we couldn't resolve the path safely
144+ self .record_problem (
145+ tarinfo ,
146+ "Traversal attempt through link path." ,
147+ "Skipped." ,
148+ )
149+ return
150+
151+ # Double check that it's safe now
152+ if not is_safe_path (extract_root , resolved_path ):
153+ self .record_problem (
154+ tarinfo ,
155+ "Traversal attempt through link path." ,
156+ "Skipped." ,
157+ )
158+ return
159+
160+ # Prepend placeholder directories before rel_target to get a valid path
161+ # within extract_root. This is the relative version of resolved_path.
162+ rel_target = Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
163+ tarinfo .linkname = rel_target
164+
165+ logger .debug ("Creating symlink" , points_to = resolved_path , name = tarinfo .name )
96166
97167 target_path = extract_root / tarinfo .name
98168 # directories are special: we can not set their metadata now + they might also be already existing
0 commit comments