11# -*- coding: utf-8 -*-
22from pprint import pprint # noqa
3+ from pathlib import Path
34
45from .support import TestCase
56
@@ -11,14 +12,80 @@ def test_zip(self):
1112 self .assertEqual (entity .first ("processingStatus" ), self .manager .STATUS_SUCCESS )
1213 self .assertEqual (entity .schema .name , "Package" )
1314
def test_zip_symlink_escape(self):
    """Ingest a ZIP archive containing a symlink that points outside of it.

    The test expects the link not to be followed: the extracted member
    must carry the link's target *path* as its text, not the contents of
    the file the link points to.
    """
    fixture_path, entity = self.fixture("badzip.zip")

    # Precondition: the symlink target exists and is readable, so a leak of
    # its contents during extraction would be possible (and detectable).
    target = Path("/ingestors/tests/fixtures/secret.txt")
    self.assertEqual(target.read_text(), "This is a secret!")

    self.manager.ingest(fixture_path, entity)

    # Python's zipfile treats symlinks that point to files outside of the
    # archive root as normal files, so the member is ingested as a plain
    # file whose body is the link target path.
    entities = self.manager.entities
    self.assertEqual(len(entities), 2)
    self.assertEqual(entities[0].first("fileName"), "secret.txt")
    self.assertEqual(
        entities[0].first("bodyText"),
        "/ingestors/tests/fixtures/secret.txt",
    )
    self.assertEqual(entities[1].first("fileName"), "badzip.zip")
33+
def test_rar(self):
    """Ingest the sample RAR archive and expect a successful Package."""
    archive_path, package = self.fixture("test-documents.rar")
    self.manager.ingest(archive_path, package)

    # The archive itself must be flagged as processed successfully ...
    status = package.first("processingStatus")
    self.assertEqual(status, self.manager.STATUS_SUCCESS)

    # ... and be classified with the "Package" schema.
    schema_name = package.schema.name
    self.assertEqual(schema_name, "Package")
1939
def test_rar_symlink_escape(self):
    """Ingest a RAR archive containing a symlink that points outside of it.

    The test expects the link not to be followed: the extracted member
    must carry the link's target *path* as its text, not the contents of
    the file the link points to.
    """
    fixture_path, entity = self.fixture("badrar.rar")

    # Precondition: the symlink target exists and is readable, so a leak of
    # its contents during extraction would be possible (and detectable).
    target = Path("/ingestors/tests/fixtures/secret.txt")
    self.assertEqual(target.read_text(), "This is a secret!")

    self.manager.ingest(fixture_path, entity)

    # rarfile treats symlinks that point to files outside of the archive
    # root as normal files, so the member is ingested as a plain file whose
    # body is the link target path.
    entities = self.manager.entities
    self.assertEqual(len(entities), 2)
    self.assertEqual(entities[0].first("fileName"), "secret.txt")
    self.assertEqual(
        entities[0].first("bodyText"),
        "/ingestors/tests/fixtures/secret.txt",
    )
    self.assertEqual(entities[1].first("fileName"), "badrar.rar")
58+
def test_tar(self):
    """Ingest the sample TAR archive and expect a successful Package."""
    archive_path, package = self.fixture("test-documents.tar")
    self.manager.ingest(archive_path, package)

    # The archive itself must be flagged as processed successfully ...
    status = package.first("processingStatus")
    self.assertEqual(status, self.manager.STATUS_SUCCESS)

    # ... and be classified with the "Package" schema.
    schema_name = package.schema.name
    self.assertEqual(schema_name, "Package")
64+
def test_tar_symlink_escape(self):
    """Ingest a TAR archive containing a symlink that points outside of it.

    The test expects the escaping symlink to produce no child entity at
    all: only the archive itself is present after ingestion.
    """
    fixture_path, entity = self.fixture("badtar.tar")

    # Precondition: the symlink target exists and is readable, so a leak of
    # its contents during extraction would be possible (and detectable).
    target = Path("/ingestors/tests/fixtures/secret.txt")
    self.assertEqual(target.read_text(), "This is a secret!")

    self.manager.ingest(fixture_path, entity)

    # Python's tarfile ignores symlinks that point to files outside of the
    # archive root, so the archive is the only entity.
    entities = self.manager.entities
    self.assertEqual(len(entities), 1)
    self.assertEqual(entities[0].first("fileName"), "badtar.tar")
77+
def test_7zip_symlink_escape(self):
    """Ingest a 7z archive containing a symlink that points outside of it.

    The test expects ingestion of the archive to be marked as failed and
    no child entity to be produced for the escaping symlink.
    """
    fixture_path, entity = self.fixture("bad7zip.7z")

    # Precondition: the symlink target exists and is readable, so a leak of
    # its contents during extraction would be possible (and detectable).
    target = Path("/ingestors/tests/fixtures/secret.txt")
    self.assertEqual(target.read_text(), "This is a secret!")

    self.manager.ingest(fixture_path, entity)

    # py7zr raises an exception if it encounters a symlink that points to a
    # file outside of the archive root, so only the (failed) archive entity
    # remains.
    entities = self.manager.entities
    self.assertEqual(len(entities), 1)
    self.assertEqual(entities[0].first("fileName"), "bad7zip.7z")
    # NOTE(review): sibling tests compare against self.manager.STATUS_SUCCESS;
    # if the manager exposes a matching failure constant, prefer it over
    # this literal -- confirm before changing.
    self.assertEqual(entities[0].first("processingStatus"), "failure")
0 commit comments