Skip to content

Commit dc71747

Browse files
committed
refact(archive/ar): extract with pure-Python arpy instead of unar
Although arpy looks unmaintained, its implementation is readable. It will be easier to fix problems there, or in a fork, than in unar/7z.
1 parent 7d4ed43 commit dc71747

File tree

1 file changed

+39
-4
lines changed
  • python/unblob/handlers/archive

1 file changed

+39
-4
lines changed

python/unblob/handlers/archive/ar.py

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import os
2+
from pathlib import Path
23
from typing import Optional
34

45
import arpy
56
from structlog import get_logger
67

7-
from ...extractors import Command
8-
from ...file_utils import OffsetFile
9-
from ...models import File, Handler, HexString, ValidChunk
8+
from ...file_utils import FileSystem, OffsetFile, iterate_file
9+
from ...models import Extractor, ExtractResult, File, Handler, HexString, ValidChunk
10+
from ...report import ExtractionProblem
1011

1112
logger = get_logger()
1213

@@ -15,6 +16,40 @@
1516
SIGNATURE_LENGTH = 0x8
1617

1718

19+
class ArExtractor(Extractor):
    """Extract ``ar`` archives with the pure-Python ``arpy`` library."""

    def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]:
        """Write every member of the archive at *inpath* through a ``FileSystem`` on *outdir*.

        Members are processed in sorted order of their raw (bytes) names.
        A name that cannot be decoded with the default codec is decoded
        again with ``errors="replace"`` and the substitution is recorded
        as an ``ExtractionProblem``.

        Returns an ``ExtractResult`` whose reports are the problems
        collected by the ``FileSystem`` during extraction.
        """
        fs = FileSystem(outdir)

        with arpy.Archive(inpath.as_posix()) as archive:
            # Headers are read up front; ``archived_files`` is consulted below.
            archive.read_all_headers()
            members = archive.archived_files

            for raw_name in sorted(members):
                member = members[raw_name]

                try:
                    target = Path(raw_name.decode())
                except UnicodeDecodeError:
                    # Fall back to a lossy decode and report what was done.
                    target = Path(raw_name.decode(errors="replace"))
                    problem = ExtractionProblem(
                        path=repr(raw_name),
                        problem="Path is not a valid UTF/8 string",
                        resolution=f"Converted to {target}",
                    )
                    fs.record_problem(problem)

                # Stream the member's content from offset 0 for header.size bytes.
                content = iterate_file(member, 0, member.header.size)
                fs.write_chunks(target, chunks=content)

        return ExtractResult(reports=fs.problems)
51+
52+
1853
class ARHandler(Handler):
1954
NAME = "ar"
2055

@@ -27,7 +62,7 @@ class ARHandler(Handler):
2762
)
2863
]
2964

30-
EXTRACTOR = Command("unar", "-no-directory", "-o", "{outdir}", "{inpath}")
65+
EXTRACTOR = ArExtractor()
3166

3267
def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
3368
offset_file = OffsetFile(file, start_offset)

0 commit comments

Comments
 (0)