Skip to content

Commit 64cade4

Browse files
authored
Merge pull request #399 from onekey-sec/magic-wrapper-change
Change from file-magic to python-magic, use MAGIC_CONTINUE
2 parents 7be9a98 + c8a80de commit 64cade4

File tree

5 files changed

+32
-19
lines changed

5 files changed

+32
-19
lines changed

default.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ let
6363
];
6464
});
6565

66-
file-magic = (super.file-magic.override { preferWheel = false; }).overridePythonAttrs (_: {
66+
python-magic = (super.python-magic.override { preferWheel = false; }).overridePythonAttrs (_: {
6767
patchPhase = ''
68-
substituteInPlace magic.py --replace "find_library('magic')" "'${file}/lib/libmagic.so'"
68+
substituteInPlace magic/loader.py --replace "find_library('magic')" "'${file}/lib/libmagic.so'"
6969
'';
7070
});
7171

poetry.lock

Lines changed: 13 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jefferson = { git = "https://github.com/onekey-sec/jefferson.git", rev = "ddbc59
2525
yaffshiv = { git = "https://github.com/onekey-sec/yaffshiv.git", rev = "24e6e453a36a02144ae2d159eb3229f9c6312828" }
2626
plotext = "^4.1.5"
2727
pluggy = "^1.0.0"
28-
file-magic = "^0.4.0"
28+
python-magic = "^0.4.27"
2929
hyperscan = "^0.3.0"
3030
lark = "^1.1.2"
3131
lz4 = "^4.0.0"

unblob/report.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,26 @@ def from_path(cls, path: Path):
140140
)
141141

142142

143+
# libmagic helpers
144+
# File magic uses a rule set to guess the file type; however, as rules are added they could
145+
# shadow each other. File magic uses rule priorities to determine which is the best matching
146+
# rule, however this could shadow other valid matches as well, which could eventually break
147+
# any further processing that depends on magic.
148+
# By enabling keep_going (which eventually enables MAGIC_CONTINUE) all matching patterns
149+
# will be included in the magic string at the cost of being a bit slower, but increasing
150+
# accuracy by not shadowing rules.
151+
get_magic = magic.Magic(keep_going=True).from_file
152+
get_mime_type = magic.Magic(mime=True).from_file
153+
154+
143155
@attr.define(kw_only=True)
144156
class FileMagicReport(Report):
145157
magic: str
146158
mime_type: str
147159

148160
@classmethod
149161
def from_path(cls, path: Path):
150-
detected = magic.detect_from_filename(path)
151-
return cls(magic=detected.name, mime_type=detected.mime_type)
162+
return cls(magic=get_magic(path), mime_type=get_mime_type(path))
152163

153164

154165
@attr.define(kw_only=True)

vulture_whitelist.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from unblob.file_utils import File, iterbits, round_down
66
from unblob.models import _JSONEncoder
77
from unblob.parser import _HexStringToRegex
8-
from unblob.report import ChunkReport
8+
from unblob.report import ChunkReport, FileMagicReport
99

1010
_HexStringToRegex.literal
1111
_HexStringToRegex.wildcard
@@ -16,6 +16,8 @@
1616
_JSONEncoder.default
1717

1818
ChunkReport.handler_name
19+
FileMagicReport.magic
20+
FileMagicReport.mime_type
1921

2022
sys.breakpointhook
2123
cli.cli.context_class

0 commit comments

Comments
 (0)