Skip to content

Commit c8a80de

Browse files
committed
Change from file-magic to python-magic, use MAGIC_CONTINUE
File magic uses a rule set to guess the file type; however, as rules are added, they can shadow each other. File magic uses rule priorities to determine the best-matching rule, but this can shadow other valid matches as well, which could eventually break any further processing that depends on magic. Enabling keep_going (which in turn enables MAGIC_CONTINUE) includes all matching patterns in the magic string; this is a bit slower, but it increases accuracy because no rule is shadowed. MAGIC_CONTINUE is more transparently supported by python-magic. Also, file-magic's last release was in 2018, although there are recent changes on GitHub.
1 parent 7be9a98 commit c8a80de

File tree

5 files changed

+32
-19
lines changed

5 files changed

+32
-19
lines changed

default.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,9 @@ let
6363
];
6464
});
6565

66-
file-magic = (super.file-magic.override { preferWheel = false; }).overridePythonAttrs (_: {
66+
python-magic = (super.python-magic.override { preferWheel = false; }).overridePythonAttrs (_: {
6767
patchPhase = ''
68-
substituteInPlace magic.py --replace "find_library('magic')" "'${file}/lib/libmagic.so'"
68+
substituteInPlace magic/loader.py --replace "find_library('magic')" "'${file}/lib/libmagic.so'"
6969
'';
7070
});
7171

poetry.lock

Lines changed: 13 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jefferson = { git = "https://github.com/onekey-sec/jefferson.git", rev = "ddbc59
2525
yaffshiv = { git = "https://github.com/onekey-sec/yaffshiv.git", rev = "24e6e453a36a02144ae2d159eb3229f9c6312828" }
2626
plotext = "^4.1.5"
2727
pluggy = "^1.0.0"
28-
file-magic = "^0.4.0"
28+
python-magic = "^0.4.27"
2929
hyperscan = "^0.3.0"
3030
lark = "^1.1.2"
3131
lz4 = "^4.0.0"

unblob/report.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,26 @@ def from_path(cls, path: Path):
140140
)
141141

142142

143+
# libmagic helpers
144+
# file magic uses a rule-set to guess the file type, however as rules are added they could
145+
# shadow each other. File magic uses rule priorities to determine which is the best matching
146+
# rule, however this could shadow other valid matches as well, which could eventually break
147+
# any further processing that depends on magic.
148+
# By enabling keep_going (which eventually enables MAGIC_CONTINUE) all matching patterns
149+
# will be included in the magic string at the cost of being a bit slower, but increasing
150+
# accuracy by not shadowing rules.
151+
get_magic = magic.Magic(keep_going=True).from_file
152+
get_mime_type = magic.Magic(mime=True).from_file
153+
154+
143155
@attr.define(kw_only=True)
144156
class FileMagicReport(Report):
145157
magic: str
146158
mime_type: str
147159

148160
@classmethod
149161
def from_path(cls, path: Path):
150-
detected = magic.detect_from_filename(path)
151-
return cls(magic=detected.name, mime_type=detected.mime_type)
162+
return cls(magic=get_magic(path), mime_type=get_mime_type(path))
152163

153164

154165
@attr.define(kw_only=True)

vulture_whitelist.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from unblob.file_utils import File, iterbits, round_down
66
from unblob.models import _JSONEncoder
77
from unblob.parser import _HexStringToRegex
8-
from unblob.report import ChunkReport
8+
from unblob.report import ChunkReport, FileMagicReport
99

1010
_HexStringToRegex.literal
1111
_HexStringToRegex.wildcard
@@ -16,6 +16,8 @@
1616
_JSONEncoder.default
1717

1818
ChunkReport.handler_name
19+
FileMagicReport.magic
20+
FileMagicReport.mime_type
1921

2022
sys.breakpointhook
2123
cli.cli.context_class

0 commit comments

Comments
 (0)