Skip to content

Commit 39ba033

Browse files
committed
feat(cli): provide human readable reporting in console output
Provide some information about identified chunks, extracted content, and
encountered errors. All the information is obtained from the ProcessResult
object. Example:

    poetry run unblob --report /tmp/report.json -f -e /tmp/out sample.img

    Extracted files: 3616
    Extracted directories: 609
    Extracted links: 782
    Extraction directory size: 298.19 MB.
    Chunks identification ratio: 90.74%
    Chunks distribution
    - EXTFS: 150.00 MB (54.17%)
    - ELF32: 73.98 MB (26.72%)
    - UNKNOWN: 25.64 MB (9.26%)
    - FAT: 16.00 MB (5.78%)
    - LZO: 9.28 MB (3.35%)
    - XZ: 1.03 MB (0.37%)
    - TAR: 860.00 KB (0.30%)
    - BZIP2: 93.56 KB (0.03%)
    - GZIP: 24.21 KB (0.01%)
    Encountered errors: 1
    - Severity.WARNING: MaliciousSymlinkRemoved
1 parent a67afe5 commit 39ba033

File tree

6 files changed

+212
-14
lines changed

6 files changed

+212
-14
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ build/
88
.idea
99
.coverage*
1010
/.venv/
11+
unblob.log

tests/test_cli.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,16 +216,18 @@ def test_archive_success(
216216
assert result.exit_code == 0
217217
assert "error" not in result.output
218218
assert "warning" not in result.output
219+
log_path = Path("unblob.log")
219220
config = ExtractionConfig(
220221
extract_root=tmp_path,
221222
max_depth=expected_depth,
222223
entropy_depth=expected_entropy_depth,
223224
entropy_plot=bool(expected_verbosity >= 3),
224225
process_num=expected_process_num,
225226
handlers=BUILTIN_HANDLERS,
227+
verbose=expected_verbosity,
226228
)
227229
process_file_mock.assert_called_once_with(config, in_path, None)
228-
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path)
230+
logger_config_mock.assert_called_once_with(expected_verbosity, tmp_path, log_path)
229231

230232

231233
@pytest.mark.parametrize(

unblob/cli.py

Lines changed: 131 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
#!/usr/bin/env python3
2+
import atexit
23
import sys
34
from pathlib import Path
4-
from typing import Iterable, Optional
5+
from typing import Dict, Iterable, List, Optional, Tuple
56

67
import click
8+
import pkg_resources
9+
from rich.console import Console
10+
from rich.panel import Panel
11+
from rich.table import Table
712
from structlog import get_logger
813

914
from unblob.models import DirectoryHandlers, Handlers, ProcessResult
1015
from unblob.plugins import UnblobPluginManager
11-
from unblob.report import Severity
16+
from unblob.report import ChunkReport, Severity, StatReport, UnknownChunkReport
1217

1318
from .cli_options import verbosity_option
1419
from .dependencies import get_dependencies, pretty_format_dependencies
@@ -25,6 +30,11 @@
2530
logger = get_logger()
2631

2732

33+
def restore_cursor():
    """Re-enable the terminal cursor hidden by rich's progress display.

    Registered via ``atexit`` so the cursor is restored even if unblob
    exits abnormally while a progress bar is active.
    """
    # ANSI escape code to show the cursor again.
    sys.stdout.write("\033[?25h")
    # Flush explicitly: this runs at interpreter shutdown, where a
    # block-buffered stdout could otherwise drop the escape sequence
    # and leave the user's terminal without a cursor.
    sys.stdout.flush()
36+
37+
2838
def show_external_dependencies(
2939
ctx: click.Context, _param: click.Option, value: bool # noqa: FBT001
3040
) -> None:
@@ -70,7 +80,7 @@ def __init__(
7080
handlers: Optional[Handlers] = None,
7181
dir_handlers: Optional[DirectoryHandlers] = None,
7282
plugin_manager: Optional[UnblobPluginManager] = None,
73-
**kwargs
83+
**kwargs,
7484
):
7585
super().__init__(*args, **kwargs)
7686
handlers = handlers or BUILTIN_HANDLERS
@@ -157,6 +167,13 @@ def __init__(
157167
type=click.Path(path_type=Path),
158168
help="File to store metadata generated during the extraction process (in JSON format).",
159169
)
170+
@click.option(
171+
"--log",
172+
"log_path",
173+
default=Path("unblob.log"),
174+
type=click.Path(path_type=Path),
175+
help="File to save logs (in text format). Defaults to unblob.log.",
176+
)
160177
@click.option(
161178
"-s",
162179
"--skip_extraction",
@@ -185,6 +202,7 @@ def cli(
185202
file: Path,
186203
extract_root: Path,
187204
report_file: Optional[Path],
205+
log_path: Path,
188206
force: bool, # noqa: FBT001
189207
process_num: int,
190208
depth: int,
@@ -198,7 +216,7 @@ def cli(
198216
plugin_manager: UnblobPluginManager,
199217
verbose: int,
200218
) -> ProcessResult:
201-
configure_logger(verbose, extract_root)
219+
configure_logger(verbose, extract_root, log_path)
202220

203221
plugin_manager.import_plugins(plugins_path)
204222
extra_handlers = plugin_manager.load_handlers_from_plugins()
@@ -219,10 +237,14 @@ def cli(
219237
handlers=handlers,
220238
dir_handlers=dir_handlers,
221239
keep_extracted_chunks=keep_extracted_chunks,
240+
verbose=verbose,
222241
)
223242

224243
logger.info("Start processing file", file=file)
225-
return process_file(config, file, report_file)
244+
process_results = process_file(config, file, report_file)
245+
if verbose == 0:
246+
print_report(process_results)
247+
return process_results
226248

227249

228250
cli.context_class = UnblobContext
@@ -242,6 +264,108 @@ def get_exit_code_from_reports(reports: ProcessResult) -> int:
242264
return 0
243265

244266

267+
def human_size(size: float) -> str:
    """Format *size* (a byte count) as a human readable string, e.g. ``1.50 KB``.

    Scales by powers of 1024 and caps at terabytes, so very large inputs
    come out as e.g. ``1024.00 TB`` rather than growing a new unit.
    """
    for unit in ("B", "KB", "MB", "GB"):
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    # Anything still >= 1024 GB is reported in terabytes, uncapped.
    return f"{size:.2f} TB"
274+
275+
276+
def get_chunks_distribution(task_results: List) -> Dict:
    """Aggregate chunk sizes per handler name across all task results.

    Sizes of :class:`UnknownChunkReport` entries are accumulated under the
    ``"unknown"`` key, which is always present (possibly 0). Every other
    :class:`ChunkReport` contributes its size under its handler name.
    """
    distribution: Dict = {"unknown": 0}

    for task_result in task_results:
        for report in task_result.reports:
            if isinstance(report, UnknownChunkReport):
                distribution["unknown"] += report.size
            elif isinstance(report, ChunkReport):
                name = report.handler_name
                distribution[name] = distribution.get(name, 0) + report.size

    return distribution
294+
295+
296+
def get_size_report(task_results: List) -> Tuple[int, int, int, int]:
    """Tally extraction statistics from the :class:`StatReport` entries.

    Returns a ``(files, directories, links, extracted_size)`` tuple, where
    ``extracted_size`` is the total size in bytes of regular files only.
    """
    total_files = 0
    total_dirs = 0
    total_links = 0
    extracted_size = 0

    for task_result in task_results:
        for report in task_result.reports:
            if not isinstance(report, StatReport):
                continue
            # is_file/is_dir/is_link are booleans; summing them counts entries.
            total_files += report.is_file
            total_dirs += report.is_dir
            total_links += report.is_link
            if report.is_file:
                extracted_size += report.size

    return total_files, total_dirs, total_links, extracted_size
314+
315+
316+
def print_report(reports: ProcessResult):
    """Print a human readable summary of an extraction run to the console.

    Shows counts of extracted files/directories/links and the extraction
    directory size, the ratio of identified vs. unknown chunks, a
    per-handler chunk size distribution table, and a table of errors
    encountered during processing. Prints nothing at all when no chunks
    were found (``total_size == 0``).
    """
    total_files, total_dirs, total_links, extracted_size = get_size_report(
        reports.results
    )
    chunks_distribution = get_chunks_distribution(reports.results)

    # "unknown" chunks count toward the total but not the identified size,
    # so valid/total is the identification ratio.
    valid_size = 0
    total_size = 0
    for handler, size in chunks_distribution.items():
        if handler != "unknown":
            valid_size += size
        total_size += size

    if total_size == 0:
        # Nothing was identified at all; stay silent and avoid a
        # division by zero in the ratio below.
        return

    summary = Panel(
        f"""Extracted files: [#00FFC8]{total_files}[/#00FFC8]
Extracted directories: [#00FFC8]{total_dirs}[/#00FFC8]
Extracted links: [#00FFC8]{total_links}[/#00FFC8]
Extraction directory size: [#00FFC8]{human_size(extracted_size)}[/#00FFC8]
Chunks identification ratio: [#00FFC8]{(valid_size/total_size) * 100:0.2f}%[/#00FFC8]""",
        subtitle="Summary",
        title=f"unblob ({get_version()})",
    )

    console = Console()
    console.print(summary)

    chunks_table = Table(title="Chunks distribution")
    chunks_table.add_column("Chunk type", justify="left", style="#00FFC8", no_wrap=True)
    chunks_table.add_column("Size", justify="center", style="#00FFC8", no_wrap=True)
    chunks_table.add_column("Ratio", justify="center", style="#00FFC8", no_wrap=True)

    # Largest chunk types first.
    for handler, size in sorted(
        chunks_distribution.items(), key=lambda item: item[1], reverse=True
    ):
        chunks_table.add_row(
            handler.upper(), human_size(size), f"{(size/total_size) * 100:0.2f}%"
        )

    console.print(chunks_table)

    # Idiomatic truthiness instead of len(...): empty error list prints nothing.
    if reports.errors:
        errors_table = Table(title="Encountered errors")
        errors_table.add_column("Severity", justify="left", style="cyan", no_wrap=True)
        errors_table.add_column("Name", justify="left", style="cyan", no_wrap=True)

        for error in reports.errors:
            errors_table.add_row(str(error.severity), error.__class__.__name__)
        console.print(errors_table)
367+
368+
245369
def main():
246370
try:
247371
# Click argument parsing
@@ -261,6 +385,8 @@ def main():
261385
except Exception:
262386
logger.exception("Unhandled exception during unblob")
263387
sys.exit(1)
388+
finally:
389+
atexit.register(restore_cursor)
264390

265391
sys.exit(get_exit_code_from_reports(reports))
266392

unblob/logging.py

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import pdb # noqa: T100
33
import sys
4+
from logging.handlers import WatchedFileHandler
45
from os import getpid
56
from pathlib import Path
67
from typing import Any
@@ -87,27 +88,67 @@ def filter_(_logger, _method_name: str, event_dict: structlog.types.EventDict):
8788
return filter_
8889

8990

90-
def configure_logger(verbosity_level: int, extract_root: Path):
91-
log_level = logging.DEBUG if verbosity_level > 0 else logging.INFO
91+
def configure_logger(verbosity_level: int, extract_root: Path, log_path: Path):
92+
if log_path.exists():
93+
log_path.unlink()
94+
95+
log_level = logging.DEBUG if verbosity_level > 0 else logging.CRITICAL
96+
9297
processors = [
98+
structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
99+
]
100+
101+
shared_processors = [
93102
structlog.stdlib.add_log_level,
94-
filter_debug_logs(verbosity_level),
103+
filter_debug_logs(verbosity_level or 2),
95104
structlog.processors.TimeStamper(
96105
key="timestamp", fmt="%Y-%m-%d %H:%M.%S", utc=True
97106
),
98107
pretty_print_types(extract_root),
99108
add_pid_to_log_message,
100109
structlog.processors.UnicodeDecoder(),
101110
structlog.processors.StackInfoRenderer(),
102-
structlog.processors.format_exc_info,
103-
structlog.dev.ConsoleRenderer(colors=sys.stdout.isatty()),
104111
]
105112

106113
structlog.configure(
107-
wrapper_class=structlog.make_filtering_bound_logger(log_level),
108-
processors=processors,
114+
wrapper_class=structlog.make_filtering_bound_logger(logging.DEBUG),
115+
processors=shared_processors + processors,
116+
logger_factory=structlog.stdlib.LoggerFactory(),
117+
)
118+
119+
formatter = structlog.stdlib.ProcessorFormatter(
120+
foreign_pre_chain=shared_processors,
121+
processors=[
122+
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
123+
structlog.dev.ConsoleRenderer(
124+
colors=sys.stdout.isatty(),
125+
exception_formatter=structlog.dev.plain_traceback,
126+
),
127+
],
128+
)
129+
130+
file_formatter = structlog.stdlib.ProcessorFormatter(
131+
foreign_pre_chain=shared_processors,
132+
processors=[
133+
structlog.stdlib.ProcessorFormatter.remove_processors_meta,
134+
structlog.dev.ConsoleRenderer(
135+
colors=False, exception_formatter=structlog.dev.plain_traceback
136+
),
137+
],
109138
)
110139

140+
console_handler = logging.StreamHandler(sys.stdout)
141+
console_handler.setFormatter(formatter)
142+
console_handler.setLevel(log_level)
143+
144+
file_handler = WatchedFileHandler(log_path.as_posix())
145+
file_handler.setFormatter(file_formatter)
146+
file_handler.setLevel(logging.DEBUG)
147+
148+
root_logger = logging.getLogger()
149+
root_logger.addHandler(console_handler)
150+
root_logger.addHandler(file_handler)
151+
root_logger.setLevel(logging.DEBUG)
111152
structlog.get_logger().debug(
112153
"Logging configured",
113154
vebosity_level=noformat(verbosity_level),

unblob/processing.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import attr
88
import magic
99
import plotext as plt
10+
from rich import progress
11+
from rich.style import Style
1012
from structlog import get_logger
1113
from unblob_native import math_tools as mt
1214

@@ -91,6 +93,7 @@ class ExtractionConfig:
9193
extract_suffix: str = "_extract"
9294
handlers: Handlers = BUILTIN_HANDLERS
9395
dir_handlers: DirectoryHandlers = BUILTIN_DIR_HANDLERS
96+
verbose: int = 1
9497

9598
def get_extract_dir_for(self, path: Path) -> Path:
9699
"""Return extraction dir under root with the name of path."""
@@ -143,7 +146,26 @@ def _process_task(config: ExtractionConfig, task: Task) -> ProcessResult:
143146
processor = Processor(config)
144147
aggregated_result = ProcessResult()
145148

149+
if not config.verbose:
150+
progress_display = progress.Progress(
151+
progress.TextColumn(
152+
"Extraction progress: {task.percentage:>3.0f}%",
153+
style=Style(color="#00FFC8"),
154+
),
155+
progress.BarColumn(
156+
complete_style=Style(color="#00FFC8"), style=Style(color="#002060")
157+
),
158+
)
159+
progress_display.start()
160+
overall_progress_task = progress_display.add_task("Extraction progress:")
161+
146162
def process_result(pool, result):
163+
if config.verbose == 0 and progress_display.tasks[0].total is not None:
164+
progress_display.update(
165+
overall_progress_task,
166+
advance=1,
167+
total=progress_display.tasks[0].total + len(result.subtasks),
168+
)
147169
for new_task in result.subtasks:
148170
pool.submit(new_task)
149171
aggregated_result.register(result)
@@ -158,6 +180,10 @@ def process_result(pool, result):
158180
pool.submit(task)
159181
pool.process_until_done()
160182

183+
if not config.verbose:
184+
progress_display.remove_task(overall_progress_task)
185+
progress_display.stop()
186+
161187
return aggregated_result
162188

163189

unblob/testing.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515

1616
@pytest.fixture(scope="session", autouse=True)
1717
def configure_logging(): # noqa: PT004
18-
configure_logger(verbosity_level=3, extract_root=Path(""))
18+
configure_logger(
19+
verbosity_level=3, extract_root=Path(""), log_path=Path("unblob.log")
20+
)
1921

2022
# https://pytest-cov.readthedocs.io/en/latest/subprocess-support.html#if-you-use-multiprocessing-process
2123
cleanup_on_sigterm()

0 commit comments

Comments
 (0)