88from typing import Generic , Optional , TypeVar , Union
99
1010import attrs
11+ from pydantic import BaseModel , TypeAdapter
1112from structlog import get_logger
1213
1314from .file_utils import Endian , File , InvalidInputFormat , StructParser
@@ -61,12 +62,11 @@ def __post_init__(self):
6162 self .fully_supported = len (self .limitations ) == 0
6263
6364
64- @attrs .define (frozen = True )
65- class Task :
65+ class Task (BaseModel ):
6666 path : Path
6767 depth : int
6868 blob_id : str
69- is_multi_file : bool = attrs . field ( default = False )
69+ is_multi_file : bool = False
7070
7171
7272@attrs .define
@@ -228,11 +228,10 @@ def as_report(self, extraction_reports: list[Report]) -> MultiFileReport:
228228ReportType = TypeVar ("ReportType" , bound = Report )
229229
230230
231- @attrs .define
232- class TaskResult :
231+ class TaskResult (BaseModel ):
233232 task : Task
234- reports : list [Report ] = attrs . field ( factory = list )
235- subtasks : list [Task ] = attrs . field ( factory = list )
233+ reports : list [Report ] = []
234+ subtasks : list [Task ] = []
236235
237236 def add_report (self , report : Report ):
238237 self .reports .append (report )
@@ -244,9 +243,8 @@ def filter_reports(self, report_class: type[ReportType]) -> list[ReportType]:
244243 return [report for report in self .reports if isinstance (report , report_class )]
245244
246245
247- @attrs .define
248- class ProcessResult :
249- results : list [TaskResult ] = attrs .field (factory = list )
246+ class ProcessResult (BaseModel ):
247+ results : list [TaskResult ] = []
250248
251249 @property
252250 def errors (self ) -> list [ErrorReport ]:
@@ -268,7 +266,9 @@ def register(self, result: TaskResult):
268266 self .results .append (result )
269267
270268 def to_json (self , indent = " " ):
271- return to_json (self .results , indent = indent )
269+ return json .dumps (
270+ [result .model_dump (mode = "json" ) for result in self .results ], indent = indent
271+ )
272272
273273 def get_output_dir (self ) -> Optional [Path ]:
274274 try :
@@ -285,37 +285,20 @@ def get_output_dir(self) -> Optional[Path]:
285285 return None
286286
287287
288- class _JSONEncoder (json .JSONEncoder ):
289- def default (self , o ):
290- obj = o
291- if attrs .has (type (obj )):
292- extend_attr_output = True
293- attr_output = attrs .asdict (obj , recurse = not extend_attr_output )
294- attr_output ["__typename__" ] = obj .__class__ .__name__
295- return attr_output
296-
297- if isinstance (obj , Enum ):
298- return obj .name
299-
300- if isinstance (obj , Path ):
301- return str (obj )
302-
303- if isinstance (obj , bytes ):
304- try :
305- return obj .decode ()
306- except UnicodeDecodeError :
307- return str (obj )
288+ ReportModel = list [TaskResult ]
289+ ReportModelAdapter = TypeAdapter (ReportModel )
290+ """Use this to deserialize the JSON report, i.e. to load it back into
291+ Python objects.
308292
309- logger .error ("JSONEncoder met a non-JSON encodable value" , obj = obj )
310- # the usual fail path of custom JSONEncoders is to call the parent and let it fail
311- # return json.JSONEncoder.default(self, obj)
312- # instead of failing, just return something usable
313- return f"Non-JSON encodable value: { obj } "
293+ For example:
314294
295+ with open('report.json', 'r') as f:
296+ data = f.read()
297+ report_data = ReportModelAdapter.validate_json(data)
315298
316- def to_json ( obj , indent = " " ) -> str :
317- """Encode any UnBlob object as a serialized JSON."""
318- return json . dumps ( obj , cls = _JSONEncoder , indent = indent )
299+ For another example, see:
300+ tests/test_models.py::Test_to_json::test_process_result_deserialization
301+ """
319302
320303
321304class ExtractError (Exception ):
0 commit comments