Skip to content

Commit 6556a3e

Browse files
authored
Merge pull request #269 from linkml/issue-1404-load-as-dict
Add `load_as_dict` method to Loader class
2 parents ad5c7f0 + 62bacfd commit 6556a3e

File tree

6 files changed

+131
-64
lines changed

6 files changed

+131
-64
lines changed

linkml_runtime/loaders/delimited_file_loader.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,34 +19,45 @@ class DelimitedFileLoader(Loader, ABC):
1919
def delimiter(self):
2020
pass
2121

22+
def load_as_dict(self,
23+
source: str,
24+
index_slot: SlotDefinitionName = None,
25+
schema: SchemaDefinition = None,
26+
schemaview: SchemaView = None,
27+
**kwargs) -> Union[dict, List[dict]]:
28+
json_str = self._get_json_str_to_load(source, index_slot, schema, schemaview, **kwargs)
29+
return JSONLoader().load_as_dict(json_str)
2230

2331
def load_any(self, *args, **kwargs) -> Union[YAMLRoot, List[YAMLRoot]]:
2432
return self.load(*args, **kwargs)
2533

26-
2734
def loads(self, input,
2835
target_class: Type[Union[BaseModel, YAMLRoot]],
2936
index_slot: SlotDefinitionName = None,
3037
schema: SchemaDefinition = None,
3138
schemaview: SchemaView = None,
3239
**kwargs) -> str:
33-
if schemaview is None:
34-
schemaview = SchemaView(schema)
35-
configmap = get_configmap(schemaview, index_slot)
36-
config = GlobalConfig(key_configs=configmap, csv_delimiter=self.delimiter)
37-
objs = unflatten_from_csv(input, config=config, **kwargs)
38-
return JSONLoader().loads(json.dumps({index_slot: objs}), target_class=target_class)
40+
json_str = self._get_json_str_to_load(input, index_slot, schema, schemaview, **kwargs)
41+
return JSONLoader().loads(json_str, target_class=target_class)
3942

4043
def load(self, source: str,
4144
target_class: Type[Union[BaseModel, YAMLRoot]],
4245
index_slot: SlotDefinitionName = None,
4346
schema: SchemaDefinition = None,
4447
schemaview: SchemaView = None,
4548
**kwargs) -> str:
49+
json_str = self._get_json_str_to_load(source, index_slot, schema, schemaview, **kwargs)
50+
return JSONLoader().loads(json_str, target_class=target_class)
51+
52+
def _get_json_str_to_load(self,
53+
input,
54+
index_slot: SlotDefinitionName = None,
55+
schema: SchemaDefinition = None,
56+
schemaview: SchemaView = None,
57+
**kwargs):
4658
if schemaview is None:
4759
schemaview = SchemaView(schema)
4860
configmap = get_configmap(schemaview, index_slot)
4961
config = GlobalConfig(key_configs=configmap, csv_delimiter=self.delimiter)
50-
print(f'Loading from {source}')
51-
objs = unflatten_from_csv(source, config=config, **kwargs)
52-
return JSONLoader().loads(json.dumps({index_slot: objs}), target_class=target_class)
62+
objs = unflatten_from_csv(input, config=config, **kwargs)
63+
return json.dumps({index_slot: objs})

linkml_runtime/loaders/json_loader.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,27 @@
1010

1111
class JSONLoader(Loader):
1212

13-
def load_any(self, source: Union[str, dict, TextIO], target_class: Type[Union[BaseModel, YAMLRoot]], *, base_dir: Optional[str] = None,
14-
metadata: Optional[FileInfo] = None, **_) -> Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]:
15-
def loader(data: Union[str, dict], _: FileInfo) -> Optional[Dict]:
16-
data_as_dict = json.loads(data) if isinstance(data, str) else data
17-
if isinstance(data_as_dict, list):
18-
return self.json_clean(data_as_dict)
13+
def load_as_dict(self,
14+
source: Union[str, dict, TextIO],
15+
*,
16+
base_dir: Optional[str] = None,
17+
metadata: Optional[FileInfo] = None) -> Union[dict, List[dict]]:
18+
data = self._read_source(source, base_dir=base_dir, metadata=metadata, accept_header="application/ld+json, application/json, text/json")
19+
data_as_dict = json.loads(data) if isinstance(data, str) else data
20+
return self.json_clean(data_as_dict)
21+
22+
def load_any(self,
23+
source: Union[str, dict, TextIO],
24+
target_class: Type[Union[BaseModel, YAMLRoot]],
25+
*,
26+
base_dir: Optional[str] = None,
27+
metadata: Optional[FileInfo] = None,
28+
**_) -> Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]:
29+
data_as_dict = self.load_as_dict(source, base_dir=base_dir, metadata=metadata)
30+
31+
if isinstance(data_as_dict, dict):
1932
typ = data_as_dict.pop('@type', None)
2033
if typ and typ != target_class.__name__:
2134
logging.warning(f"Warning: input type mismatch. Expected: {target_class.__name__}, Actual: {typ}")
22-
return self.json_clean(data_as_dict)
2335

24-
if not metadata:
25-
metadata = FileInfo()
26-
if base_dir and not metadata.base_path:
27-
metadata.base_path = base_dir
28-
return self.load_source(source, loader, target_class,
29-
accept_header="application/ld+json, application/json, text/json", metadata=metadata)
36+
return self._construct_target_class(data_as_dict, target_class)

linkml_runtime/loaders/loader_root.py

Lines changed: 46 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -52,34 +52,10 @@ def load_source(self,
5252
:return: Instance of the target class if loader worked
5353
"""
5454

55-
# Makes coding easier down the line if we've got this, even if it is strictly internal
56-
if metadata is None:
57-
metadata = FileInfo()
58-
if not isinstance(source, dict):
59-
data = hbread(source, metadata, metadata.base_path, accept_header)
60-
else:
61-
data = source
55+
data = self._read_source(source, metadata=metadata, base_dir=metadata.base_path, accept_header=accept_header)
6256
data_as_dict = loader(data, metadata)
57+
return self._construct_target_class(data_as_dict, target_class=target_class)
6358

64-
if data_as_dict:
65-
if isinstance(data_as_dict, list):
66-
if issubclass(target_class, YAMLRoot):
67-
return [target_class(**as_dict(x)) for x in data_as_dict]
68-
elif issubclass(target_class, BaseModel):
69-
return [target_class.parse_obj(**as_dict(x)) for x in data_as_dict]
70-
else:
71-
raise ValueError(f'Cannot load list of {target_class}')
72-
elif isinstance(data_as_dict, dict):
73-
if issubclass(target_class, BaseModel):
74-
return target_class.parse_obj(data_as_dict)
75-
else:
76-
return target_class(**data_as_dict)
77-
elif isinstance(data_as_dict, JsonObj):
78-
return [target_class(**as_dict(x)) for x in data_as_dict]
79-
else:
80-
raise ValueError(f'Unexpected type {data_as_dict}')
81-
else:
82-
return None
8359

8460
def load(self, *args, **kwargs) -> Union[BaseModel, YAMLRoot]:
8561
"""
@@ -97,6 +73,9 @@ def load(self, *args, **kwargs) -> Union[BaseModel, YAMLRoot]:
9773
return results
9874
else:
9975
raise ValueError(f'Result is not an instance of BaseModel or YAMLRoot: {type(results)}')
76+
77+
def load_as_dict(self, *args, **kwargs) -> Union[dict, List[dict]]:
78+
raise NotImplementedError()
10079

10180
@abstractmethod
10281
def load_any(self, source: Union[str, dict, TextIO], target_class: Type[Union[BaseModel, YAMLRoot]], *, base_dir: Optional[str] = None,
@@ -134,3 +113,44 @@ def loads(self, source: str, target_class: Type[Union[BaseModel, YAMLRoot]], *,
134113
:return: instance of target_class
135114
"""
136115
return self.load(source, target_class, metadata=metadata)
116+
117+
def _construct_target_class(self,
118+
data_as_dict: Union[dict, List[dict]],
119+
target_class: Union[Type[YAMLRoot], Type[BaseModel]]) -> Optional[Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]]:
120+
if data_as_dict:
121+
if isinstance(data_as_dict, list):
122+
if issubclass(target_class, YAMLRoot):
123+
return [target_class(**as_dict(x)) for x in data_as_dict]
124+
elif issubclass(target_class, BaseModel):
125+
return [target_class.parse_obj(**as_dict(x)) for x in data_as_dict]
126+
else:
127+
raise ValueError(f'Cannot load list of {target_class}')
128+
elif isinstance(data_as_dict, dict):
129+
if issubclass(target_class, BaseModel):
130+
return target_class.parse_obj(data_as_dict)
131+
else:
132+
return target_class(**data_as_dict)
133+
elif isinstance(data_as_dict, JsonObj):
134+
return [target_class(**as_dict(x)) for x in data_as_dict]
135+
else:
136+
raise ValueError(f'Unexpected type {data_as_dict}')
137+
else:
138+
return None
139+
140+
def _read_source(self,
141+
source: Union[str, dict, TextIO],
142+
*,
143+
base_dir: Optional[str] = None,
144+
metadata: Optional[FileInfo] = None,
145+
accept_header: Optional[str] = "text/plain, application/yaml;q=0.9") -> Union[dict, str]:
146+
if metadata is None:
147+
metadata = FileInfo()
148+
if base_dir and not metadata.base_path:
149+
metadata.base_path = base_dir
150+
151+
if not isinstance(source, dict):
152+
data = hbread(source, metadata, metadata.base_path, accept_header)
153+
else:
154+
data = source
155+
156+
return data

linkml_runtime/loaders/yaml_loader.py

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,31 @@ class YAMLLoader(Loader):
1414
A Loader that is capable of instantiating LinkML data objects from a YAML file
1515
"""
1616

17+
def load_as_dict(self,
18+
source: Union[str, dict, TextIO],
19+
*,
20+
base_dir: Optional[str] = None,
21+
metadata: Optional[FileInfo] = None) -> Union[dict, List[dict]]:
22+
if metadata is None:
23+
metadata = FileInfo()
24+
if base_dir and not metadata.base_path:
25+
metadata.base_path = base_dir
26+
data = self._read_source(source, base_dir=base_dir, metadata=metadata, accept_header="text/yaml, application/yaml;q=0.9")
27+
if isinstance(data, str):
28+
data = StringIO(data)
29+
if metadata and metadata.source_file:
30+
data.name = os.path.relpath(metadata.source_file, metadata.base_path)
31+
return yaml.load(data, DupCheckYamlLoader)
32+
else:
33+
return data
34+
1735
def load_any(self,
1836
source: Union[str, dict, TextIO],
1937
target_class: Union[Type[YAMLRoot],Type[BaseModel]],
2038
*, base_dir: Optional[str] = None,
2139
metadata: Optional[FileInfo] = None, **_) -> Union[YAMLRoot, List[YAMLRoot]]:
22-
def loader(data: Union[str, dict], source_file: FileInfo) -> Optional[Dict]:
23-
if isinstance(data, str):
24-
data = StringIO(data)
25-
if source_file and source_file.source_file:
26-
data.name = os.path.relpath(source_file.source_file, source_file.base_path)
27-
return yaml.load(data, DupCheckYamlLoader)
28-
else:
29-
return data
30-
31-
if not metadata:
32-
metadata = FileInfo()
33-
if base_dir and not metadata.base_path:
34-
metadata.base_path = base_dir
35-
return self.load_source(source, loader, target_class, accept_header="text/yaml, application/yaml;q=0.9",
36-
metadata=metadata)
40+
data_as_dict = self.load_as_dict(source, base_dir=base_dir, metadata=metadata)
41+
return self._construct_target_class(data_as_dict, target_class)
3742

3843
def loads_any(self, source: str, target_class: Type[Union[BaseModel, YAMLRoot]], *, metadata: Optional[FileInfo] = None, **_) -> Union[BaseModel, YAMLRoot, List[BaseModel], List[YAMLRoot]]:
3944
"""

tests/test_loaders_dumpers/test_csv_tsv_loader_dumper.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,27 @@ def test_csvgen_roundtrip(self):
6262
logging.debug(f'COMPARE 2: {data}')
6363
assert roundtrip == data
6464

65+
def test_csvgen_roundtrip_to_dict(self):
66+
schemaview = SchemaView(SCHEMA)
67+
data = yaml_loader.load(DATA, target_class=Shop)
68+
csv_dumper.dump(data, to_file=OUTPUT, index_slot='all_book_series', schemaview=schemaview)
69+
roundtrip = csv_loader.load_as_dict(OUTPUT, index_slot='all_book_series', schemaview=schemaview)
70+
assert roundtrip == json_dumper.to_dict(data)
71+
6572
def test_tsvgen_roundtrip(self):
6673
schemaview = SchemaView(SCHEMA)
6774
data = yaml_loader.load(DATA, target_class=Shop)
6875
tsv_dumper.dump(data, to_file=OUTPUT, index_slot='all_book_series', schemaview=schemaview)
6976
roundtrip = tsv_loader.load(OUTPUT, target_class=Shop, index_slot='all_book_series', schemaview=schemaview)
7077
assert roundtrip == data
7178

79+
def test_tsvgen_roundtrip_to_dict(self):
80+
schemaview = SchemaView(SCHEMA)
81+
data = yaml_loader.load(DATA, target_class=Shop)
82+
tsv_dumper.dump(data, to_file=OUTPUT, index_slot='all_book_series', schemaview=schemaview)
83+
roundtrip = tsv_loader.load_as_dict(OUTPUT, index_slot='all_book_series', schemaview=schemaview)
84+
assert roundtrip == json_dumper.to_dict(data)
85+
7286
def test_csvgen_unroundtrippable(self):
7387
schemaview = SchemaView(SCHEMA)
7488
#schema = YAMLGenerator(SCHEMA).schema

tests/test_loaders_dumpers/test_loaders.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,16 @@ def test_json_loader(self):
2929
""" Load obo_sample.json, emit obo_sample_json.yaml and check the results """
3030
self.loader_test('obo_sample.json', Package, json_loader)
3131

32+
def test_json_load_to_dict(self):
33+
data = json_loader.load_as_dict('obo_sample.json', base_dir=self.env.indir)
34+
assert isinstance(data, dict)
35+
assert "system" in data
36+
37+
def test_yaml_load_to_dict(self):
38+
data = yaml_loader.load_as_dict('obo_sample.yaml', base_dir=self.env.indir)
39+
assert isinstance(data, dict)
40+
assert "system" in data
41+
3242
@unittest.skipIf(True, "This test will not work until https://github.com/digitalbazaar/pyld/issues/149 is fixed")
3343
def test_rdf_loader(self):
3444
""" Load obo_sample.ttl, emit obo_sample_ttl.yaml and check the results

0 commit comments

Comments
 (0)