|
1 | 1 | import json |
2 | 2 | import logging |
3 | 3 | import os |
4 | | -import unittest |
5 | 4 | from decimal import Decimal |
6 | 5 |
|
| 6 | +import pytest |
7 | 7 | import yaml |
8 | 8 | from rdflib import Graph, Namespace |
9 | 9 |
|
|
41 | 41 | WD = Namespace("http://www.wikidata.org/entity/") |
42 | 42 |
|
43 | 43 |
|
44 | | -class LoadersDumpersTestCase(unittest.TestCase): |
45 | | - def setUp(self): |
46 | | - view = SchemaView(SCHEMA) |
47 | | - container: Container |
48 | | - container = yaml_loader.load(DATA, target_class=Container) |
49 | | - self._check_objs(view, container) |
50 | | - test_fn = OUT_TTL |
51 | | - rdflib_dumper.dump(container, schemaview=view, to_file=test_fn, prefix_map=prefix_map) |
52 | | - container = rdflib_loader.load(test_fn, target_class=Container, schemaview=view, prefix_map=prefix_map) |
53 | | - self._check_objs(view, container) |
54 | | - test_fn = OUT_JSON |
55 | | - json_dumper.dump(container, to_file=test_fn) |
56 | | - container = json_loader.load(test_fn, target_class=Container) |
57 | | - self._check_objs(view, container) |
58 | | - test_fn = OUT_YAML |
59 | | - yaml_dumper.dump(container, to_file=test_fn) |
60 | | - container = yaml_loader.load(test_fn, target_class=Container) |
61 | | - self._check_objs(view, container) |
62 | | - # TODO: use jsonpatch to compare files |
63 | | - |
64 | | - def test_load_from_list(self): |
65 | | - """ |
66 | | - Tests the load_any loader method, which can be used to load directly to a list |
67 | | - """ |
68 | | - view = SchemaView(SCHEMA) |
69 | | - with open(DATA, encoding="UTF-8") as stream: |
70 | | - data = yaml.safe_load(stream) |
71 | | - # persons = yaml_loader.load_source(data, target_class=Person) |
72 | | - # container = Container(persons=persons) |
73 | | - person_dicts = data["persons"] |
74 | | - tuples = [(yaml_loader, yaml.dump(person_dicts)), (json_loader, json.dumps(person_dicts, default=str))] |
75 | | - for loader, person_list_str in tuples: |
76 | | - persons = loader.loads_any(person_list_str, target_class=Person) |
77 | | - assert isinstance(persons, list) |
78 | | - assert isinstance(persons[0], Person) |
79 | | - [p1] = [p for p in persons if p.id == "P:001"] |
80 | | - [p2] = [p for p in persons if p.id == "P:002"] |
81 | | - self.assertEqual(p1.name, "fred bloggs") |
82 | | - self.assertEqual(p2.name, "joe schmö") |
83 | | - self.assertEqual(p1.age_in_years, 33) |
84 | | - self.assertEqual(p1.gender.code.text, "cisgender man") |
85 | | - self.assertEqual(p2.gender.code.text, "transgender man") |
86 | | - |
87 | | - def test_encoding(self): |
88 | | - """ |
89 | | - This will reveal if generated yaml or json files are utf-8 encoded |
90 | | - """ |
91 | | - # pyyaml or json read non-ascii strings just fine no matter if the |
92 | | - # file is ascii or utf-8 encoded. So we use Python's open function |
93 | | - # to detect undesired ascii encoding. (linkml issue #634) |
94 | | - with open(OUT_YAML, encoding="UTF-8") as f: |
95 | | - [p2_name_line] = [l for l in f.readlines() if "joe schm" in l] |
96 | | - self.assertIn("joe schmö", p2_name_line) |
97 | | - |
98 | | - with open(OUT_JSON, encoding="UTF-8") as f: |
99 | | - [p2_name_line] = [l for l in f.readlines() if "joe schm" in l] |
100 | | - self.assertIn("joe schmö", p2_name_line) |
101 | | - |
102 | | - def _check_objs(self, view: SchemaView, container: Container): |
103 | | - persons = container.persons |
104 | | - orgs = container.organizations.values() |
@pytest.fixture(scope="module")
def loader_dumper_setup():
    """Round-trip the example data through each serialization, once per module.

    The container is loaded from the YAML source and then dumped and
    re-loaded as RDF, JSON and YAML, in that order. Each stage consumes the
    container produced by the previous one, and ``_check_objs`` validates the
    result after every load. The files written to ``OUT_TTL``, ``OUT_JSON``
    and ``OUT_YAML`` are not removed by this fixture.

    Returns:
        A dict holding the ``SchemaView`` under ``"view"`` and the container
        produced by the final YAML round-trip under ``"container"``.
    """
    schema_view = SchemaView(SCHEMA)
    current: Container = yaml_loader.load(DATA, target_class=Container)
    _check_objs(schema_view, current)

    # RDF round-trip: the RDF dumper and loader both need the schema and prefix map
    rdflib_dumper.dump(current, schemaview=schema_view, to_file=OUT_TTL, prefix_map=prefix_map)
    current = rdflib_loader.load(OUT_TTL, target_class=Container, schemaview=schema_view, prefix_map=prefix_map)
    _check_objs(schema_view, current)

    # JSON round-trip, starting from the container recovered from RDF
    json_dumper.dump(current, to_file=OUT_JSON)
    current = json_loader.load(OUT_JSON, target_class=Container)
    _check_objs(schema_view, current)

    # YAML round-trip, starting from the container recovered from JSON
    yaml_dumper.dump(current, to_file=OUT_YAML)
    current = yaml_loader.load(OUT_YAML, target_class=Container)
    _check_objs(schema_view, current)
    # TODO: use jsonpatch to compare files

    return {"view": schema_view, "container": current}
| 72 | + |
| 73 | + |
def test_load_from_list(loader_dumper_setup):
    """Check that ``loads_any`` loads a serialized list directly into ``Person`` objects.

    The ``persons`` list from the YAML source file is re-serialized as YAML
    text and as JSON text, and each string is handed to the matching loader's
    ``loads_any`` with ``target_class=Person``.

    The ``loader_dumper_setup`` fixture is requested but its return value is
    not used in this test.
    """
    with open(DATA, encoding="UTF-8") as stream:
        person_dicts = yaml.safe_load(stream)["persons"]

    serialized_lists = [
        (yaml_loader, yaml.dump(person_dicts)),
        # default=str stringifies any value the json encoder cannot handle natively
        (json_loader, json.dumps(person_dicts, default=str)),
    ]
    for loader, person_list_str in serialized_lists:
        persons = loader.loads_any(person_list_str, target_class=Person)
        assert isinstance(persons, list)
        assert isinstance(persons[0], Person)
        # Single-element unpacking doubles as a check that each id occurs exactly once
        [p1] = [p for p in persons if p.id == "P:001"]
        [p2] = [p for p in persons if p.id == "P:002"]
        assert p1.name == "fred bloggs"
        assert p2.name == "joe schmö"
        assert p1.age_in_years == 33
        assert p1.gender.code.text == "cisgender man"
        assert p2.gender.code.text == "transgender man"
| 94 | + |
| 95 | + |
def test_encoding(loader_dumper_setup):
    """Verify that the dumped YAML and JSON files contain non-ASCII text as UTF-8.

    Reads ``OUT_YAML`` and ``OUT_JSON`` as UTF-8 text and checks that the one
    line mentioning "joe schm" contains the literal "joe schmö".
    """
    # pyyaml and json read non-ascii strings just fine whether the file is
    # ascii or utf-8 encoded, so the files are read with Python's own open()
    # to detect undesired ascii encoding. (linkml issue #634)
    for dumped_path in (OUT_YAML, OUT_JSON):
        with open(dumped_path, encoding="UTF-8") as handle:
            # Exactly one line is expected to mention the name
            (name_line,) = [line for line in handle if "joe schm" in line]
            assert "joe schmö" in name_line
| 110 | + |
| 111 | + |
def _check_objs(view: SchemaView, container: Container):
    """Assert that ``container`` holds the expected example persons and organizations.

    Args:
        view: schema view passed by callers; not consulted by these checks.
        container: container whose ``persons`` and ``organizations`` are inspected.

    Raises:
        ValueError: if an expected id is missing or occurs more than once.
        AssertionError: if any attribute differs from the expected value.
    """

    def only(items, identifier):
        # Single-element unpacking fails unless exactly one item carries the id
        (match,) = [item for item in items if item.id == identifier]
        return match

    people = container.persons
    organizations = container.organizations.values()

    p1 = only(people, "P:001")
    p2: Person = only(people, "P:002")
    o1 = only(organizations, "ROR:1")
    o2 = only(organizations, "ROR:2")
    o3 = only(organizations, "ROR:3")
    o4 = only(organizations, "ROR:4")
    o1_categories = [category.code.text for category in o1.categories]
    o2_categories = [category.code.text for category in o2.categories]

    # Basic person attributes
    assert p1.name == "fred bloggs"
    assert p2.name == "joe schmö"
    assert p1.age_in_years == 33
    assert p1.gender.code.text == "cisgender man"
    assert p2.gender.code.text == "transgender man"

    # Organization categories, compared without regard to order
    assert sorted(o1_categories) == sorted(["non profit", "charity"])
    assert sorted(o2_categories) == sorted(["shell company"])

    # Nested records attached to the second person
    employment = p2.has_employment_history[0]
    assert employment.started_at_time == "2019-01-01"
    assert employment.is_current == True
    assert employment.employed_at == o1.id

    relationship = p2.has_familial_relationships[0]
    assert relationship.related_to == p1.id
    # TODO: check PV vs PVText
    assert str(relationship.type) == "SIBLING_OF"

    medical_event = p2.has_medical_history[0]
    assert medical_event.in_location == "GEO:1234"
    assert medical_event.diagnosis.id == "CODE:D0001"
    assert medical_event.diagnosis.name == "headache"
    assert medical_event.diagnosis.code_system == "CODE:D"

    # Check decimal representation
    assert o1.score == Decimal(1)
    assert o2.score == Decimal("1.5")
    assert o3.score == Decimal(1)
    assert o4.score == Decimal(1)
    assert o1.min_salary == Decimal("99999.00")
| 151 | + |
| 152 | + |
def test_edge_cases(loader_dumper_setup):
    """
    Tests various edge cases:

     - unprocessed triples (triples that cannot be reached via root objects)
     - mismatch between expected range categories (Type vs Class) and value (Literal vs Node)
     - complex range expressions (e.g. modeling a range as being EITHER string OR object)
    """
    # schema with the following characteristics:
    #  - reified triples
    #  - object has a complex union range (experimental new feature)
    view = SchemaView(os.path.join(INPUT_DIR, "complex_range_example.yaml"))
    taxon_prefix_map = {
        "NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_",
        "RO": "http://purl.obolibrary.org/obo/RO_",
    }
    # this graph has the following characteristics:
    #  - blank nodes to represent statements
    #  - some triples not reachable from roots
    #  - implicit schema with complex ranges (rdf:object has range of either node or literal)
    graph = Graph()
    graph.parse(os.path.join(INPUT_DIR, "bacteria-taxon-class.ttl"), format="ttl")

    def load_nodes(*, cast_literals, allow_unprocessed_triples):
        # Every call below differs only in these two flags; ``view`` is read at
        # call time, so the schema change made further down is picked up.
        return rdflib_loader.from_rdf_graph(
            graph,
            target_class=NodeObject,
            schemaview=view,
            cast_literals=cast_literals,
            allow_unprocessed_triples=allow_unprocessed_triples,
            prefix_map=taxon_prefix_map,
        )

    # strict literal handling; unprocessed triples tolerated (known issue)
    [node] = load_nodes(cast_literals=False, allow_unprocessed_triples=True)
    for statement in node.statements:
        assert statement.subject is None
        assert statement.predicate is not None
        assert statement.object is not None
        logger.info(f" x={statement}")

    expected_statements = (
        # ranges that are objects are contracted
        ("rdfs:subClassOf", "owl:Thing"),
        ("rdfs:subClassOf", "NCBITaxon:1"),
        # string ranges
        ("rdfs:label", "Bacteria"),
    )
    for predicate, value in expected_statements:
        assert Triple(subject=None, predicate=predicate, object=value) in node.statements

    with pytest.raises(ValueError):
        load_nodes(cast_literals=False, allow_unprocessed_triples=False)
        # Only reached when the loader fails to raise
        logger.error("Passed unexpectedly: there are known to be unreachable triples")

    # removing complex range, object has a range of string
    view.schema.slots["object"].exactly_one_of = []
    view.set_modified()

    # cast_literals=True is required for this load to pass
    load_nodes(cast_literals=True, allow_unprocessed_triples=True)

    with pytest.raises(ValueError):
        load_nodes(cast_literals=False, allow_unprocessed_triples=True)
        # Only reached when the loader fails to raise
        logger.error("Passed unexpectedly: rdf:object is known to have a mix of literals and nodes")
0 commit comments