Skip to content

Commit ad5c7f0

Browse files
authored
Merge pull request #268 from linkml/issue-1337-csv-delimiter
Add new TSV loader/dumper classes
2 parents 10e8d65 + a9bb788 commit ad5c7f0

File tree

9 files changed

+149
-86
lines changed

9 files changed

+149
-86
lines changed

linkml_runtime/dumpers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from linkml_runtime.dumpers.json_dumper import JSONDumper
22
from linkml_runtime.dumpers.rdf_dumper import RDFDumper
33
from linkml_runtime.dumpers.rdflib_dumper import RDFLibDumper
4+
from linkml_runtime.dumpers.tsv_dumper import TSVDumper
45
from linkml_runtime.dumpers.yaml_dumper import YAMLDumper
56
from linkml_runtime.dumpers.csv_dumper import CSVDumper
67

@@ -9,3 +10,4 @@
910
rdflib_dumper = RDFLibDumper()
1011
yaml_dumper = YAMLDumper()
1112
csv_dumper = CSVDumper()
13+
tsv_dumper = TSVDumper()
Lines changed: 5 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,8 @@
1-
import io
2-
import yaml
3-
import json
4-
from typing import Union
5-
from pydantic import BaseModel
1+
from linkml_runtime.dumpers.delimited_file_dumper import DelimitedFileDumper
62

7-
from linkml_runtime.dumpers.dumper_root import Dumper
8-
from linkml_runtime.dumpers.json_dumper import JSONDumper
9-
from linkml_runtime.utils.yamlutils import YAMLRoot
10-
from linkml_runtime.linkml_model.meta import SlotDefinitionName, SchemaDefinition
11-
from linkml_runtime.utils.schemaview import SchemaView
123

13-
from linkml_runtime.utils.csvutils import GlobalConfig, get_configmap
14-
from json_flattener import flatten_to_csv
4+
class CSVDumper(DelimitedFileDumper):
155

16-
17-
class CSVDumper(Dumper):
18-
19-
def dumps(self, element: Union[BaseModel, YAMLRoot],
20-
index_slot: SlotDefinitionName = None,
21-
schema: SchemaDefinition = None,
22-
schemaview: SchemaView = None,
23-
**kwargs) -> str:
24-
""" Return element formatted as CSV lines """
25-
json_dumper = JSONDumper()
26-
element_j = json.loads(json_dumper.dumps(element))
27-
objs = element_j[index_slot]
28-
if schemaview is None:
29-
schemaview = SchemaView(schema)
30-
configmap = get_configmap(schemaview, index_slot)
31-
config = GlobalConfig(key_configs=configmap)
32-
output = io.StringIO()
33-
flatten_to_csv(objs, output, config=config, **kwargs)
34-
return output.getvalue()
6+
@property
def delimiter(self):
    # CSV output separates fields with a comma.
    separator = ","
    return separator
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import io
2+
import yaml
3+
import json
4+
from abc import ABC, abstractmethod
5+
from typing import Union
6+
from pydantic import BaseModel
7+
8+
from linkml_runtime.dumpers.dumper_root import Dumper
9+
from linkml_runtime.dumpers.json_dumper import JSONDumper
10+
from linkml_runtime.utils.yamlutils import YAMLRoot
11+
from linkml_runtime.linkml_model.meta import SlotDefinitionName, SchemaDefinition
12+
from linkml_runtime.utils.schemaview import SchemaView
13+
14+
from linkml_runtime.utils.csvutils import GlobalConfig, get_configmap
15+
from json_flattener import flatten_to_csv
16+
17+
18+
class DelimitedFileDumper(Dumper, ABC):
    """Abstract dumper that writes delimiter-separated text.

    Subclasses supply the separator through the ``delimiter`` property;
    ``dumps`` passes it to json_flattener as ``csv_delimiter``.
    """

    @property
    @abstractmethod
    def delimiter(self):
        """Field separator to use; concrete subclasses must override this."""
        pass

    def dumps(self, element: Union[BaseModel, YAMLRoot],
              index_slot: SlotDefinitionName = None,
              schema: SchemaDefinition = None,
              schemaview: SchemaView = None,
              **kwargs) -> str:
        """Return the objects under ``index_slot`` as delimiter-separated text.

        :param element: container object; it is serialised with ``JSONDumper``
            and the value stored under ``index_slot`` is what gets flattened
        :param index_slot: key of the serialised element holding the rows
        :param schema: used to build a ``SchemaView`` when ``schemaview`` is None
        :param schemaview: schema view handed to ``get_configmap``
        :param kwargs: forwarded unchanged to ``flatten_to_csv``
        :return: everything ``flatten_to_csv`` wrote, as one string
        """
        # Round-trip through JSON so the flattener receives plain dicts/lists.
        serialised = JSONDumper().dumps(element)
        rows = json.loads(serialised)[index_slot]
        view = SchemaView(schema) if schemaview is None else schemaview
        flattener_config = GlobalConfig(
            key_configs=get_configmap(view, index_slot),
            csv_delimiter=self.delimiter,
        )
        buffer = io.StringIO()
        flatten_to_csv(rows, buffer, config=flattener_config, **kwargs)
        return buffer.getvalue()
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from linkml_runtime.dumpers.delimited_file_dumper import DelimitedFileDumper
2+
3+
4+
class TSVDumper(DelimitedFileDumper):
    """Delimited-file dumper whose field separator is a tab character."""

    @property
    def delimiter(self):
        # TSV output separates fields with a single tab.
        separator = "\t"
        return separator

linkml_runtime/loaders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from linkml_runtime.loaders.json_loader import JSONLoader
22
from linkml_runtime.loaders.rdf_loader import RDFLoader
33
from linkml_runtime.loaders.rdflib_loader import RDFLibLoader
4+
from linkml_runtime.loaders.tsv_loader import TSVLoader
45
from linkml_runtime.loaders.yaml_loader import YAMLLoader
56
from linkml_runtime.loaders.csv_loader import CSVLoader
67

@@ -9,3 +10,4 @@
910
rdflib_loader = RDFLibLoader()
1011
yaml_loader = YAMLLoader()
1112
csv_loader = CSVLoader()
13+
tsv_loader = TSVLoader()
Lines changed: 6 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,7 @@
1-
from json_flattener import unflatten_from_csv, KeyConfig, GlobalConfig, Serializer
2-
import json
3-
from typing import Type, Union, List
4-
from linkml_runtime.utils.yamlutils import YAMLRoot
5-
from pydantic import BaseModel
1+
from linkml_runtime.loaders.delimited_file_loader import DelimitedFileLoader
62

7-
from linkml_runtime.loaders.loader_root import Loader
8-
from linkml_runtime.loaders.json_loader import JSONLoader
9-
from linkml_runtime.linkml_model.meta import SlotDefinitionName, SchemaDefinition, ClassDefinition
10-
from linkml_runtime.utils.yamlutils import YAMLRoot
11-
from linkml_runtime.utils.schemaview import SchemaView
12-
from linkml_runtime.utils.csvutils import get_configmap
13-
14-
class CSVLoader(Loader):
15-
16-
def load_any(self, *args, **kwargs) -> Union[YAMLRoot, List[YAMLRoot]]:
17-
return self.load(*args, **kwargs)
18-
19-
20-
def loads(self, input,
21-
target_class: Type[Union[BaseModel, YAMLRoot]],
22-
index_slot: SlotDefinitionName = None,
23-
schema: SchemaDefinition = None,
24-
schemaview: SchemaView = None,
25-
**kwargs) -> str:
26-
if schemaview is None:
27-
schemaview = SchemaView(schema)
28-
configmap = get_configmap(schemaview, index_slot)
29-
config = GlobalConfig(key_configs=configmap)
30-
objs = unflatten_from_csv(input, config=config, **kwargs)
31-
return JSONLoader().loads(json.dumps({index_slot: objs}), target_class=target_class)
32-
33-
def load(self, source: str,
34-
target_class: Type[Union[BaseModel, YAMLRoot]],
35-
index_slot: SlotDefinitionName = None,
36-
schema: SchemaDefinition = None,
37-
schemaview: SchemaView = None,
38-
**kwargs) -> str:
39-
if schemaview is None:
40-
schemaview = SchemaView(schema)
41-
configmap = get_configmap(schemaview, index_slot)
42-
config = GlobalConfig(key_configs=configmap)
43-
print(f'Loading from {source}')
44-
objs = unflatten_from_csv(source, config=config, **kwargs)
45-
return JSONLoader().loads(json.dumps({index_slot: objs}), target_class=target_class)
3+
class CSVLoader(DelimitedFileLoader):
    """Delimited-file loader whose field separator is a comma."""

    @property
    def delimiter(self):
        # CSV input separates fields with a comma.
        separator = ","
        return separator
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
from abc import ABC, abstractmethod
2+
from json_flattener import unflatten_from_csv, KeyConfig, GlobalConfig, Serializer
3+
import json
4+
from typing import Type, Union, List
5+
from linkml_runtime.utils.yamlutils import YAMLRoot
6+
from pydantic import BaseModel
7+
8+
from linkml_runtime.loaders.loader_root import Loader
9+
from linkml_runtime.loaders.json_loader import JSONLoader
10+
from linkml_runtime.linkml_model.meta import SlotDefinitionName, SchemaDefinition, ClassDefinition
11+
from linkml_runtime.utils.yamlutils import YAMLRoot
12+
from linkml_runtime.utils.schemaview import SchemaView
13+
from linkml_runtime.utils.csvutils import get_configmap
14+
15+
class DelimitedFileLoader(Loader, ABC):
    """Abstract loader for delimiter-separated text.

    Subclasses supply the separator through the ``delimiter`` property. Rows
    are unflattened with json_flattener using a key configuration derived from
    the schema, wrapped as ``{index_slot: rows}``, serialised to JSON and
    handed to ``JSONLoader`` to build ``target_class``.
    """

    @property
    @abstractmethod
    def delimiter(self) -> str:
        """Field separator passed to json_flattener as ``csv_delimiter``."""

    def load_any(self, *args, **kwargs) -> Union[YAMLRoot, List[YAMLRoot]]:
        """Delegate to ``load`` with the same arguments."""
        return self.load(*args, **kwargs)

    def _load_delimited(self, source, target_class, index_slot, schema,
                        schemaview, flattener_kwargs):
        """Shared implementation behind ``loads`` and ``load``.

        ``flattener_kwargs`` is taken as a plain dict (not ``**kwargs``) so
        that caller-supplied option names can never collide with this helper's
        own parameter names.
        """
        if schemaview is None:
            schemaview = SchemaView(schema)
        configmap = get_configmap(schemaview, index_slot)
        config = GlobalConfig(key_configs=configmap, csv_delimiter=self.delimiter)
        objs = unflatten_from_csv(source, config=config, **flattener_kwargs)
        return JSONLoader().loads(json.dumps({index_slot: objs}), target_class=target_class)

    def loads(self, input,
              target_class: Type[Union[BaseModel, YAMLRoot]],
              index_slot: SlotDefinitionName = None,
              schema: SchemaDefinition = None,
              schemaview: SchemaView = None,
              **kwargs) -> Union[BaseModel, YAMLRoot]:
        """Load ``target_class`` from ``input``.

        :param input: forwarded unchanged to ``unflatten_from_csv``
        :param target_class: class passed to ``JSONLoader().loads``
        :param index_slot: key under which the unflattened rows are wrapped
        :param schema: used to build a ``SchemaView`` when ``schemaview`` is None
        :param schemaview: schema view handed to ``get_configmap``
        :param kwargs: forwarded unchanged to ``unflatten_from_csv``
        :return: the result of ``JSONLoader().loads`` for ``target_class``
        """
        # NOTE(review): `input` takes exactly the same path as the file
        # `source` in load(); confirm unflatten_from_csv accepts in-memory
        # text and not only a file path.
        return self._load_delimited(input, target_class, index_slot, schema,
                                    schemaview, kwargs)

    def load(self, source: str,
             target_class: Type[Union[BaseModel, YAMLRoot]],
             index_slot: SlotDefinitionName = None,
             schema: SchemaDefinition = None,
             schemaview: SchemaView = None,
             **kwargs) -> Union[BaseModel, YAMLRoot]:
        """Load ``target_class`` from ``source``.

        :param source: forwarded unchanged to ``unflatten_from_csv``
        :param target_class: class passed to ``JSONLoader().loads``
        :param index_slot: key under which the unflattened rows are wrapped
        :param schema: used to build a ``SchemaView`` when ``schemaview`` is None
        :param schemaview: schema view handed to ``get_configmap``
        :param kwargs: forwarded unchanged to ``unflatten_from_csv``
        :return: the result of ``JSONLoader().loads`` for ``target_class``
        """
        # Imported locally so this class does not depend on a module-level
        # import that the original file does not have.
        import logging

        # Library code should not write to stdout; report through logging so
        # the application decides whether the message is shown.
        logging.getLogger(__name__).debug("Loading from %s", source)
        return self._load_delimited(source, target_class, index_slot, schema,
                                    schemaview, kwargs)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from linkml_runtime.loaders.delimited_file_loader import DelimitedFileLoader
2+
3+
class TSVLoader(DelimitedFileLoader):
    """Delimited-file loader whose field separator is a tab character."""

    @property
    def delimiter(self):
        # TSV input separates fields with a single tab.
        separator = "\t"
        return separator

tests/test_loaders_dumpers/test_csv_loader_dumper.py renamed to tests/test_loaders_dumpers/test_csv_tsv_loader_dumper.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@
99
from linkml_runtime.loaders import yaml_loader
1010
from linkml_runtime.utils.formatutils import remove_empty_items, is_empty
1111
from linkml_runtime.utils.schemaview import SchemaView
12-
from linkml_runtime.dumpers import csv_dumper
13-
from linkml_runtime.loaders import csv_loader
12+
from linkml_runtime.dumpers import csv_dumper, tsv_dumper
13+
from linkml_runtime.loaders import csv_loader, tsv_loader
1414
from linkml_runtime.utils.yamlutils import as_json_object
15-
from tests.test_loaders_dumpers.models.books_normalized import Shop, Book, GenreEnum, BookSeries
15+
from tests.test_loaders_dumpers.models.books_normalized import Author, Review, Shop, Book, GenreEnum, BookSeries
1616

1717

1818
ROOT = os.path.abspath(os.path.dirname(__file__))
@@ -30,25 +30,27 @@ def _json(obj) -> str:
3030
return json.dumps(obj, indent=' ', sort_keys=True)
3131

3232

33-
class CSVGenTestCase(unittest.TestCase):
33+
class CsvAndTsvGenTestCase(unittest.TestCase):
3434

3535
def test_object_model(self):
3636
book = Book(id='B1', genres=['fantasy'], creator={})
37-
print(book.genres)
38-
print(type(book.genres[0]))
3937
logging.debug(as_json_obj(book.genres[0]))
4038
assert str(book.genres[0]) == 'fantasy'
4139
assert book.genres[0].code.text == 'fantasy'
4240
processed = remove_empty_items(book.genres)
43-
print(f'PR={processed}')
4441
assert processed[0] == 'fantasy'
45-
series = BookSeries(id='S1')
42+
series = BookSeries(id='S1', creator=Author(name="Q. Writer"), reviews=[Review(rating=5)])
4643
series.books.append(book)
4744
schemaview = SchemaView(SCHEMA)
4845
shop = Shop()
49-
shop.all_book_series.append(book)
50-
#csvstr = csv_dumper.dumps(shop, index_slot='all_book_series', schemaview=schemaview)
51-
#logging.debug(csvstr)
46+
shop.all_book_series.append(series)
47+
48+
csvstr = csv_dumper.dumps(shop, index_slot='all_book_series', schemaview=schemaview)
49+
assert "," in csvstr
50+
assert "\t" not in csvstr
51+
52+
tsvstr = tsv_dumper.dumps(shop, index_slot='all_book_series', schemaview=schemaview)
53+
assert "\t" in tsvstr
5254

5355
def test_csvgen_roundtrip(self):
5456
schemaview = SchemaView(SCHEMA)
@@ -60,6 +62,13 @@ def test_csvgen_roundtrip(self):
6062
logging.debug(f'COMPARE 2: {data}')
6163
assert roundtrip == data
6264

65+
def test_tsvgen_roundtrip(self):
    # Dump the DATA fixture as TSV to OUTPUT, load it back, and expect an
    # object equal to the one that was dumped.
    view = SchemaView(SCHEMA)
    original = yaml_loader.load(DATA, target_class=Shop)
    shared = dict(index_slot='all_book_series', schemaview=view)
    tsv_dumper.dump(original, to_file=OUTPUT, **shared)
    reloaded = tsv_loader.load(OUTPUT, target_class=Shop, **shared)
    assert reloaded == original
71+
6372
def test_csvgen_unroundtrippable(self):
6473
schemaview = SchemaView(SCHEMA)
6574
#schema = YAMLGenerator(SCHEMA).schema
@@ -84,6 +93,13 @@ def test_csvgen_unroundtrippable(self):
8493
logging.debug(json_dumper.dumps(roundtrip))
8594
assert roundtrip == data
8695

96+
def test_tsvgen_unroundtrippable(self):
    # Dump the DATA2 fixture as TSV to OUTPUT2, load it back, and expect an
    # object equal to the one that was dumped.
    view = SchemaView(SCHEMA)
    original = yaml_loader.load(DATA2, target_class=Shop)
    # Sanity-check the fixture before dumping it.
    first_genre = original.all_book_series[0].genres[0]
    assert str(first_genre) == 'fantasy'
    shared = dict(index_slot='all_book_series', schemaview=view)
    tsv_dumper.dump(original, to_file=OUTPUT2, **shared)
    reloaded = tsv_loader.load(OUTPUT2, target_class=Shop, **shared)
    assert reloaded == original
87103

88104

89105

0 commit comments

Comments
 (0)