7676}
7777
7878
79+ def _is_list_of_lists (x : Any ) -> bool :
80+ """
81+ True x is of the form `[[...`
82+
83+ >>> _is_list_of_lists([1])
84+ False
85+ >>> _is_list_of_lists([[1,2],[3,4]])
86+ True
87+ >>> _is_list_of_lists([[]])
88+ True
89+
90+ :param x: element to be tested
91+ :return: True if LoL
92+ """
93+ return x and isinstance (x , list ) and isinstance (x [0 ], list )
94+
95+
def linearize_nested_lists(nested_list: List, is_row_ordered=True):
    """
    Flatten a nested-list array representation into a single linear list.

    >>> linearize_nested_lists([[11,12,13],[21,22,23],[31,32,33]], is_row_ordered=True)
    [11, 12, 13, 21, 22, 23, 31, 32, 33]

    >>> linearize_nested_lists([[11,12,13],[21,22,23],[31,32,33]], is_row_ordered=False)
    [11, 21, 31, 12, 22, 32, 13, 23, 33]

    :param nested_list: the nested lists to flatten
    :param is_row_ordered: if True (default), walk the nesting depth-first, left to right,
        descending into every `list` element; if False, delegate to
        `_linearize_nested_list_column_order`
    :return: a new flat list of the elements
    """
    if not is_row_ordered:
        return _linearize_nested_list_column_order(nested_list)

    # Row order: explicit stack of iterators rather than recursion.
    flattened = []
    pending = [iter(nested_list)]
    while pending:
        for element in pending[-1]:
            if isinstance(element, list):
                # Descend; the parent iterator keeps its position for later.
                pending.append(iter(element))
                break
            flattened.append(element)
        else:
            # Innermost iterator is exhausted: resume its parent.
            pending.pop()
    return flattened
125+
126+
127+ def _linearize_nested_list_column_order (nested_list ):
128+ result = []
129+ if not nested_list :
130+ return result
131+
132+ num_rows = len (nested_list )
133+ max_row_len = max (len (row ) for row in nested_list )
134+
135+ for col in range (max_row_len ):
136+ for row in range (num_rows ):
137+ if col < len (nested_list [row ]):
138+ result .append (nested_list [row ][col ])
139+
140+ return result
141+
79142class CollectionForm (Enum ):
80143 """Form of a schema element.
81144 See Part 6 of the LinkML specification"""
@@ -85,6 +148,7 @@ class CollectionForm(Enum):
85148 CompactDict = "CompactDict"
86149 SimpleDict = "SimpleDict"
87150 List = "List"
151+ ListOfLists = "ListOfLists"
88152
89153
90154COLLECTION_FORM_NORMALIZATION = Tuple [CollectionForm , CollectionForm ]
@@ -227,6 +291,8 @@ def _remove_pk(obj: dict, pk_slot_name: str) -> dict:
227291
228292def _add_pk (obj : dict , pk_slot_name : str , pk_val : Any ) -> dict :
229293 """Make a new ExpandedDict ready copy of a dict, adding the pk_slot_name"""
294+ if obj is None :
295+ return {pk_slot_name : pk_val }
230296 if pk_slot_name not in obj :
231297 obj = copy (obj )
232298 obj [pk_slot_name ] = pk_val
@@ -327,7 +393,7 @@ def _create_index_slot(
327393 :return:
328394 """
329395 target = self ._schema_root (target )
330- slot = SlotDefinition (name = "temp" , range = target )
396+ slot = SlotDefinition (name = "temp" , range = target , inlined = True )
331397 if input_object is None or isinstance (input_object , dict ):
332398 slot .inlined = True
333399 elif isinstance (input_object , list ):
@@ -356,7 +422,7 @@ def normalize_slot_value(
356422 normalized_object = copy (input_object )
357423 if isinstance (range_element , ClassDefinition ):
358424 pk_slot_name = self ._identifier_slot_name (range_element )
359- normalized_object = self .normalize_to_collection_from (
425+ normalized_object = self .normalize_to_collection_form (
360426 form , normalized_object , parent_slot , pk_slot_name , report
361427 )
362428 # Validate
@@ -383,12 +449,15 @@ def normalize_slot_value(
383449 k : self .normalize_instance (v , simple_dict_value_slot , new_report )
384450 for k , v in normalized_object .items ()
385451 }
452+ elif _is_list_of_lists (normalized_object ):
453+ raise NotImplementedError (f"List of Lists: { normalized_object } " )
386454 elif isinstance (normalized_object , list ):
387455 output_object = [
388456 self .normalize_instance (v , parent_slot , new_report )
389457 for v in normalized_object
390458 ]
391459 else :
460+ # normalize an instance
392461 output_object = self .normalize_instance (
393462 normalized_object , parent_slot , new_report
394463 )
@@ -432,14 +501,31 @@ def infer_slot_collection_form(self, parent_slot: SlotDefinition) -> CollectionF
432501 return CollectionForm .ExpandedDict
433502 return CollectionForm .CompactDict
434503
435- def normalize_to_collection_from (
504+ def normalize_to_collection_form (
436505 self ,
437506 form : CollectionForm ,
438507 input_object : Any ,
439508 slot : SlotDefinition ,
440509 pk_slot_name : SlotDefinitionName ,
441510 report : Report ,
442511 ) -> Any :
512+ """
513+ Normalizes the input object to a defined form
514+
515+ :param form:
516+ :param input_object:
517+ :param slot:
518+ :param pk_slot_name:
519+ :param report:
520+ :return:
521+ """
522+ if _is_list_of_lists (input_object ):
523+ if form != CollectionForm .List :
524+ return input_object
525+ if not any (impl for impl in slot .implements if impl == "linkml:elements" ):
526+ return input_object
527+ is_row_ordered = not any (impl for impl in slot .implements if impl == "linkml:ColumnOrderedArray" )
528+ input_object = linearize_nested_lists (input_object , is_row_ordered )
443529 if form == CollectionForm .NonCollection :
444530 return self .ensure_non_collection (input_object , slot , pk_slot_name , report )
445531 elif form == CollectionForm .List :
@@ -663,7 +749,7 @@ def normalize_instance(
663749 elif isinstance (range_element , TypeDefinition ):
664750 return self .normalize_type (input_object , range_element , report , parent_slot )
665751 else :
666- raise ValueError ( f"Cannot normalize: unknown range { parent_slot . range } " )
752+ return input_object
667753
668754 def normalize_reference (
669755 self , input_object : dict , target : ClassDefinition , report : Report
@@ -672,7 +758,7 @@ def normalize_reference(
672758 if pk_slot is None :
673759 raise AssertionError (f"Cannot normalize: no primary key for { target .name } " )
674760 return self .normalize_type (
675- input_object , self .derived_schema .types [ pk_slot .range ] , report
761+ input_object , self .derived_schema .types . get ( pk_slot .range , None ) , report
676762 )
677763
678764 def normalize_object (
@@ -769,12 +855,14 @@ def normalize_enum(
769855 def normalize_type (
770856 self ,
771857 input_object : Any ,
772- target : TypeDefinition ,
858+ target : Optional [ TypeDefinition ] ,
773859 report : Report ,
774860 parent_slot : SlotDefinition = None ,
775861 ) -> Any :
776862 if input_object is None :
777863 return None
864+ if target is None :
865+ return input_object
778866 output_value = input_object
779867 if target .base in XSD_OR_BASE_TO_PYTHON :
780868 expected_python_type = XSD_OR_BASE_TO_PYTHON [target .base ]
@@ -870,7 +958,7 @@ def subsumes(self, parent: ClassDefinition, child: ClassDefinition):
870958 child .name , reflexive = True
871959 )
872960
873- def _slot_range_element (self , slot : SlotDefinition ) -> Element :
961+ def _slot_range_element (self , slot : SlotDefinition ) -> Optional [ Element ] :
874962 ds = self .derived_schema
875963 sr = slot .range
876964 if sr in ds .classes :
@@ -880,7 +968,7 @@ def _slot_range_element(self, slot: SlotDefinition) -> Element:
880968 elif sr in ds .types :
881969 return ds .types [sr ]
882970 else :
883- raise ValueError ( f"Undefined range { sr } " )
971+ return None
884972
885973 def _slot_collection_form (self , slot : SlotDefinition ) -> CollectionForm :
886974 if not slot .multivalued :
0 commit comments