Skip to content

Commit ff268a3

Browse files
authored
Merge pull request #469 from linkml/schemaview_cycle_check
schemaview.py: add cycle detection function
2 parents e63dbb0 + 7384670 commit ff268a3

File tree

3 files changed

+413
-5
lines changed

3 files changed

+413
-5
lines changed

linkml_runtime/utils/schemaview.py

Lines changed: 81 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import uuid
99
import warnings
1010
from collections import defaultdict, deque
11-
from collections.abc import Callable, Mapping
1211
from copy import copy, deepcopy
1312
from dataclasses import dataclass
1413
from enum import Enum
@@ -45,7 +44,7 @@
4544
from linkml_runtime.utils.pattern import PatternResolver
4645

4746
if TYPE_CHECKING:
48-
from collections.abc import Mapping
47+
from collections.abc import Callable, Iterable, Mapping
4948
from types import NotImplementedType
5049

5150
from linkml_runtime.utils.metamodelcore import URI, URIorCURIE
@@ -92,13 +91,90 @@ class OrderedBy(Enum):
9291
"""
9392

9493

94+
WHITE = 0
95+
GREY = 1
96+
BLACK = 2
97+
98+
99+
def detect_cycles(f: Callable[[Any], Iterable[Any] | None], x: Any) -> None:
100+
"""Detect cycles in a graph, using function `f` to walk the graph, starting at node `x`.
101+
102+
Uses the classic white/grey/black colour coding algorithm to track which nodes have been explored. In this
103+
case, "node" refers to any element in a schema and "neighbours" are elements that can be reached from that
104+
node by executing function `f`.
105+
106+
WHITE: unexplored
107+
GREY: node is being processed; processing includes exploring all neighbours reachable via f(node)
108+
BLACK: node and all of its neighbours (and their neighbours, etc.) have been processed
109+
110+
A directed cycle reachable from node `x` raises a ValueError.
111+
112+
:param f: function that returns an iterable of neighbouring nodes (parents or children)
113+
:type f: Callable[[Any], Iterable[Any] | None]
114+
:param x: graph node
115+
:type x: Any
116+
:raises ValueError: if a cycle is discovered through repeated calls to f(x)
117+
"""
118+
# keep track of the processing state of nodes in the graph
119+
processing_state: dict[Any, int] = {}
120+
121+
# Stack entries are (node, processed_flag).
122+
# processed_flag == True means all neighbours (nodes generated by running `f(node)`)
123+
# have been added to the todo stack and the node can be marked BLACK.
124+
todo: list[tuple[Any, bool]] = [(x, False)]
125+
126+
while todo:
127+
node, processed_flag = todo.pop()
128+
129+
if processed_flag:
130+
# all neighbours have been processed
131+
processing_state[node] = BLACK
132+
continue
133+
134+
# check the state of this node
135+
node_state = processing_state.get(node, WHITE)
136+
137+
if node_state == GREY:
138+
# this node was already being processed
139+
# we have discovered an edge back to that node - i.e. a cycle
140+
err_msg = f"Cycle detected at node {node!r}"
141+
raise ValueError(err_msg)
142+
143+
if node_state == BLACK:
144+
# already fully explored - nothing to do
145+
continue
146+
147+
# mark the node as being processed (GREY) and set the processed_flag to True
148+
processing_state[node] = GREY
149+
todo.append((node, True))
150+
151+
# push the neighbours on to the processing stack
152+
todo.extend((child, False) for child in f(node) or [])
153+
154+
95155
def _closure(
96-
f: Callable,
97-
x,
156+
f: Callable[[Any], Iterable[Any] | None],
157+
x: Any,
98158
reflexive: bool = True,
99159
depth_first: bool = True,
100-
**kwargs: dict[str, Any] | None, # noqa: ARG001
160+
**kwargs: dict[str, Any] | None,
101161
) -> list[str | ElementName | ClassDefinitionName | EnumDefinitionName | SlotDefinitionName | TypeDefinitionName]:
162+
"""Walk the graph using function `f` and generate the closure.
163+
164+
:param f: function that returns an iterable of neighbouring nodes (parents or children)
165+
:type f: Callable[[Any], Iterable[Any] | None]
166+
:param x: start node
167+
:type x: Any
168+
:param reflexive: assume the graph is reflexive, defaults to True
169+
:type reflexive: bool, optional
170+
:param depth_first: depth first traversal, defaults to True
171+
:type depth_first: bool, optional
172+
:return: list of nodes
173+
:rtype: list[str | ElementName | ClassDefinitionName | EnumDefinitionName | SlotDefinitionName | TypeDefinitionName]
174+
"""
175+
if kwargs and kwargs.get("detect_cycles"):
176+
detect_cycles(f, x)
177+
102178
rv = [x] if reflexive else []
103179
visited = []
104180
todo = [x]

tests/test_utils/input/cycles.yaml

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
# yaml-language-server: $schema=https://linkml.io/linkml-model/linkml_model/jsonschema/meta.schema.json
id: https://example.org/test-cycle-schema
name: TestCycleSchema
description: |
  Schema with intentional cycles in classes, mixins, slot ranges, and types.
prefixes:
  ex: https://example.org/schema/

default_prefix: ex
default_range: string

slots:
  identifier_slot:
    range: string
    identifier: true
  noncycle_slot:
  slot_a:
  slot_b:
  slot_c:
  mixed_in_slot_a:
  mixed_in_slot_b:

classes:
  # Non-cyclic base class with some children
  BaseClass:
    description: Simple base class with no cycles.
    slots:
      - noncycle_slot
      - identifier_slot

  MixinA:
    mixin: true
    slots:
      - mixed_in_slot_a

  MixinB:
    mixin: true
    slots:
      - mixed_in_slot_b

  NonCycleClassA:
    is_a: BaseClass

  NonCycleClassB:
    mixins:
      - MixinA
    is_a: NonCycleClassA

  NonCycleClassC:
    mixins:
      - MixinB
    is_a: NonCycleClassA

  # cycle in the range of a slot
  IdentifierCycleClassA:
    description: Class with a cycle in the range for the identifier slot.
    attributes:
      id_slot:
        range: IdentifierCycleClassB
        identifier: true

  IdentifierCycleClassB:
    description: Class with a cycle in the range for the identifier slot.
    attributes:
      id_slot:
        range: IdentifierCycleClassC
        identifier: true

  IdentifierCycleClassC:
    description: Class with a cycle in the range for the identifier slot.
    attributes:
      id_slot:
        range: IdentifierCycleClassA
        identifier: true

  IdentifierCycleClassD:
    description: Class with itself as the range for the identifier slot.
    attributes:
      id_slot:
        range: IdentifierCycleClassD
        identifier: true

  # Cycle: ClassA -> ClassB -> ClassC -> ClassA
  # ClassD and ClassE have the misfortune of inheriting all this nonsense.
  ClassA:
    is_a: ClassB
    description: Part of a subclass inheritance cycle (A -> B).
    slots:
      - slot_a

  ClassB:
    is_a: ClassC
    description: Part of a subclass inheritance cycle (B -> C).
    slots:
      - slot_b

  ClassC:
    is_a: ClassA
    description: Part of a subclass inheritance cycle (C -> A).
    slots:
      - slot_c

  ClassD:
    is_a: ClassA

  ClassE:
    is_a: ClassD

  # Cycle: ClassF -> ClassF
  # ClassG is_a ClassF so inherits the cycle
  ClassF:
    is_a: ClassF

  ClassG:
    is_a: ClassF

  # Mixin cycle (mixins reference each other)
  Mixin1:
    description: Mixin that pulls in Mixin2, forming a mixin cycle.
    mixin: true
    mixins:
      - Mixin2

  Mixin2:
    description: Mixin that pulls in Mixin1, forming a mixin cycle.
    mixin: true
    mixins:
      - Mixin1

  MixedClass:
    description: Class that applies both Mixin1 and Mixin2, thus inheriting the mixin cycle.
    mixins:
      - Mixin1
      - Mixin2


types:
  # string and subtypes
  string:
    uri: ex:string
    base: str
    description: A character string

  super_string:
    typeof: string
    description: Type with ancestors, no cycles

  supreme_string:
    typeof: super_string
    description: Type with ancestors, no cycles

  integer:
    uri: ex:integer
    base: int
    description: An integer

  boolean:
    uri: ex:boolean
    base: Bool
    repr: bool
    description: Your basic bool.

  # in a cycle with itself!
  circle:
    typeof: circle
    uri: ex:circle

  circle_of_life:
    typeof: circle

  # cycle between type_circular and circular_type
  circular_type:
    typeof: type_circular
    uri: ex:circ
    description: Type in a cycle

  type_circular:
    typeof: circular_type
    uri: ex:circ
    description: Type in a cycle

  # inherit the type_circular/circular_type confusion
  semi_circular_type:
    typeof: circular_type
    description: Type with cyclic ancestors

  curve_type:
    typeof: semi_circular_type
    description: Type with cyclic ancestors

0 commit comments

Comments
 (0)