Skip to content

Commit 9c6a556

Browse files
authored
[ENH] Possibly/semi directed paths in mixed-edge-graphs (#101)
* Adding semi directed path functions and unit-tests * Fix drawing of DiGraphs when directed/undirected edges show up simultaneously --------- Signed-off-by: Adam Li <adam2392@gmail.com>
1 parent 7151bed commit 9c6a556

File tree

12 files changed

+398
-12
lines changed

12 files changed

+398
-12
lines changed

doc/api.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ causal graph operations.
6161
find_connected_pairs
6262
add_all_snode_combinations
6363
compute_invariant_domains_per_node
64+
is_semi_directed_path
65+
all_semi_directed_paths
6466

6567
Conversions between other package's causal graphs
6668
=================================================

doc/reference/algorithms/index.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,15 @@ Algorithms for handling acyclicity
6363
:toctree: ../../generated/
6464

6565
acyclification
66+
67+
68+
***************************************
69+
Semi-directed (possibly-directed) Paths
70+
***************************************
71+
72+
.. automodule:: pywhy_graphs.algorithms.semi_directed_paths
73+
.. autosummary::
74+
:toctree: ../../generated/
75+
76+
all_semi_directed_paths
77+
is_semi_directed_path

doc/whats_new/v0.2.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ Changelog
2929
- |Feature| Implement and test functions to convert a DAG to MAG, by `Aryan Roy`_ (:pr:`96`)
3030
- |Feature| Implement and test functions to convert a PAG to MAG, by `Aryan Roy`_ (:pr:`93`)
3131
- |API| Remove support for Python 3.8 by `Adam Li`_ (:pr:`99`)
32+
- |Feature| Implement a suite of functions for finding and checking semi-directed paths on a mixed-edge graph, by `Adam Li`_ (:pr:`101`)
3233

3334
Code and Documentation Contributors
3435
-----------------------------------

examples/mixededge/plot_mixed_edge_graph.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,8 @@
3434
# %%
3535
# Construct a MixedEdgeGraph
3636
# --------------------------
37-
# Using the ``MixedEdgeGraph``, we can represent a causal graph
38-
# with two different kinds of edges. To create the graph, we
39-
# use networkx ``nx.DiGraph`` class to represent directed edges,
40-
# and ``nx.Graph`` class to represent edges without directions (i.e.
41-
# bidirected edges). The edge types are then specified, so the mixed edge
42-
# graph object knows which graphs are associated with which types of edges.
37+
# Here we demonstrate how to construct a mixed edge graph
38+
# by composing networkx graphs.
4339

4440
directed_G = nx.DiGraph(
4541
[
@@ -60,7 +56,6 @@
6056
name="IV Graph",
6157
)
6258

63-
# Compute the multipartite_layout using the "layer" node attribute
6459
pos = nx.spring_layout(G)
6560

6661
# we can then visualize the mixed-edge graph

examples/visualization/draw_and_compare_graphs_with_same_layout.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
cpdag.orient_uncertain_edge("x", "y")
3535
cpdag.orient_uncertain_edge("xy", "y")
3636
cpdag.orient_uncertain_edge("z", "y")
37+
38+
# create a PAG from the CPDAG, with all undirected edges
3739
pag = PAG()
3840
pag.add_edges_from(G.edges, cpdag.undirected_edge_name)
3941

pywhy_graphs/algorithms/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
from .generic import * # noqa: F403
33
from .multidomain import * # noqa: F403
44
from .pag import * # noqa: F403
5+
from .semi_directed_paths import * # noqa: F403

pywhy_graphs/algorithms/pag.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727

2828

2929
def _possibly_directed(G: PAG, i: Node, j: Node, reverse: bool = False):
30-
"""Check that path is possibly directed.
30+
"""Check that edge is possibly directed.
3131
32-
A possibly directed path is one of the form:
32+
A possibly directed edge is one of the form:
3333
- ``i -> j``
3434
- ``i o-> j``
3535
- ``i o-o j``
@@ -64,7 +64,7 @@ def _possibly_directed(G: PAG, i: Node, j: Node, reverse: bool = False):
6464

6565
# the direct check checks for i *-> j or i <-* j
6666
# i <-> j is also checked
67-
# everything else is valid
67+
# everything else is valid; i.e. i -- j, or i o-o j
6868
if direct_check or G.has_edge(i, j, G.bidirected_edge_name):
6969
return False
7070
return True
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import networkx as nx
2+
3+
from ..config import EdgeType
4+
from ..typing import Node
5+
6+
__all__ = [
7+
"is_semi_directed_path",
8+
"all_semi_directed_paths",
9+
]
10+
11+
12+
def _empty_generator():
13+
yield from ()
14+
15+
16+
def is_semi_directed_path(G, nodes):
    """Check whether `nodes` forms a semi-directed path in `G`.

    A *semi-directed path* is a nonempty sequence of distinct nodes in which
    every consecutive pair is adjacent in the graph, and no edge between a
    consecutive pair carries a directed endpoint (arrowhead) pointing back
    towards the start of the sequence.

    For example, ``(a -> b o-> c <-> d -> e)`` is not a semi-directed path
    from ``a`` to ``e``, because ``d *-> c`` puts an arrowhead at ``c``,
    i.e. in the direction of ``a``.

    Parameters
    ----------
    G : graph
        A mixed-edge graph.
    nodes : list
        A list of one or more nodes in the graph `G`.

    Returns
    -------
    bool
        True if the given list of nodes represents a semi-directed path
        in `G`, False otherwise.

    Notes
    -----
    This function closely mirrors networkx's
    :func:`networkx.algorithms.simple_paths.is_simple_path` function.
    """
    n_nodes = len(nodes)

    # An empty sequence is never a valid path (one could also raise
    # NetworkXPointlessConcept here).
    if n_nodes == 0:
        return False

    # A single node is a trivial path exactly when it belongs to the graph.
    if n_nodes == 1:
        return nodes[0] in G

    # Every listed node must exist in the graph.
    if any(node not in G for node in nodes):
        return False

    # A path is not allowed to revisit a node.
    if len(set(nodes)) != n_nodes:
        return False

    directed = EdgeType.DIRECTED.value
    bidirected = EdgeType.BIDIRECTED.value

    # Walk the consecutive pairs (tail, head) of the sequence.
    tail = nodes[0]
    for pos in range(1, n_nodes):
        head = nodes[pos]
        # A directed edge head -> tail, or a bidirected edge, places an
        # arrowhead at `tail`, i.e. towards the start of the sequence.
        arrow_into_tail = G.has_edge(head, tail, directed) or G.has_edge(
            head, tail, bidirected
        )
        # Otherwise the pair still has to be adjacent at all.
        if arrow_into_tail or not G.has_edge(tail, head):
            return False
        tail = head
    return True
73+
74+
75+
def all_semi_directed_paths(G, source: Node, target: Node, cutoff: int = None):
    """Generate all semi-directed paths from source to target in G.

    A semi-directed path from ``source`` to ``target`` is a path on which
    no edge has a directed endpoint (arrowhead) pointing back towards
    ``source``. I.e. for consecutive nodes ``u, v`` on the path, ``v *-> u``
    does not exist.

    Parameters
    ----------
    G : Graph
        The graph.
    source : Node
        The source node.
    target : Node
        The target node. If ``target`` is not itself a node of ``G``, it is
        interpreted as an iterable of target nodes.
    cutoff : integer, optional
        Depth to stop the search. Only paths of length <= cutoff are returned.
        Defaults to ``len(G) - 1``.

    Yields
    ------
    path : list
        A list of nodes forming a semi-directed path that starts at
        ``source`` and ends at a target node. Nothing is generated when
        ``source`` is one of the targets or when ``cutoff < 1``.

    Raises
    ------
    networkx.NodeNotFound
        If ``source`` is not in ``G``, or if ``target`` is neither in ``G``
        nor an iterable.

    Notes
    -----
    This algorithm is very similar to networkx's
    :func:`networkx.algorithms.simple_paths.all_simple_paths` function.

    This algorithm uses a modified depth-first search to generate the
    paths [1]_. A single path can be found in $O(V+E)$ time but the
    number of semi-directed paths in a graph can be very large, e.g. $O(n!)$ in
    the complete graph of order $n$.

    This function does not check that a path exists between `source` and
    `target`. For large graphs, this may result in very long runtimes.
    Consider using `has_path` to check that a path exists between `source` and
    `target` before calling this function on large graphs.

    References
    ----------
    .. [1] R. Sedgewick, "Algorithms in C, Part 5: Graph Algorithms",
       Addison Wesley Professional, 3rd ed., 2001.
    """
    # f-strings (rather than ``"%s" % node``) keep the error path working
    # when a node is itself a tuple.
    if source not in G:
        raise nx.NodeNotFound(f"source node {source} not in graph")

    if target in G:
        targets = {target}
    else:
        # Not a single node of G: fall back to treating it as a collection
        # of target nodes.
        try:
            targets = set(target)  # type: ignore
        except TypeError as err:
            raise nx.NodeNotFound(f"target node {target} not in graph") from err

    # A path from a node to itself is not generated.
    if source in targets:
        return _empty_generator()

    # A simple path can visit at most every node once, hence len(G) - 1 edges.
    if cutoff is None:
        cutoff = len(G) - 1
    if cutoff < 1:
        return _empty_generator()

    return _all_semi_directed_paths_graph(G, source, targets, cutoff)
132+
133+
134+
def _all_semi_directed_paths_graph(
135+
G, source, targets, cutoff, directed_edge_name="directed", bidirected_edge_name="bidirected"
136+
):
137+
"""See networkx's all_simple_paths function.
138+
139+
This performs a depth-first search for all semi-directed paths from source to target.
140+
"""
141+
# memoize each node that was already visited
142+
visited = {source: True}
143+
144+
# iterate over neighbors of source
145+
stack = [iter(G.neighbors(source))]
146+
147+
# if source has no neighbors, then prev_nodes should be None
148+
prev_nodes = [source]
149+
150+
while stack:
151+
# get the iterator through nbrs for the current node
152+
nbrs = stack[-1]
153+
prev_node = prev_nodes[-1]
154+
nbr = next(nbrs, None)
155+
156+
# The first condition guarantees that there is not a directed endpoint
157+
# along the path from source to target that points towards source.
158+
if (
159+
G.has_edge(nbr, prev_node, directed_edge_name)
160+
or G.has_edge(nbr, prev_node, bidirected_edge_name)
161+
) and nbr not in visited:
162+
# If we've found a directed edge from child to prev_node,
163+
# that we haven't visited, then we don't need to continue down this path
164+
continue
165+
elif nbr is None:
166+
# once all children are visited, pop the stack
167+
# and remove the child from the visited set
168+
stack.pop()
169+
visited.popitem()
170+
prev_nodes.pop()
171+
elif len(visited) < cutoff:
172+
if nbr in visited:
173+
continue
174+
if nbr in targets:
175+
# we've found a path to a target
176+
yield list(visited) + [nbr]
177+
visited[nbr] = True
178+
if targets - set(visited.keys()): # expand stack until find all targets
179+
stack.append(iter(G.neighbors(nbr)))
180+
prev_nodes.append(nbr)
181+
else:
182+
visited.popitem() # maybe other ways to child
183+
else: # len(visited) == cutoff:
184+
for target in (targets & (set(nbrs) | {nbr})) - set(visited.keys()):
185+
yield list(visited) + [target]
186+
stack.pop()
187+
visited.popitem()
188+
prev_nodes.pop()

0 commit comments

Comments
 (0)