1212from itertools import product
1313import sys
1414
15+ import networkx as nx
1516from rdflib import Variable
1617from scipy .special import binom
1718from scipy .misc import comb
2122from graph_pattern import TARGET_VAR
2223from graph_pattern import GraphPattern
2324from graph_pattern import canonicalize
25+ from graph_pattern import to_nx_graph
2426
2527logger = logging .getLogger (__name__ )
2628logger .info ('init' )
@@ -42,7 +44,6 @@ def numerical_patterns(
4244 length ,
4345 loops = True ,
4446 node_edge_joint = True ,
45- p_connected = True ,
4647 _partial_pattern = None ,
4748 _pos = None ,
4849 _var = 1 ,
@@ -119,7 +120,11 @@ def numerical_patterns(
119120 s , p , o = _partial_pattern [i ]
120121 for pt in _partial_pattern [:i ]:
121122 # loop over previous triples and check if current is connected
122- if s in pt or o in pt or (p_connected and p in pt ):
123+ if s in pt or p in pt or o in pt :
124+ # for p_only_connected it's possible to become
125+ # n_connected again later:
126+ # 123 145 627 685
127+ # ^ ^
123128 break
124129 else :
125130 # we're not connected, early terminate this
@@ -163,7 +168,6 @@ def numerical_patterns(
163168 length ,
164169 loops = loops ,
165170 node_edge_joint = node_edge_joint ,
166- p_connected = p_connected ,
167171 _partial_pattern = _partial_pattern ,
168172 _pos = (i , j ),
169173 _var = v
@@ -175,7 +179,7 @@ def patterns(
175179 length ,
176180 loops = True ,
177181 node_edge_joint = True ,
178- p_connected = True ,
182+ p_only_connected = True ,
179183 source_target_edges = True ,
180184 exclude_isomorphic = True ,
181185 count_candidates_only = False ,
@@ -191,21 +195,28 @@ def patterns(
191195 length ,
192196 loops = loops ,
193197 node_edge_joint = node_edge_joint ,
194- p_connected = p_connected ,
195198 )):
196199 flat_num_pat = [v for t in num_pat for v in t ]
197200 all_numbers = set (flat_num_pat )
198201
202+ if not p_only_connected :
203+ # Numerical patterns are always connected, but they might be
204+ # p_only_connected (e.g., 123 425).
205+ # Keep the pattern only if it is not merely p_only_connected, i.e., if it
206+ # is also connected via its nodes (e.g., 123 325).
207+ # Note that in case of node_edge_joint 123 245 is also considered
208+ # p_only_connected.
209+ if not nx .is_connected (to_nx_graph (num_pat )):
210+ logger .debug ('excluded %d: not node connected:\n %s' , c , num_pat )
211+ continue
212+
199213 if source_target_edges :
200214 all_numbers = sorted (all_numbers )
201215 numbers = all_numbers
202216 else :
203217 numbers = sorted (all_numbers - set (flat_num_pat [1 ::3 ]))
204218 all_numbers = sorted (all_numbers )
205219
206- # var_map = {i: '?v%d' % i for i in numbers}
207- # pattern = GraphPattern(
208- # tuple([tuple([var_map[i] for i in t]) for t in numerical_repr]))
209220 if count_candidates_only :
210221 l = len (numbers )
211222 perms = l * (l - 1 )
@@ -256,7 +267,7 @@ def pattern_generator(
256267 length ,
257268 loops = True ,
258269 node_edge_joint = True ,
259- p_connected = True ,
270+ p_only_connected = True ,
260271 source_target_edges = True ,
261272 exclude_isomorphic = True ,
262273):
@@ -307,7 +318,7 @@ def pattern_generator(
307318 continue
308319
309320 # check that the pattern is connected
310- if not gp .is_connected (via_edges = p_connected ):
321+ if not gp .is_connected (via_edges = p_only_connected ):
311322 logger .debug ('excluded %d: not connected:\n %s' , pid , gp )
312323 continue
313324
@@ -338,16 +349,28 @@ def pattern_generator(
338349
339350
340351def main ():
341- length = 1
352+ # len | pcon | nej | all | candidates (all) | candidates (all) |
353+ # | | | (canonical) | (old method) | (numerical) |
354+ # ----+------+-----+--------------+-------------------+-------------------+
355+ # 1 | 8 | 12 | 12 | 27 | 12 |
356+ # 2 | 146 | 469 | 693 | 7750 | 1314 |
357+ # 3 | | | 47478 | 6666891 | 151534 |
358+ # 4 | | | | 11671285626 | 20884300 |
359+ # 5 | | | | 34549552710596 | 3461471628 |
360+
361+ # len | typical | candidates | candidates |
362+ # | (canonical) | (old method) | (numerical) |
363+ # ----+-------------+----------------+-------------+
364+ # 1 | 2 | 27 | 2 |
365+ # 2 | 28 | 7750 | 54 |
366+ # 3 | 486 | 6666891 | 1614 |
367+ # 4 | 10374 | 11671285626 | 59654 |
368+ # 5 | | 34549552710596 | 2707960 |
369+
370+ # typical above means none of (loops, nej, pcon, source_target_edges)
371+
372+ length = 5
342373 canonical = True
343- # len | pcon | nej | pcon, nej | candidates | candidates |
344- # | | | (canonical) | (old method) | (numerical) |
345- # ----+------+-----+--------------+----------------+-------------+
346- # 1 | 8 | 12 | 12 | 27 | 12 |
347- # 2 | 146 | 469 | 693 | 7750 | 1314 |
348- # 3 | | | 47478 | 6666891 | 151534 |
349- # 4 | | | | 11671285626 | 20884300 |
350- # 5 | | | | 34549552710596 | 3461471628 |
351374
352375 gen_patterns = []
353376 n = 0
@@ -356,7 +379,7 @@ def main():
356379 length ,
357380 loops = False ,
358381 node_edge_joint = False ,
359- p_connected = False ,
382+ p_only_connected = False ,
360383 source_target_edges = False ,
361384 exclude_isomorphic = canonical ,
362385 count_candidates_only = False ,
0 commit comments