@@ -24,15 +24,16 @@ public class RDFC10Canonicalizer {
2424 private final int maxCallsHashNDegreeQuads ;
2525 private final StatementUtils statementUtils ;
2626 private int callsHashNDegreeQuads = 0 ;
27+ private Set <String > currentPathVisited = new HashSet <>();
2728
2829 /**
2930 * Constructs a new Rdfc10Canonicalizer with specified configuration.
3031 *
3132 * @param hashAlgorithm The hashing algorithm to use for canonicalization (SHA-256 or SHA-384).
3233 * @param maxCalls The maximum number of recursive calls to the Hash N-Degree Quads algorithm
33- * to prevent infinite loops on complex cyclic graphs.
34+ * to prevent infinite loops on complex cyclic graphs.
3435 * @param valueFactory The factory for creating RDF values, used by StatementUtils for
35- * blank node replacement and serialization.
36+ * blank node replacement and serialization.
3637 */
3738 public RDFC10Canonicalizer (RDFC10SerializerOptions .HashAlgorithm hashAlgorithm , int maxCalls , ValueFactory valueFactory ) {
3839 this .hashAlgorithm = Objects .requireNonNull (hashAlgorithm , "Hash algorithm cannot be null" );
@@ -52,7 +53,7 @@ public RDFC10Canonicalizer(RDFC10SerializerOptions.HashAlgorithm hashAlgorithm,
5253 * @param model The input model to canonicalize. Must not be null.
5354 * @return A list of canonicalized and sorted statements ready for serialization.
5455 * @throws SerializationException if canonicalization fails due to algorithmic constraints
55- * or invalid input data.
56+ * or invalid input data.
5657 */
5758 public List <Statement > canonicalize (Model model ) {
5859 Objects .requireNonNull (model , "Model cannot be null" );
@@ -71,7 +72,7 @@ private List<Statement> canonicalize(Stream<Statement> statements) {
7172
7273 // Reset the recursive call counter for each canonicalization operation
7374 callsHashNDegreeQuads = 0 ;
74-
75+ currentPathVisited . clear ();
7576 // Step 1: Create a mapping of blank nodes to their associated statements
7677 Map <String , Set <Statement >> blankNodeToQuads = createBNodeToQuadsMap (stmtList );
7778
@@ -98,7 +99,7 @@ private List<Statement> canonicalize(Stream<Statement> statements) {
9899 * @return A map linking blank node identifiers to their associated statements.
99100 */
100101 private Map <String , Set <Statement >> createBNodeToQuadsMap (List <Statement > statements ) {
101- Map <String , Set <Statement >> blankNodeToQuads = new HashMap <>();
102+ Map <String , Set <Statement >> blankNodeToQuads = new LinkedHashMap <>();
102103
103104 for (Statement stmt : statements ) {
104105 if (stmt == null ) continue ;
@@ -132,39 +133,44 @@ private Map<String, String> createCanonicalMap(Map<String, Set<Statement>> bnode
132133 Map <String , String > canonicalIssuer = new HashMap <>();
133134 int counter = 0 ;
134135
136+ List <String > bnodeOrder = new ArrayList <>(bnodeToQuads .keySet ());
137+
135138 // Step 1: Calculate first-degree hashes for all blank nodes
136- Map <String , String > firstDegreeHashes = new HashMap <>();
137- for (String bnode : bnodeToQuads . keySet () ) {
139+ Map <String , String > firstDegreeHashes = new LinkedHashMap <>();
140+ for (String bnode : bnodeOrder ) {
138141 String hash = hashFirstDegreeQuads (bnode , bnodeToQuads );
139142 firstDegreeHashes .put (bnode , hash );
140143 }
141144
142145 // Step 2: Create hash groups
143- Map <String , List <String >> hashToNodes = new HashMap <>();
144- for (String node : bnodeToQuads . keySet () ) {
146+ Map <String , List <String >> hashToNodes = new LinkedHashMap <>();
147+ for (String node : bnodeOrder ) {
145148 String hash = firstDegreeHashes .get (node );
146149 hashToNodes .computeIfAbsent (hash , k -> new ArrayList <>()).add (node );
147150 }
148151
149152 // Step 3: Separate into single-node and multi-node groups
150- List <String > singleNodeHashes = new ArrayList <>();
153+ List <String > singleNodeBnodes = new ArrayList <>();
151154 List <String > multiNodeHashes = new ArrayList <>();
155+
156+ for (String bnode : bnodeOrder ) {
157+ String hash = firstDegreeHashes .get (bnode );
158+ if (hashToNodes .get (hash ).size () == 1 ) {
159+ singleNodeBnodes .add (bnode );
160+ }
161+ }
162+
152163 for (Map .Entry <String , List <String >> entry : hashToNodes .entrySet ()) {
153- if (entry .getValue ().size () == 1 ) {
154- singleNodeHashes .add (entry .getKey ());
155- } else {
164+ if (entry .getValue ().size () > 1 ) {
156165 multiNodeHashes .add (entry .getKey ());
157166 }
158167 }
159168
160- // Sort hashes within their groups
161- Collections .sort (singleNodeHashes );
162169 Collections .sort (multiNodeHashes );
163170
164- // Step 4: Process single-node groups first
165- for (String hash : singleNodeHashes ) {
166- String node = hashToNodes .get (hash ).get (0 );
167- canonicalIssuer .put (node , SerializationConstants .C14N + counter ++);
171+ // Step 4: Process single-node groups first, in order of first appearance
172+ for (String bnode : singleNodeBnodes ) {
173+ canonicalIssuer .put (bnode , SerializationConstants .C14N + counter ++);
168174 }
169175
170176 // Step 5: Process multi-node groups using N-degree hashing
@@ -178,13 +184,14 @@ private Map<String, String> createCanonicalMap(Map<String, Set<Statement>> bnode
178184 nDegreeHashes .put (node , nDegreeHash );
179185 }
180186
181- nodes .sort ((n1 , n2 ) -> {
187+ List <String > sortedNodes = new ArrayList <>(nodes );
188+ sortedNodes .sort ((n1 , n2 ) -> {
182189 int cmp = nDegreeHashes .get (n1 ).compareTo (nDegreeHashes .get (n2 ));
183190 if (cmp != 0 ) return cmp ;
184- return n1 . compareTo (n2 );
191+ return Integer . compare ( bnodeOrder . indexOf ( n1 ), bnodeOrder . indexOf (n2 ) );
185192 });
186193
187- for (String node : nodes ) {
194+ for (String node : sortedNodes ) {
188195 canonicalIssuer .put (node , SerializationConstants .C14N + counter ++);
189196 }
190197 }
@@ -238,52 +245,64 @@ private String hashNDegreeQuads(String identifier, Map<String, Set<Statement>> b
238245 );
239246 }
240247
241- // Collect all related blank nodes from all quads containing this node
242- Set <String > relatedBlankNodes = new HashSet <>();
243- for (Statement quad : blankNodeToQuads .get (identifier )) {
244- relatedBlankNodes .addAll (getRelatedBlankNodes (quad , identifier ));
248+ if (currentPathVisited .contains (identifier )) {
249+ // Return a stable hash for cyclic references to break the infinite recursion
250+ return hash ("CYCLE:" + identifier + ":" + issuer .issue (identifier ));
245251 }
246252
247- // Calculate hashes for each related blank node
248- List <String > relatedHashes = new ArrayList <>();
249- for (String relatedNode : relatedBlankNodes ) {
250- String relatedHash ;
251-
252- if (canonicalIssuer .containsKey (relatedNode )) {
253- // Use canonical ID if already assigned
254- relatedHash = canonicalIssuer .get (relatedNode );
255- } else if (issuer .hasIssued (relatedNode )) {
256- // Use temporary ID if already issued
257- relatedHash = issuer .issue (relatedNode );
258- } else {
259- // Recursively calculate N-degree hash
260- TemporaryIssuer newIssuer = issuer .copy ();
261- relatedHash = hashNDegreeQuads (relatedNode , blankNodeToQuads , canonicalIssuer , newIssuer );
253+ try {
254+ currentPathVisited .add (identifier );
255+
256+ // Collect all related blank nodes from all quads containing this node
257+ Set <String > relatedBlankNodes = new HashSet <>();
258+ for (Statement quad : blankNodeToQuads .get (identifier )) {
259+ relatedBlankNodes .addAll (getRelatedBlankNodes (quad , identifier ));
262260 }
263261
264- relatedHashes .add (relatedHash );
265- }
262+ // Calculate hashes for each related blank node
263+ List <String > relatedHashes = new ArrayList <>();
264+ for (String relatedNode : relatedBlankNodes ) {
265+ String relatedHash ;
266+
267+ if (canonicalIssuer .containsKey (relatedNode )) {
268+ // Use canonical ID if already assigned
269+ relatedHash = canonicalIssuer .get (relatedNode );
270+ } else if (issuer .hasIssued (relatedNode )) {
271+ // Use temporary ID if already issued
272+ relatedHash = issuer .issue (relatedNode );
273+ } else {
274+ // Recursively calculate N-degree hash
275+ TemporaryIssuer newIssuer = issuer .copy ();
276+ relatedHash = hashNDegreeQuads (relatedNode , blankNodeToQuads , canonicalIssuer , newIssuer );
277+ }
278+
279+ relatedHashes .add (relatedHash );
280+ }
266281
267- // Sort the related hashes
268- Collections .sort (relatedHashes );
282+ // Sort the related hashes
283+ Collections .sort (relatedHashes );
269284
270- // Build the final hash input
271- StringBuilder hashInput = new StringBuilder ();
272- hashInput .append (hashFirstDegreeQuads (identifier , blankNodeToQuads ));
273- for (String relatedHash : relatedHashes ) {
274- hashInput .append (relatedHash );
275- }
285+ // Build the final hash input
286+ StringBuilder hashInput = new StringBuilder ();
287+ hashInput .append (hashFirstDegreeQuads (identifier , blankNodeToQuads ));
288+ for (String relatedHash : relatedHashes ) {
289+ hashInput .append (relatedHash );
290+ }
291+
292+ return hash (hashInput .toString ());
276293
277- return hash (hashInput .toString ());
294+ } finally {
295+ currentPathVisited .remove (identifier );
296+ }
278297 }
279298
280299 /**
281300 * Converts a statement to canonical N-Quad format for hashing, replacing
282301 * a specific blank node with a placeholder string.
283302 *
284- * @param quad The statement to convert.
303+ * @param quad The statement to convert.
285304 * @param blankNodeToReplace The blank node identifier to replace.
286- * @param replacement The placeholder string to use for replacement.
305+ * @param replacement The placeholder string to use for replacement.
287306 * @return A canonical N-Quad string with placeholder substitution.
288307 */
289308 private String quadToNQuad (Statement quad , String blankNodeToReplace , String replacement ) {
@@ -376,9 +395,11 @@ private List<Statement> replaceBlankNodesAndSort(List<Statement> statements, Map
376395 .map (stmt -> statementUtils .replaceBlankNodes (stmt , canonicalMap ))
377396 .toList ();
378397
379- return replaced .stream ()
398+ List < Statement > sorted = replaced .stream ()
380399 .sorted (Comparator .comparing (StatementUtils ::toNQuad ))
381400 .toList ();
401+
402+ return sorted ;
382403 }
383404
384405 /**
0 commit comments