Skip to content

Commit 40c19f5

Browse files
abdessamad-abdoun authored and MaillPierre committed
Implement standard tests for Canonical RDF into Corese-W3C
#212
1 parent 1f3af16 commit 40c19f5

File tree

2 files changed

+182
-11
lines changed

2 files changed

+182
-11
lines changed

src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@ protected AbstractIRI(String fullIRI) {
2525
if (fullIRI == null) {
2626
throw new IllegalArgumentException("fullIRI cannot be null");
2727
}
28-
// if (!IRIUtils.isStandardIRI(fullIRI)) {
29-
// throw new IncorrectFormatException("IRI '" + fullIRI + "' must be a valid IRI");
30-
// }
3128
this.namespace = IRIUtils.guessNamespace(fullIRI);
3229
this.localName = IRIUtils.guessLocalName(fullIRI);
3330
}

src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractTurtleTriGListener.java

Lines changed: 182 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ public abstract class AbstractTurtleTriGListener {
2727
public Resource currentSubject;
2828
public IRI currentPredicate;
2929

30+
private final java.util.Set<String> explicitlyDeclaredPrefixes = new java.util.HashSet<>();
31+
3032
/**
3133
* Constructs a parser listener with the specified model, factory and base URI.
3234
*
@@ -59,7 +61,9 @@ public void initializeBasePrefix() {
5961
*/
6062
public String extractAndUnescapeIRI(String text) {
6163
String iri = text.substring(1, text.length() - 1);
62-
return unescapeIRI(iri);
64+
iri = unescapeIRI(iri);
65+
validateIRI(iri);
66+
return iri;
6367
}
6468

6569
/**
@@ -69,6 +73,8 @@ public String extractAndUnescapeIRI(String text) {
6973
*/
7074
public void updateBaseURI(String newBase) {
7175
this.baseURI = resolveIRIAgainstBase(newBase);
76+
validateIRI(this.baseURI);
77+
7278
prefixMap.put(ParserConstants.EMPTY_STRING, this.baseURI);
7379
model.setNamespace(ParserConstants.EMPTY_STRING, this.baseURI);
7480
}
@@ -81,8 +87,11 @@ public void updateBaseURI(String newBase) {
8187
*/
8288
public void registerPrefix(String prefix, String iri) {
8389
String resolvedIRI = resolveIRIAgainstBase(iri);
90+
validateIRI(resolvedIRI);
8491
prefixMap.put(prefix, resolvedIRI);
8592
model.setNamespace(prefix, resolvedIRI);
93+
94+
explicitlyDeclaredPrefixes.add(prefix);
8695
}
8796

8897
/**
@@ -109,6 +118,7 @@ public String resolveIRI(String raw) {
109118
if (raw.startsWith(ParserConstants.IRI_START) && raw.endsWith(ParserConstants.IRI_END)) {
110119
String iri = raw.substring(1, raw.length() - 1);
111120
iri = unescapeIRI(iri);
121+
validateIRI(iri);
112122
return iri.isEmpty() ? getEffectiveBaseURI() : resolveIRIAgainstBase(iri);
113123
}
114124

@@ -117,23 +127,33 @@ public String resolveIRI(String raw) {
117127
String prefix = parts[0];
118128
String localName = parts[1];
119129

130+
if (prefix.isEmpty() && !explicitlyDeclaredPrefixes.contains("")) {
131+
throw new ParsingErrorException(
132+
"Syntax error: prefixed name ':' + '" + localName + "' used but ':' prefix was never declared. " +
133+
"Use @prefix : <baseURI> to declare the empty prefix."
134+
);
135+
}
136+
120137
if (prefixMap.containsKey(prefix)) {
121138
localName = unescapeIRI(localName);
122139
String ns = prefixMap.get(prefix);
123140
if (ns != null) {
124-
return ns + localName;
141+
String result = ns + localName;
142+
validateIRI(result);
143+
return result;
125144
}
126-
}
127-
128-
if (isAbsoluteIRI(raw)) {
145+
} else if (isAbsoluteIRI(raw)) {
129146
return raw;
147+
} else {
148+
throw new ParsingErrorException("Undeclared prefix: " + prefix);
130149
}
131-
132-
throw new ParsingErrorException("Undeclared prefix: " + prefix);
133150
}
134151

135-
return resolveIRIAgainstBase(raw);
152+
String result = resolveIRIAgainstBase(raw);
153+
return result;
136154

155+
} catch (ParsingErrorException e) {
156+
throw e;
137157
} catch (IllegalArgumentException e) {
138158
throw new ParsingErrorException(e.getMessage(), e);
139159
}
@@ -448,6 +468,7 @@ public String getEffectiveBaseURI() {
448468
String effective = (baseURI != null && !baseURI.isEmpty()) ? baseURI : ParserConstants.getDefaultBaseURI();
449469
return normalizeURI(effective);
450470
}
471+
451472
/**
452473
* Processes Unicode escape sequences in IRIs.
453474
*
@@ -630,6 +651,159 @@ public Literal createNumericLiteral(String text, NumericType type) {
630651
}
631652
}
632653

654+
/**
655+
* Validates that an IRI contains only valid characters after escape sequence processing.
656+
*
657+
* @param iri the IRI string to validate (after escape sequences have been processed)
658+
* @throws ParsingErrorException if the IRI contains forbidden characters
659+
*/
660+
private void validateIRI(String iri) throws ParsingErrorException {
661+
if (iri == null || iri.isEmpty()) {
662+
return; // Empty IRIs are acceptable
663+
}
664+
665+
666+
// Check each character in the IRI
667+
for (int i = 0; i < iri.length(); i++) {
668+
char c = iri.charAt(i);
669+
670+
// Check for forbidden characters
671+
if (isInvalidIRICharacter(c)) {
672+
String codePoint = String.format("U+%04X", (int) c);
673+
String charDesc = getCharacterDescription(c);
674+
String displayIRI = escapeForDisplay(iri);
675+
676+
677+
throw new ParsingErrorException(
678+
"Invalid character in IRI: " + codePoint + " (" + charDesc + ") " +
679+
"at position " + i + ". " +
680+
"IRI after escape processing: " + displayIRI + ". " +
681+
"IRIs cannot contain space, control characters, or reserved characters."
682+
);
683+
}
684+
}
685+
686+
}
687+
688+
/**
689+
* Checks if a character is invalid in an IRI according to RFC 3987.
690+
*
691+
* @param c the character to validate
692+
* @return true if the character is forbidden in IRIs
693+
*/
694+
private boolean isInvalidIRICharacter(char c) {
695+
// Space (U+0020) - NOT ALLOWED
696+
if (c == 0x20) {
697+
return true;
698+
}
699+
700+
// Control characters (U+0000-U+001F) - NOT ALLOWED
701+
if (c >= 0x00 && c <= 0x1F) {
702+
return true;
703+
}
704+
705+
// DEL (U+007F) - NOT ALLOWED
706+
if (c == 0x7F) {
707+
return true;
708+
}
709+
710+
// High control characters (U+0080-U+009F) - NOT ALLOWED
711+
if (c >= 0x80 && c <= 0x9F) {
712+
return true;
713+
}
714+
715+
switch (c) {
716+
case '<': // U+003C - less than
717+
case '>': // U+003E - greater than
718+
case '{': // U+007B - left curly bracket
719+
case '}': // U+007D - right curly bracket
720+
case '\\': // U+005C - backslash
721+
case '^': // U+005E - circumflex
722+
case '`': // U+0060 - grave accent
723+
case '|': // U+007C - pipe
724+
case '"': // U+0022 - quotation mark
725+
return true;
726+
default:
727+
return false;
728+
}
729+
}
730+
731+
/**
732+
* Returns a human-readable description of a character for error messages.
733+
*
734+
* @param c the character to describe
735+
* @return human-readable description
736+
*/
737+
private String getCharacterDescription(char c) {
738+
switch (c) {
739+
case 0x00:
740+
return "null character";
741+
case 0x09:
742+
return "tab";
743+
case 0x0A:
744+
return "line feed";
745+
case 0x0D:
746+
return "carriage return";
747+
case 0x20:
748+
return "space";
749+
case 0x7F:
750+
return "delete";
751+
case '<':
752+
return "less than";
753+
case '>':
754+
return "greater than";
755+
case '{':
756+
return "left curly bracket";
757+
case '}':
758+
return "right curly bracket";
759+
case '\\':
760+
return "backslash";
761+
case '^':
762+
return "circumflex";
763+
case '`':
764+
return "grave accent";
765+
case '|':
766+
return "pipe";
767+
case '"':
768+
return "quotation mark";
769+
default:
770+
if (c < 0x20) {
771+
return "control character";
772+
} else if (c >= 0x80 && c <= 0x9F) {
773+
return "high control character";
774+
} else {
775+
return String.format("character '%c'", c);
776+
}
777+
}
778+
}
779+
780+
/**
781+
* Escapes characters in a string for display in error messages.
782+
*
783+
* @param iri the IRI to escape for display
784+
* @return escaped version suitable for error messages
785+
*/
786+
private String escapeForDisplay(String iri) {
787+
StringBuilder sb = new StringBuilder();
788+
for (int i = 0; i < iri.length(); i++) {
789+
char c = iri.charAt(i);
790+
if (c < 0x20 || (c >= 0x7F && c <= 0x9F)) {
791+
// Display control characters as Unicode escapes
792+
sb.append(String.format("\\u%04X", (int) c));
793+
} else if (c > 0x7E) {
794+
// Display non-ASCII as Unicode escapes for clarity
795+
sb.append(String.format("\\u%04X", (int) c));
796+
} else if (c == '<' || c == '>' || c == '{' || c == '}' || c == '\\' || c == '^' || c == '`' || c == '|' || c == '"') {
797+
// Display reserved characters with backslash escape
798+
sb.append('\\').append(c);
799+
} else {
800+
// Display normal ASCII characters as-is
801+
sb.append(c);
802+
}
803+
}
804+
return sb.toString();
805+
}
806+
633807
/**
634808
* Enumeration of numeric literal types corresponding to XSD datatypes.
635809
*/

0 commit comments

Comments
 (0)