Skip to content

Commit 7adc61a

Browse files
abdessamad-abdoun authored and MaillPierre committed
[#212-REVUE] Implement standard tests for Canonical RDF into Corese-W3C
1 parent 40c19f5 commit 7adc61a

File tree

5 files changed

+139
-132
lines changed

5 files changed

+139
-132
lines changed

src/main/java/fr/inria/corese/core/next/api/base/model/AbstractIRI.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ protected AbstractIRI(String fullIRI) {
2525
if (fullIRI == null) {
2626
throw new IllegalArgumentException("fullIRI cannot be null");
2727
}
28+
if (!IRIUtils.isStandardIRI(fullIRI)) {
29+
throw new IncorrectFormatException("IRI '" + fullIRI + "' must be a valid IRI");
30+
}
2831
this.namespace = IRIUtils.guessNamespace(fullIRI);
2932
this.localName = IRIUtils.guessLocalName(fullIRI);
3033
}

src/main/java/fr/inria/corese/core/next/impl/common/util/IRIUtils.java

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,4 +185,123 @@ private static boolean isValidURI(String uriString) {
185185
return false;
186186
}
187187
}
188-
}
188+
189+
/**
190+
* Checks if a character is invalid in an IRI according to RFC
191+
*
192+
* @param c the character to validate
193+
* @return true if the character is forbidden in IRIs
194+
*/
195+
public static boolean isInvalidIRICharacter(char c) {
196+
// Space (U+0020) - NOT ALLOWED
197+
if (c == 0x20) {
198+
return true;
199+
}
200+
201+
// Control characters (U+0000-U+001F) - NOT ALLOWED
202+
if (c >= 0x00 && c <= 0x1F) {
203+
return true;
204+
}
205+
206+
// DEL (U+007F) - NOT ALLOWED
207+
if (c == 0x7F) {
208+
return true;
209+
}
210+
211+
// High control characters (U+0080-U+009F) - NOT ALLOWED
212+
if (c >= 0x80 && c <= 0x9F) {
213+
return true;
214+
}
215+
216+
switch (c) {
217+
case '<': // U+003C - less than
218+
case '>': // U+003E - greater than
219+
case '{': // U+007B - left curly bracket
220+
case '}': // U+007D - right curly bracket
221+
case '\\': // U+005C - backslash
222+
case '^': // U+005E - circumflex
223+
case '`': // U+0060 - grave accent
224+
case '|': // U+007C - pipe
225+
case '"': // U+0022 - quotation mark
226+
return true;
227+
default:
228+
return false;
229+
}
230+
}
231+
232+
/**
233+
* Returns a human-readable description of a character for error messages.
234+
*
235+
* @param c the character to describe
236+
* @return human-readable description
237+
*/
238+
public static String getCharacterDescription(char c) {
239+
switch (c) {
240+
case 0x00:
241+
return "null character";
242+
case 0x09:
243+
return "tab";
244+
case 0x0A:
245+
return "line feed";
246+
case 0x0D:
247+
return "carriage return";
248+
case 0x20:
249+
return "space";
250+
case 0x7F:
251+
return "delete";
252+
case '<':
253+
return "less than";
254+
case '>':
255+
return "greater than";
256+
case '{':
257+
return "left curly bracket";
258+
case '}':
259+
return "right curly bracket";
260+
case '\\':
261+
return "backslash";
262+
case '^':
263+
return "circumflex";
264+
case '`':
265+
return "grave accent";
266+
case '|':
267+
return "pipe";
268+
case '"':
269+
return "quotation mark";
270+
default:
271+
if (c < 0x20) {
272+
return "control character";
273+
} else if (c >= 0x80 && c <= 0x9F) {
274+
return "high control character";
275+
} else {
276+
return String.format("character '%c'", c);
277+
}
278+
}
279+
}
280+
281+
/**
282+
* Escapes characters in a string for display in error messages.
283+
*
284+
* @param iri the IRI to escape for display
285+
* @return escaped version suitable for error messages
286+
*/
287+
public static String escapeForDisplay(String iri) {
288+
StringBuilder sb = new StringBuilder();
289+
for (int i = 0; i < iri.length(); i++) {
290+
char c = iri.charAt(i);
291+
if (c < 0x20 || (c >= 0x7F && c <= 0x9F)) {
292+
// Display control characters as Unicode escapes
293+
sb.append(String.format("\\u%04X", (int) c));
294+
} else if (c > 0x7E) {
295+
// Display non-ASCII as Unicode escapes for clarity
296+
sb.append(String.format("\\u%04X", (int) c));
297+
} else if (c == '<' || c == '>' || c == '{' || c == '}' || c == '\\' || c == '^' || c == '`' || c == '|' || c == '"') {
298+
// Display reserved characters with backslash escape
299+
sb.append('\\').append(c);
300+
} else {
301+
// Display normal ASCII characters as-is
302+
sb.append(c);
303+
}
304+
}
305+
return sb.toString();
306+
}
307+
}

src/main/java/fr/inria/corese/core/next/impl/io/parser/common/AbstractTurtleTriGListener.java

Lines changed: 11 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import fr.inria.corese.core.next.api.*;
44
import fr.inria.corese.core.next.impl.common.literal.XSD;
5+
import fr.inria.corese.core.next.impl.common.util.IRIUtils;
56
import fr.inria.corese.core.next.impl.common.vocabulary.RDF;
67
import fr.inria.corese.core.next.impl.exception.ParsingErrorException;
78
import fr.inria.corese.core.next.impl.io.parser.util.ParserConstants;
@@ -58,12 +59,12 @@ public void initializeBasePrefix() {
5859
*
5960
* @param text raw IRI text including angle brackets
6061
* @return unescaped IRI string
62+
* @throws ParsingErrorException if the IRI contains invalid characters after escape processing
6163
*/
6264
public String extractAndUnescapeIRI(String text) {
6365
String iri = text.substring(1, text.length() - 1);
6466
iri = unescapeIRI(iri);
65-
validateIRI(iri);
66-
return iri;
67+
return validateIRI(iri) ? iri : iri;
6768
}
6869

6970
/**
@@ -87,7 +88,7 @@ public void updateBaseURI(String newBase) {
8788
*/
8889
public void registerPrefix(String prefix, String iri) {
8990
String resolvedIRI = resolveIRIAgainstBase(iri);
90-
validateIRI(resolvedIRI);
91+
validateIRI(resolvedIRI);
9192
prefixMap.put(prefix, resolvedIRI);
9293
model.setNamespace(prefix, resolvedIRI);
9394

@@ -655,24 +656,23 @@ public Literal createNumericLiteral(String text, NumericType type) {
655656
* Validates that an IRI contains only valid characters after escape sequence processing.
656657
*
657658
* @param iri the IRI string to validate (after escape sequences have been processed)
659+
* @return true if the IRI is valid
658660
* @throws ParsingErrorException if the IRI contains forbidden characters
659661
*/
660-
private void validateIRI(String iri) throws ParsingErrorException {
662+
private boolean validateIRI(String iri) throws ParsingErrorException {
661663
if (iri == null || iri.isEmpty()) {
662-
return; // Empty IRIs are acceptable
664+
return true; // Empty IRIs are acceptable
663665
}
664666

665-
666667
// Check each character in the IRI
667668
for (int i = 0; i < iri.length(); i++) {
668669
char c = iri.charAt(i);
669670

670671
// Check for forbidden characters
671-
if (isInvalidIRICharacter(c)) {
672+
if (IRIUtils.isInvalidIRICharacter(c)) {
672673
String codePoint = String.format("U+%04X", (int) c);
673-
String charDesc = getCharacterDescription(c);
674-
String displayIRI = escapeForDisplay(iri);
675-
674+
String charDesc = IRIUtils.getCharacterDescription(c);
675+
String displayIRI = IRIUtils.escapeForDisplay(iri);
676676

677677
throw new ParsingErrorException(
678678
"Invalid character in IRI: " + codePoint + " (" + charDesc + ") " +
@@ -682,126 +682,7 @@ private void validateIRI(String iri) throws ParsingErrorException {
682682
);
683683
}
684684
}
685-
686-
}
687-
688-
/**
689-
* Checks if a character is invalid in an IRI according to RFC 3987.
690-
*
691-
* @param c the character to validate
692-
* @return true if the character is forbidden in IRIs
693-
*/
694-
private boolean isInvalidIRICharacter(char c) {
695-
// Space (U+0020) - NOT ALLOWED
696-
if (c == 0x20) {
697-
return true;
698-
}
699-
700-
// Control characters (U+0000-U+001F) - NOT ALLOWED
701-
if (c >= 0x00 && c <= 0x1F) {
702-
return true;
703-
}
704-
705-
// DEL (U+007F) - NOT ALLOWED
706-
if (c == 0x7F) {
707-
return true;
708-
}
709-
710-
// High control characters (U+0080-U+009F) - NOT ALLOWED
711-
if (c >= 0x80 && c <= 0x9F) {
712-
return true;
713-
}
714-
715-
switch (c) {
716-
case '<': // U+003C - less than
717-
case '>': // U+003E - greater than
718-
case '{': // U+007B - left curly bracket
719-
case '}': // U+007D - right curly bracket
720-
case '\\': // U+005C - backslash
721-
case '^': // U+005E - circumflex
722-
case '`': // U+0060 - grave accent
723-
case '|': // U+007C - pipe
724-
case '"': // U+0022 - quotation mark
725-
return true;
726-
default:
727-
return false;
728-
}
729-
}
730-
731-
/**
732-
* Returns a human-readable description of a character for error messages.
733-
*
734-
* @param c the character to describe
735-
* @return human-readable description
736-
*/
737-
private String getCharacterDescription(char c) {
738-
switch (c) {
739-
case 0x00:
740-
return "null character";
741-
case 0x09:
742-
return "tab";
743-
case 0x0A:
744-
return "line feed";
745-
case 0x0D:
746-
return "carriage return";
747-
case 0x20:
748-
return "space";
749-
case 0x7F:
750-
return "delete";
751-
case '<':
752-
return "less than";
753-
case '>':
754-
return "greater than";
755-
case '{':
756-
return "left curly bracket";
757-
case '}':
758-
return "right curly bracket";
759-
case '\\':
760-
return "backslash";
761-
case '^':
762-
return "circumflex";
763-
case '`':
764-
return "grave accent";
765-
case '|':
766-
return "pipe";
767-
case '"':
768-
return "quotation mark";
769-
default:
770-
if (c < 0x20) {
771-
return "control character";
772-
} else if (c >= 0x80 && c <= 0x9F) {
773-
return "high control character";
774-
} else {
775-
return String.format("character '%c'", c);
776-
}
777-
}
778-
}
779-
780-
/**
781-
* Escapes characters in a string for display in error messages.
782-
*
783-
* @param iri the IRI to escape for display
784-
* @return escaped version suitable for error messages
785-
*/
786-
private String escapeForDisplay(String iri) {
787-
StringBuilder sb = new StringBuilder();
788-
for (int i = 0; i < iri.length(); i++) {
789-
char c = iri.charAt(i);
790-
if (c < 0x20 || (c >= 0x7F && c <= 0x9F)) {
791-
// Display control characters as Unicode escapes
792-
sb.append(String.format("\\u%04X", (int) c));
793-
} else if (c > 0x7E) {
794-
// Display non-ASCII as Unicode escapes for clarity
795-
sb.append(String.format("\\u%04X", (int) c));
796-
} else if (c == '<' || c == '>' || c == '{' || c == '}' || c == '\\' || c == '^' || c == '`' || c == '|' || c == '"') {
797-
// Display reserved characters with backslash escape
798-
sb.append('\\').append(c);
799-
} else {
800-
// Display normal ASCII characters as-is
801-
sb.append(c);
802-
}
803-
}
804-
return sb.toString();
685+
return true;
805686
}
806687

807688
/**

src/test/java/fr/inria/corese/core/next/api/ValueFactoryTest.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ public void testCreateIRI() {
3636

3737
assertNotNull(this.valueFactory.createIRI(correctIRI));
3838
assertThrows(IncorrectFormatException.class, () -> this.valueFactory.createIRI(incorrectIRI));
39+
assertThrows(IncorrectFormatException.class, () -> this.valueFactory.createIRI(incorrectIRI));
3940
}
4041

4142
@Test

src/test/java/fr/inria/corese/core/next/impl/temp/CoreseIRITest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ public void constructorCoreseNodeTest() {
6565
assertEquals("test", coreseIRI2.getLocalName());
6666
}
6767

68-
68+
@Test
69+
public void constructorStringException() {
70+
assertThrows(IncorrectFormatException.class, () -> new CoreseIRI("test"));
71+
}
6972

7073
}

0 commit comments

Comments
 (0)