22
33import fr .inria .corese .core .next .api .*;
44import fr .inria .corese .core .next .impl .common .literal .XSD ;
5+ import fr .inria .corese .core .next .impl .common .util .IRIUtils ;
56import fr .inria .corese .core .next .impl .common .vocabulary .RDF ;
67import fr .inria .corese .core .next .impl .exception .ParsingErrorException ;
78import fr .inria .corese .core .next .impl .io .parser .util .ParserConstants ;
@@ -58,12 +59,12 @@ public void initializeBasePrefix() {
5859 *
5960 * @param text raw IRI text including angle brackets
6061 * @return unescaped IRI string
62+ * @throws ParsingErrorException if the IRI contains invalid characters after escape processing
6163 */
6264 public String extractAndUnescapeIRI (String text ) {
6365 String iri = text .substring (1 , text .length () - 1 );
6466 iri = unescapeIRI (iri );
65- validateIRI (iri );
66- return iri ;
67+ return validateIRI (iri ) ? iri : iri ;
6768 }
6869
6970 /**
@@ -87,7 +88,7 @@ public void updateBaseURI(String newBase) {
8788 */
8889 public void registerPrefix (String prefix , String iri ) {
8990 String resolvedIRI = resolveIRIAgainstBase (iri );
90- validateIRI (resolvedIRI );
91+ validateIRI (resolvedIRI );
9192 prefixMap .put (prefix , resolvedIRI );
9293 model .setNamespace (prefix , resolvedIRI );
9394
@@ -655,24 +656,23 @@ public Literal createNumericLiteral(String text, NumericType type) {
655656 * Validates that an IRI contains only valid characters after escape sequence processing.
656657 *
657658 * @param iri the IRI string to validate (after escape sequences have been processed)
659+ * @return true if the IRI is valid
658660 * @throws ParsingErrorException if the IRI contains forbidden characters
659661 */
660- private void validateIRI (String iri ) throws ParsingErrorException {
662+ private boolean validateIRI (String iri ) throws ParsingErrorException {
661663 if (iri == null || iri .isEmpty ()) {
662- return ; // Empty IRIs are acceptable
664+ return true ; // Empty IRIs are acceptable
663665 }
664666
665-
666667 // Check each character in the IRI
667668 for (int i = 0 ; i < iri .length (); i ++) {
668669 char c = iri .charAt (i );
669670
670671 // Check for forbidden characters
671- if (isInvalidIRICharacter (c )) {
672+ if (IRIUtils . isInvalidIRICharacter (c )) {
672673 String codePoint = String .format ("U+%04X" , (int ) c );
673- String charDesc = getCharacterDescription (c );
674- String displayIRI = escapeForDisplay (iri );
675-
674+ String charDesc = IRIUtils .getCharacterDescription (c );
675+ String displayIRI = IRIUtils .escapeForDisplay (iri );
676676
677677 throw new ParsingErrorException (
678678 "Invalid character in IRI: " + codePoint + " (" + charDesc + ") " +
@@ -682,126 +682,7 @@ private void validateIRI(String iri) throws ParsingErrorException {
682682 );
683683 }
684684 }
685-
686- }
687-
688- /**
689- * Checks if a character is invalid in an IRI according to RFC 3987.
690- *
691- * @param c the character to validate
692- * @return true if the character is forbidden in IRIs
693- */
694- private boolean isInvalidIRICharacter (char c ) {
695- // Space (U+0020) - NOT ALLOWED
696- if (c == 0x20 ) {
697- return true ;
698- }
699-
700- // Control characters (U+0000-U+001F) - NOT ALLOWED
701- if (c >= 0x00 && c <= 0x1F ) {
702- return true ;
703- }
704-
705- // DEL (U+007F) - NOT ALLOWED
706- if (c == 0x7F ) {
707- return true ;
708- }
709-
710- // High control characters (U+0080-U+009F) - NOT ALLOWED
711- if (c >= 0x80 && c <= 0x9F ) {
712- return true ;
713- }
714-
715- switch (c ) {
716- case '<' : // U+003C - less than
717- case '>' : // U+003E - greater than
718- case '{' : // U+007B - left curly bracket
719- case '}' : // U+007D - right curly bracket
720- case '\\' : // U+005C - backslash
721- case '^' : // U+005E - circumflex
722- case '`' : // U+0060 - grave accent
723- case '|' : // U+007C - pipe
724- case '"' : // U+0022 - quotation mark
725- return true ;
726- default :
727- return false ;
728- }
729- }
730-
731- /**
732- * Returns a human-readable description of a character for error messages.
733- *
734- * @param c the character to describe
735- * @return human-readable description
736- */
737- private String getCharacterDescription (char c ) {
738- switch (c ) {
739- case 0x00 :
740- return "null character" ;
741- case 0x09 :
742- return "tab" ;
743- case 0x0A :
744- return "line feed" ;
745- case 0x0D :
746- return "carriage return" ;
747- case 0x20 :
748- return "space" ;
749- case 0x7F :
750- return "delete" ;
751- case '<' :
752- return "less than" ;
753- case '>' :
754- return "greater than" ;
755- case '{' :
756- return "left curly bracket" ;
757- case '}' :
758- return "right curly bracket" ;
759- case '\\' :
760- return "backslash" ;
761- case '^' :
762- return "circumflex" ;
763- case '`' :
764- return "grave accent" ;
765- case '|' :
766- return "pipe" ;
767- case '"' :
768- return "quotation mark" ;
769- default :
770- if (c < 0x20 ) {
771- return "control character" ;
772- } else if (c >= 0x80 && c <= 0x9F ) {
773- return "high control character" ;
774- } else {
775- return String .format ("character '%c'" , c );
776- }
777- }
778- }
779-
780- /**
781- * Escapes characters in a string for display in error messages.
782- *
783- * @param iri the IRI to escape for display
784- * @return escaped version suitable for error messages
785- */
786- private String escapeForDisplay (String iri ) {
787- StringBuilder sb = new StringBuilder ();
788- for (int i = 0 ; i < iri .length (); i ++) {
789- char c = iri .charAt (i );
790- if (c < 0x20 || (c >= 0x7F && c <= 0x9F )) {
791- // Display control characters as Unicode escapes
792- sb .append (String .format ("\\ u%04X" , (int ) c ));
793- } else if (c > 0x7E ) {
794- // Display non-ASCII as Unicode escapes for clarity
795- sb .append (String .format ("\\ u%04X" , (int ) c ));
796- } else if (c == '<' || c == '>' || c == '{' || c == '}' || c == '\\' || c == '^' || c == '`' || c == '|' || c == '"' ) {
797- // Display reserved characters with backslash escape
798- sb .append ('\\' ).append (c );
799- } else {
800- // Display normal ASCII characters as-is
801- sb .append (c );
802- }
803- }
804- return sb .toString ();
685+ return true ;
805686 }
806687
807688 /**
0 commit comments