@@ -27,6 +27,8 @@ public abstract class AbstractTurtleTriGListener {
2727 public Resource currentSubject ;
2828 public IRI currentPredicate ;
2929
// Prefixes added through registerPrefix(...). resolveIRI consults this set to reject
// ':'-style names when the empty prefix was never registered that way.
30+ private final java .util .Set <String > explicitlyDeclaredPrefixes = new java .util .HashSet <>();
31+
3032 /**
3133 * Constructs a parser listener with the specified model, factory and base URI.
3234 *
@@ -59,7 +61,9 @@ public void initializeBasePrefix() {
5961 */
6062 public String extractAndUnescapeIRI (String text ) {
6163 String iri = text .substring (1 , text .length () - 1 );
62- return unescapeIRI (iri );
64+ iri = unescapeIRI (iri );
65+ validateIRI (iri );
66+ return iri ;
6367 }
6468
6569 /**
@@ -69,6 +73,8 @@ public String extractAndUnescapeIRI(String text) {
6973 */
7074 public void updateBaseURI (String newBase ) {
7175 this .baseURI = resolveIRIAgainstBase (newBase );
76+ validateIRI (this .baseURI );
77+
7278 prefixMap .put (ParserConstants .EMPTY_STRING , this .baseURI );
7379 model .setNamespace (ParserConstants .EMPTY_STRING , this .baseURI );
7480 }
@@ -81,8 +87,11 @@ public void updateBaseURI(String newBase) {
8187 */
8288 public void registerPrefix (String prefix , String iri ) {
8389 String resolvedIRI = resolveIRIAgainstBase (iri );
90+ validateIRI (resolvedIRI );
8491 prefixMap .put (prefix , resolvedIRI );
8592 model .setNamespace (prefix , resolvedIRI );
93+
94+ explicitlyDeclaredPrefixes .add (prefix );
8695 }
8796
8897 /**
@@ -109,6 +118,7 @@ public String resolveIRI(String raw) {
109118 if (raw .startsWith (ParserConstants .IRI_START ) && raw .endsWith (ParserConstants .IRI_END )) {
110119 String iri = raw .substring (1 , raw .length () - 1 );
111120 iri = unescapeIRI (iri );
121+ validateIRI (iri );
112122 return iri .isEmpty () ? getEffectiveBaseURI () : resolveIRIAgainstBase (iri );
113123 }
114124
@@ -117,23 +127,33 @@ public String resolveIRI(String raw) {
117127 String prefix = parts [0 ];
118128 String localName = parts [1 ];
119129
130+ if (prefix .isEmpty () && !explicitlyDeclaredPrefixes .contains ("" )) {
131+ throw new ParsingErrorException (
132+ "Syntax error: prefixed name ':' + '" + localName + "' used but ':' prefix was never declared. " +
133+ "Use @prefix : <baseURI> to declare the empty prefix."
134+ );
135+ }
136+
120137 if (prefixMap .containsKey (prefix )) {
121138 localName = unescapeIRI (localName );
122139 String ns = prefixMap .get (prefix );
123140 if (ns != null ) {
124- return ns + localName ;
141+ String result = ns + localName ;
142+ validateIRI (result );
143+ return result ;
125144 }
126- }
127-
128- if (isAbsoluteIRI (raw )) {
145+ } else if (isAbsoluteIRI (raw )) {
129146 return raw ;
147+ } else {
148+ throw new ParsingErrorException ("Undeclared prefix: " + prefix );
130149 }
131-
132- throw new ParsingErrorException ("Undeclared prefix: " + prefix );
133150 }
134151
135- return resolveIRIAgainstBase (raw );
152+ String result = resolveIRIAgainstBase (raw );
153+ return result ;
136154
155+ } catch (ParsingErrorException e ) {
156+ throw e ;
137157 } catch (IllegalArgumentException e ) {
138158 throw new ParsingErrorException (e .getMessage (), e );
139159 }
@@ -448,6 +468,7 @@ public String getEffectiveBaseURI() {
448468 String effective = (baseURI != null && !baseURI .isEmpty ()) ? baseURI : ParserConstants .getDefaultBaseURI ();
449469 return normalizeURI (effective );
450470 }
471+
451472 /**
452473 * Processes Unicode escape sequences in IRIs.
453474 *
@@ -630,6 +651,159 @@ public Literal createNumericLiteral(String text, NumericType type) {
630651 }
631652 }
632653
654+ /**
655+ * Validates that an IRI contains only valid characters after escape sequence processing.
656+ *
657+ * @param iri the IRI string to validate (after escape sequences have been processed)
658+ * @throws ParsingErrorException if the IRI contains forbidden characters
659+ */
660+ private void validateIRI (String iri ) throws ParsingErrorException {
661+ if (iri == null || iri .isEmpty ()) {
662+ return ; // Empty IRIs are acceptable
663+ }
664+
665+
666+ // Check each character in the IRI
667+ for (int i = 0 ; i < iri .length (); i ++) {
668+ char c = iri .charAt (i );
669+
670+ // Check for forbidden characters
671+ if (isInvalidIRICharacter (c )) {
672+ String codePoint = String .format ("U+%04X" , (int ) c );
673+ String charDesc = getCharacterDescription (c );
674+ String displayIRI = escapeForDisplay (iri );
675+
676+
677+ throw new ParsingErrorException (
678+ "Invalid character in IRI: " + codePoint + " (" + charDesc + ") " +
679+ "at position " + i + ". " +
680+ "IRI after escape processing: " + displayIRI + ". " +
681+ "IRIs cannot contain space, control characters, or reserved characters."
682+ );
683+ }
684+ }
685+
686+ }
687+
688+ /**
689+ * Checks if a character is invalid in an IRI according to RFC 3987.
690+ *
691+ * @param c the character to validate
692+ * @return true if the character is forbidden in IRIs
693+ */
694+ private boolean isInvalidIRICharacter (char c ) {
695+ // Space (U+0020) - NOT ALLOWED
696+ if (c == 0x20 ) {
697+ return true ;
698+ }
699+
700+ // Control characters (U+0000-U+001F) - NOT ALLOWED
701+ if (c >= 0x00 && c <= 0x1F ) {
702+ return true ;
703+ }
704+
705+ // DEL (U+007F) - NOT ALLOWED
706+ if (c == 0x7F ) {
707+ return true ;
708+ }
709+
710+ // High control characters (U+0080-U+009F) - NOT ALLOWED
711+ if (c >= 0x80 && c <= 0x9F ) {
712+ return true ;
713+ }
714+
715+ switch (c ) {
716+ case '<' : // U+003C - less than
717+ case '>' : // U+003E - greater than
718+ case '{' : // U+007B - left curly bracket
719+ case '}' : // U+007D - right curly bracket
720+ case '\\' : // U+005C - backslash
721+ case '^' : // U+005E - circumflex
722+ case '`' : // U+0060 - grave accent
723+ case '|' : // U+007C - pipe
724+ case '"' : // U+0022 - quotation mark
725+ return true ;
726+ default :
727+ return false ;
728+ }
729+ }
730+
731+ /**
732+ * Returns a human-readable description of a character for error messages.
733+ *
734+ * @param c the character to describe
735+ * @return human-readable description
736+ */
737+ private String getCharacterDescription (char c ) {
738+ switch (c ) {
739+ case 0x00 :
740+ return "null character" ;
741+ case 0x09 :
742+ return "tab" ;
743+ case 0x0A :
744+ return "line feed" ;
745+ case 0x0D :
746+ return "carriage return" ;
747+ case 0x20 :
748+ return "space" ;
749+ case 0x7F :
750+ return "delete" ;
751+ case '<' :
752+ return "less than" ;
753+ case '>' :
754+ return "greater than" ;
755+ case '{' :
756+ return "left curly bracket" ;
757+ case '}' :
758+ return "right curly bracket" ;
759+ case '\\' :
760+ return "backslash" ;
761+ case '^' :
762+ return "circumflex" ;
763+ case '`' :
764+ return "grave accent" ;
765+ case '|' :
766+ return "pipe" ;
767+ case '"' :
768+ return "quotation mark" ;
769+ default :
770+ if (c < 0x20 ) {
771+ return "control character" ;
772+ } else if (c >= 0x80 && c <= 0x9F ) {
773+ return "high control character" ;
774+ } else {
775+ return String .format ("character '%c'" , c );
776+ }
777+ }
778+ }
779+
780+ /**
781+ * Escapes characters in a string for display in error messages.
782+ *
783+ * @param iri the IRI to escape for display
784+ * @return escaped version suitable for error messages
785+ */
786+ private String escapeForDisplay (String iri ) {
787+ StringBuilder sb = new StringBuilder ();
788+ for (int i = 0 ; i < iri .length (); i ++) {
789+ char c = iri .charAt (i );
790+ if (c < 0x20 || (c >= 0x7F && c <= 0x9F )) {
791+ // Display control characters as Unicode escapes
792+ sb .append (String .format ("\\ u%04X" , (int ) c ));
793+ } else if (c > 0x7E ) {
794+ // Display non-ASCII as Unicode escapes for clarity
795+ sb .append (String .format ("\\ u%04X" , (int ) c ));
796+ } else if (c == '<' || c == '>' || c == '{' || c == '}' || c == '\\' || c == '^' || c == '`' || c == '|' || c == '"' ) {
797+ // Display reserved characters with backslash escape
798+ sb .append ('\\' ).append (c );
799+ } else {
800+ // Display normal ASCII characters as-is
801+ sb .append (c );
802+ }
803+ }
804+ return sb .toString ();
805+ }
806+
633807 /**
634808 * Enumeration of numeric literal types corresponding to XSD datatypes.
635809 */
0 commit comments