@@ -510,8 +510,15 @@ class XMLTestJVM {
510510 }
511511 }
512512
513+ // With both internal and external Xerces now on the classpath, we explicitly disambiguate which one we want:
514+ def xercesInternal : javax.xml.parsers.SAXParserFactory =
515+ javax.xml.parsers.SAXParserFactory .newInstance(" com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl" , null )
516+
517+ def xercesExternal : javax.xml.parsers.SAXParserFactory =
518+ javax.xml.parsers.SAXParserFactory .newInstance(" org.apache.xerces.jaxp.SAXParserFactoryImpl" , null )
519+
513520 /** Default SAXParserFactory */
514- val defaultParserFactory : javax.xml.parsers.SAXParserFactory = javax.xml.parsers. SAXParserFactory .newInstance
521+ val defaultParserFactory : javax.xml.parsers.SAXParserFactory = xercesInternal
515522
516523 @ throws(classOf [org.xml.sax.SAXNotRecognizedException ])
517524 def issue17UnrecognizedFeature (): Unit = {
@@ -629,7 +636,7 @@ class XMLTestJVM {
629636 // using namespace-aware parser, this works with FactoryAdapter enhanced to handle startPrefixMapping() events;
630637 // see https://github.com/scala/scala-xml/issues/506
631638 def roundtrip (namespaceAware : Boolean , xml : String ): Unit = {
632- val parserFactory : javax.xml.parsers.SAXParserFactory = javax.xml.parsers. SAXParserFactory .newInstance()
639+ val parserFactory : javax.xml.parsers.SAXParserFactory = xercesInternal
633640 parserFactory.setFeature(" http://javax.xml.XMLConstants/feature/secure-processing" , true )
634641 parserFactory.setFeature(" http://apache.org/xml/features/nonvalidating/load-external-dtd" , false )
635642 parserFactory.setFeature(" http://apache.org/xml/features/disallow-doctype-decl" , true )
@@ -656,7 +663,7 @@ class XMLTestJVM {
656663
657664 @ UnitTest
658665 def useXMLReaderWithXMLFilter (): Unit = {
659- val parent : org.xml.sax.XMLReader = javax.xml.parsers. SAXParserFactory .newInstance .newSAXParser.getXMLReader
666+ val parent : org.xml.sax.XMLReader = xercesInternal .newSAXParser.getXMLReader
660667 val filter : org.xml.sax.XMLFilter = new org.xml.sax.helpers.XMLFilterImpl (parent) {
661668 override def characters (ch : Array [Char ], start : Int , length : Int ): Unit = {
662669 for (i <- 0 until length) if (ch(start+ i) == 'a' ) ch(start+ i) = 'b'
@@ -682,6 +689,67 @@ class XMLTestJVM {
682689 assertTrue(gotAnError)
683690 }
684691
692+ // Now that we can use an XML parser configured to be namespace-aware,
693+ // we can also configure it to be XInclude-aware and process XML Includes:
694+ def check (
695+ parserFactory : javax.xml.parsers.SAXParserFactory ,
696+ resourceName : String ,
697+ expected : String
698+ ): Unit = {
699+ parserFactory.setNamespaceAware(true )
700+ parserFactory.setXIncludeAware(true )
701+ val actual : String = XML
702+ .withSAXParser(parserFactory.newSAXParser)
703+ .load(getClass.getResource(resourceName).toString)
704+ .toString
705+
706+ assertEquals(expected, actual)
707+ }
708+
709+ // Here we demonstrate that XInclude works with both the external and the built-in Xerces:
710+
711+ val includerExpected : String =
712+ s """ <includer>
713+ | <includee xml:base="includee.xml">
714+ | <content>Blah!</content>
715+ |</includee>
716+ |</includer> """ .stripMargin
717+
718+ @ UnitTest def xIncludeWithExternalXerces (): Unit = check(xercesExternal, " includer.xml" , includerExpected)
719+ @ UnitTest def xIncludeWithInternalXerces (): Unit = check(xercesInternal, " includer.xml" , includerExpected)
720+
721+ // And here we demonstrate that both external and built-in Xerces report incorrect `xml:base`
722+ // when an included XML file contains its own include, and the included files are not in the same directory:
723+ // `xml:base` on the `<collection>` element is incorrect
724+ // books/book/author/volume/1.xml instead of the correct
725+ // archive/books/book/author/volume/1.xml!
726+ val siteUnfortunatelyExpected : String =
727+ s """ <site xmlns:xi="http://www.w3.org/2001/XInclude">
728+ | <store xml:base="archive/books.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
729+ | <store xml:base="archive/books/book/author.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
730+ | <collection n="1" xml:base="books/book/author/volume/1.xml"/>
731+ |</store>
732+ |</store>
733+ |</site> """ .stripMargin
734+
735+ // Turns out, this is a known Xerces bug https://issues.apache.org/jira/browse/XERCESJ-1102:
736+ // - the bug was reported in October 2005 - more than seventeen years ago;
737+ // - a patch fixing it (that I have not verified personally) was submitted many years ago;
738+ // - the bug is still not fixed in the 2023 release of Xerces;
739+ // - the bug was discussed by the Saxon users in https://saxonica.plan.io/issues/4664,
740+ // and is allegedly fixed in SaxonC 11.1 - although it is not clear how this can be, given that Saxon does not ship its own Xerces.
741+ //
742+ // In my own application, I had to "fix up" the incorrect values produced by Xerces, taking into account
743+ // the specific directory layout being used. I can only speculate what others do, but none of the alternatives sounds great:
744+ // - avoid using nested includes altogether or flatten the directory hierarchy to appease the bug;
745+ // - use a privately patched version of Xerces;
746+ // - use Saxon DOM parsing instead of Xerces' SAX.
747+ //
748+ // I find it utterly incomprehensible that a foundational library shipped with the JDK and used everywhere
749+ // has had a bug in its core functionality for years that never gets fixed, but sadly, this is the state of affairs:
750+ @ UnitTest def xIncludeFailWithExternalXerces (): Unit = check(xercesExternal, " site.xml" , siteUnfortunatelyExpected)
751+ @ UnitTest def xIncludeFailWithInternalXerces (): Unit = check(xercesInternal, " site.xml" , siteUnfortunatelyExpected)
752+
685753 @ UnitTest
686754 def nodeSeqNs (): Unit = {
687755 val x : NodeBuffer = {
0 commit comments