Skip to content

Commit 7b57916

Browse files
committed
Demonstrate XInclude work - and fail
1 parent ed329cc commit 7b57916

File tree

8 files changed

+88
-3
lines changed

8 files changed

+88
-3
lines changed

build.sbt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform)
113113
libraryDependencies += "junit" % "junit" % "4.13.2" % Test,
114114
libraryDependencies += "com.github.sbt" % "junit-interface" % "0.13.3" % Test,
115115
libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.12.0" % Test,
116+
libraryDependencies += "xerces" % "xercesImpl" % "2.12.2" % Test,
116117
libraryDependencies ++= (CrossVersion.partialVersion(scalaVersion.value) match {
117118
case Some((3, _)) =>
118119
Seq()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<store xmlns:xi="http://www.w3.org/2001/XInclude">
2+
<xi:include href="books/book/author.xml"/>
3+
</store>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<store xmlns:xi="http://www.w3.org/2001/XInclude">
2+
<xi:include href="author/volume/1.xml"/>
3+
</store>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<collection n="1"/>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<includee>
2+
<content>Blah!</content>
3+
</includee>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<includer>
2+
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="includee.xml"/>
3+
</includer>
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
<site xmlns:xi="http://www.w3.org/2001/XInclude">
2+
<xi:include href="archive/books.xml"/>
3+
</site>

jvm/src/test/scala/scala/xml/XMLTest.scala

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,8 +510,15 @@ class XMLTestJVM {
510510
}
511511
}
512512

513+
// With both internal and external Xerces now on the classpath, we explicitly disambiguate which one we want:
514+
def xercesInternal: javax.xml.parsers.SAXParserFactory =
515+
javax.xml.parsers.SAXParserFactory.newInstance("com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl", null)
516+
517+
def xercesExternal: javax.xml.parsers.SAXParserFactory =
518+
javax.xml.parsers.SAXParserFactory.newInstance("org.apache.xerces.jaxp.SAXParserFactoryImpl", null)
519+
513520
/** Default SAXParserFactory */
514-
val defaultParserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance
521+
val defaultParserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal
515522

516523
@throws(classOf[org.xml.sax.SAXNotRecognizedException])
517524
def issue17UnrecognizedFeature(): Unit = {
@@ -629,7 +636,7 @@ class XMLTestJVM {
629636
// using namespace-aware parser, this works with FactoryAdapter enhanced to handle startPrefixMapping() events;
630637
// see https://github.com/scala/scala-xml/issues/506
631638
def roundtrip(namespaceAware: Boolean, xml: String): Unit = {
632-
val parserFactory: javax.xml.parsers.SAXParserFactory = javax.xml.parsers.SAXParserFactory.newInstance()
639+
val parserFactory: javax.xml.parsers.SAXParserFactory = xercesInternal
633640
parserFactory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true)
634641
parserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
635642
parserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true)
@@ -656,7 +663,7 @@ class XMLTestJVM {
656663

657664
@UnitTest
658665
def useXMLReaderWithXMLFilter(): Unit = {
659-
val parent: org.xml.sax.XMLReader = javax.xml.parsers.SAXParserFactory.newInstance.newSAXParser.getXMLReader
666+
val parent: org.xml.sax.XMLReader = xercesInternal.newSAXParser.getXMLReader
660667
val filter: org.xml.sax.XMLFilter = new org.xml.sax.helpers.XMLFilterImpl(parent) {
661668
override def characters(ch: Array[Char], start: Int, length: Int): Unit = {
662669
for (i <- 0 until length) if (ch(start+i) == 'a') ch(start+i) = 'b'
@@ -682,6 +689,67 @@ class XMLTestJVM {
682689
assertTrue(gotAnError)
683690
}
684691

692+
// Now that we can use an XML parser configured to be namespace-aware,
693+
// we can also configure it to be XInclude-aware and process XML Includes:
694+
def check(
695+
parserFactory: javax.xml.parsers.SAXParserFactory,
696+
resourceName: String,
697+
expected: String
698+
): Unit = {
699+
parserFactory.setNamespaceAware(true)
700+
parserFactory.setXIncludeAware(true)
701+
val actual: String = XML
702+
.withSAXParser(parserFactory.newSAXParser)
703+
.load(getClass.getResource(resourceName).toString)
704+
.toString
705+
706+
assertEquals(expected, actual)
707+
}
708+
709+
// Here we demonstrate that XInclude works with both the external and the built-in Xerces:
710+
711+
val includerExpected: String =
712+
s"""<includer>
713+
| <includee xml:base="includee.xml">
714+
| <content>Blah!</content>
715+
|</includee>
716+
|</includer>""".stripMargin
717+
718+
@UnitTest def xIncludeWithExternalXerces(): Unit = check(xercesExternal, "includer.xml", includerExpected)
719+
@UnitTest def xIncludeWithInternalXerces(): Unit = check(xercesInternal, "includer.xml", includerExpected)
720+
721+
// And here we demonstrate that both external and built-in Xerces report incorrect `xml:base`
722+
// when an included XML file contains its own include, and the included files are not in the same directory:
723+
// `xml:base` on the `<collection>` element is incorrect
724+
// books/book/author/volume/1.xml instead of the correct
725+
// archive/books/book/author/volume/1.xml!
726+
val siteUnfortunatelyExpected: String =
727+
s"""<site xmlns:xi="http://www.w3.org/2001/XInclude">
728+
| <store xml:base="archive/books.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
729+
| <store xml:base="archive/books/book/author.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
730+
| <collection n="1" xml:base="books/book/author/volume/1.xml"/>
731+
|</store>
732+
|</store>
733+
|</site>""".stripMargin
734+
735+
// Turns out, this is a known Xerces bug https://issues.apache.org/jira/browse/XERCESJ-1102:
736+
// - the bug was reported in October 2005 - more than seventeen years ago;
737+
// - a patch fixing it (that I have not verified personally) was submitted many years ago;
738+
// - the bug is still not fixed in the 2023 release of Xerces;
739+
// - the bug was discussed by the Saxon users in https://saxonica.plan.io/issues/4664,
740+
// and is allegedly fixed in SaxonC 11.1 - although it is not clear how this can be, since Saxon does not ship its own Xerces.
741+
//
742+
// In my own application, I had to "fix up" the incorrect values produced by Xerces, taking into account
743+
// the specific directory layout being used. I can only speculate what others do, but none of the alternatives sounds great:
744+
// - avoid using nested includes altogether or flatten the directory hierarchy to appease the bug;
745+
// - use a privately patched version of Xerces;
746+
// - use Saxon DOM parsing instead of Xerces' SAX.
747+
//
748+
// I find it utterly incomprehensible that a foundational library shipped with the JDK and used everywhere
749+
// has had a bug in its core functionality for years that never gets fixed, but sadly, that is the state of affairs:
750+
@UnitTest def xIncludeFailWithExternalXerces(): Unit = check(xercesExternal, "site.xml", siteUnfortunatelyExpected)
751+
@UnitTest def xIncludeFailWithInternalXerces(): Unit = check(xercesInternal, "site.xml", siteUnfortunatelyExpected)
752+
685753
@UnitTest
686754
def nodeSeqNs(): Unit = {
687755
val x: NodeBuffer = {

0 commit comments

Comments
 (0)