11package scala .xml
22
33import org .junit .{Test => UnitTest }
4- import org .junit .Assert .{assertEquals , assertFalse , assertTrue }
4+ import org .junit .Assert .{assertEquals , assertFalse , assertNull , assertThrows , assertTrue }
55import java .io .StringWriter
66import java .io .ByteArrayOutputStream
7+ import java .net .URL
78import scala .xml .dtd .{DocType , PublicID }
89import scala .xml .parsing .ConstructingParser
910import scala .xml .Utility .sort
@@ -681,14 +682,16 @@ class XMLTestJVM {
681682 assertTrue(gotAnError)
682683 }
683684
/**
 * Looks up the XML test resource with the given base name via `getClass.getResource`.
 *
 * @param resourceName name of the resource without the `.xml` extension
 * @return URL of the resource `<resourceName>.xml`
 * @throws NullPointerException if the resource cannot be found; the message names the missing
 *                              file, so the failure is reported at the lookup rather than at
 *                              some later dereference of a null URL
 */
def resourceUrl(resourceName: String): URL = {
  val fileName: String = s"$resourceName.xml"
  java.util.Objects.requireNonNull(getClass.getResource(fileName), s"test resource not found: $fileName")
}
686+
684687 // Here we see that opening InputStream prematurely, as was done previously, breaks XInclude.
// The document is handed to the loader as an already opened InputStream instead of a URL;
// the test passes only if parsing then fails with a SAXParseException.
// The stream is closed explicitly so that the expected failure does not leak it.
@UnitTest(expected = classOf[org.xml.sax.SAXParseException])
def xIncludeNeedsSystemId(): Unit = {
  val parserFactory = xercesInternal
  parserFactory.setNamespaceAware(true)
  parserFactory.setXIncludeAware(true)

  val in: java.io.InputStream = resourceUrl("site").openStream()
  try {
    XML
      .withSAXParser(parserFactory.newSAXParser)
      .load(in)
      .toString
  } finally {
    in.close()
  }
}
694697
@@ -703,7 +706,7 @@ class XMLTestJVM {
703706 parserFactory.setXIncludeAware(true )
704707 val actual : String = XML
705708 .withSAXParser(parserFactory.newSAXParser)
706- .load(getClass.getResource (resourceName))
709+ .load(resourceUrl (resourceName))
707710 .toString
708711
709712 assertEquals(expected, actual)
@@ -718,8 +721,8 @@ class XMLTestJVM {
718721 |</includee>
719722 |</includer> """ .stripMargin
720723
// Both parser flavours are run against the same "includer" resource and the same expected text.
@UnitTest
def xIncludeWithExternalXerces(): Unit = {
  check(xercesExternal, "includer", includerExpected)
}

@UnitTest
def xIncludeWithInternalXerces(): Unit = {
  check(xercesInternal, "includer", includerExpected)
}
723726
724727 // And here we demonstrate that both external and built-in Xerces report incorrect `xml:base`
725728 // when the XML file included contains its own include, and included files are not in the same directory:
@@ -750,8 +753,170 @@ class XMLTestJVM {
750753 //
751754 // I find it utterly incomprehensible that a foundational library shipped with the JDK and used everywhere
752755 // has had a bug in its core functionality for years and it never gets fixed, but sadly, that is the state of affairs:
// Both parser flavours are run against the "site" resource and compared with the same
// (unfortunately) expected text.
@UnitTest
def xIncludeFailWithExternalXerces(): Unit = {
  check(xercesExternal, "site", siteUnfortunatelyExpected)
}

@UnitTest
def xIncludeFailWithInternalXerces(): Unit = {
  check(xercesInternal, "site", siteUnfortunatelyExpected)
}
758+
// Checks what each way of loading a document reports as the document's baseURI.
@UnitTest
def documentBaseURI(): Unit = {
  val url: URL = resourceUrl("site")
  val expectedSuffix: String = "/test-classes/scala/xml/site.xml"

  // JUnit assertion (rather than Predef.assert) that also shows the actual value on failure.
  def assertBaseURI(parser: javax.xml.parsers.SAXParser): Unit = {
    val baseURI: String = XML.withSAXParser(parser).loadDocument(url).baseURI
    assertTrue(s"unexpected baseURI: $baseURI", baseURI.endsWith(expectedSuffix))
  }

  // XMLLoader returns the document's baseURI:
  assertBaseURI(xercesInternal.newSAXParser)
  assertBaseURI(xercesExternal.newSAXParser)

  // ConstructingParser does not return it of course: since it uses scala.io.Source,
  // it has no idea where the XML is coming from:
  assertNull(ConstructingParser.fromSource(scala.io.Source.fromURI(url.toURI), preserveWS = false).document().baseURI)
}
768+
// Compares the standAlone value reported by ConstructingParser and by XMLLoader
// for documents with and without a `standalone` pseudo-attribute / xml declaration.
@UnitTest
def xmlStandAlone(): Unit = {
  val standAlone: String = """<?xml version="1.0" standalone="yes"?><a/>"""
  val nonStandAlone: String = """<?xml version="1.0" standalone="no"?><a/>"""
  val default: String = """<?xml version="1.0"?><a/>"""
  val noXmlDeclaration: String = """<a/>"""

  def constructed(xml: String): Document =
    ConstructingParser.fromSource(scala.io.Source.fromString(xml), preserveWS = false).document()

  def loaded(xml: String): Document =
    XML.withSAXParser(xercesInternal.newSAXParser).loadStringDocument(xml)

  // ConstructingParser returns standAlone status of the document straight from the `xml` declaration:
  assertEquals(Some(true), constructed(standAlone).standAlone)
  assertEquals(Some(false), constructed(nonStandAlone).standAlone)
  assertEquals(None, constructed(default).standAlone)
  // ConstructingParser incorrectly returns null standAlone value when the document does not have the xml declaration:
  assertNull(constructed(noXmlDeclaration).standAlone)

  // XMLLoader always reports a defined standAlone value; this test expects `false` whenever
  // the declaration is missing or does not say `standalone="yes"`.
  // assertEquals (instead of assertTrue(..contains..)) shows the actual value on failure.
  assertEquals(Some(true), loaded(standAlone).standAlone)
  assertEquals(Some(false), loaded(nonStandAlone).standAlone)
  assertEquals(Some(false), loaded(default).standAlone)
  assertEquals(Some(false), loaded(noXmlDeclaration).standAlone)
}
789+
// Compares the XML version reported by ConstructingParser and by XMLLoader
// for documents declaring version 1.0, version 1.1, and no xml declaration at all.
@UnitTest
def xmlVersion(): Unit = {
  val xml10: String = """<?xml version="1.0"?><a/>"""
  val xml11: String = """<?xml version="1.1"?><a/>"""
  val noXmlDeclaration: String = """<a/>"""

  def constructed(xml: String): Document =
    ConstructingParser.fromSource(scala.io.Source.fromString(xml), preserveWS = false).document()

  def loaded(xml: String): Document =
    XML.withSAXParser(xercesInternal.newSAXParser).loadStringDocument(xml)

  // ConstructingParser returns XML version of the document straight from the `xml` declaration for version="1.0":
  assertEquals(Some("1.0"), constructed(xml10).version)
  // ConstructingParser returns an incorrect (empty) version value when the version is "1.1"
  // (and prints a complaint about versions other than 1.0):
  assertEquals(None, constructed(xml11).version)
  // ConstructingParser incorrectly returns null version value when the document does not have the xml declaration:
  assertNull(constructed(noXmlDeclaration).version)

  // XMLLoader returns XML version of the document straight from the `xml` declaration;
  // the XML 1.1 feature has to be on for the "1.1" case:
  assertTrue(xercesInternal.getFeature("http://xml.org/sax/features/xml-1.1"))
  assertEquals(Some("1.0"), loaded(xml10).version)
  assertEquals(Some("1.1"), loaded(xml11).version)
  assertEquals(Some("1.0"), loaded(noXmlDeclaration).version)
}
809+
// Compares the encoding reported by ConstructingParser and by XMLLoader for documents
// given as strings and for resource files that start with a Byte Order Mark.
@UnitTest
def xmlEncoding(): Unit = {
  val utf8: String = s"""<?xml version="1.0" encoding="UTF-8"?><a/>"""
  val utf16: String = s"""<?xml version="1.0" encoding="UTF-16"?><a/>"""
  val default: String = s"""<?xml version="1.0"?><a/>"""
  val noXmlDeclaration: String = s"""<a/>"""

  // Runs ConstructingParser (whitespace not preserved) over the given source.
  def constructed(source: scala.io.Source): Document =
    ConstructingParser.fromSource(source, preserveWS = false).document()

  // ConstructingParser returns XML encoding name canonicalized from the `xml` declaration:
  assertEquals(Some("UTF-8"), constructed(scala.io.Source.fromString(utf8)).encoding)
  assertEquals(Some("UTF-16"), constructed(scala.io.Source.fromString(utf16)).encoding)
  assertEquals(None, constructed(scala.io.Source.fromString(default)).encoding)
  // ConstructingParser incorrectly returns null encoding value when the document does not have the xml declaration:
  assertNull(constructed(scala.io.Source.fromString(noXmlDeclaration)).encoding)

  // XMLLoader does not return the encoding specified in the `xml` declaration:
  assertEquals(None, XML.loadStringDocument(utf8).encoding)
  assertEquals(None, XML.loadStringDocument(utf16).encoding)
  assertEquals(None, XML.loadStringDocument(default).encoding)
  assertEquals(None, XML.loadStringDocument(noXmlDeclaration).encoding)

  // XMLLoader returns the encoding determined from the Byte Order Mark in the document itself:
  assertEquals(Some("UTF-8"), XML.loadDocument(resourceUrl("utf8")).encoding)
  assertEquals(Some("UTF-16BE"), XML.loadDocument(resourceUrl("utf16")).encoding)

  // ConstructingParser doesn't seem to be able to parse XML with Byte Order Mark:
  assertThrows(
    classOf[java.nio.charset.MalformedInputException],
    () => constructed(scala.io.Source.fromURI(resourceUrl("utf16").toURI)).encoding
  )
}
840+
// Loads the same document (DOCTYPE with an internal DTD subset plus one entity reference)
// through XMLLoader and through ConstructingParser, and pins down what each of them
// reports as the document's DTD and as its root element.
841+ @ UnitTest
842+ def loadDtd (): Unit = {
843+ val parserFactory : javax.xml.parsers.SAXParserFactory = xercesExternal
// Turns the "load-external-dtd" feature off - presumably so that the DocBook DTD named
// in the DOCTYPE below is not fetched over the network (TODO confirm).
844+ parserFactory.setFeature(" http://apache.org/xml/features/nonvalidating/load-external-dtd" , false )
845+
846+ val xml : String =
847+ s """ <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook V5.0//EN" "http://www.oasis-open.org/docbook/xml/5.0/docbook.dtd" [
848+ | <!ELEMENT AnyElement ANY>
849+ | <!ELEMENT EmptyElement EMPTY>
850+ | <!ELEMENT PCDataElement (#PCDATA)>
851+ | <!ELEMENT MixedElement (#PCDATA|element|complex)*>
852+ | <!ELEMENT ChildrenElement (element+,complex?)>
853+ | <!ELEMENT element (#PCDATA)>
854+ | <!ELEMENT complex (#PCDATA)>
855+ | <!ATTLIST complex
856+ | implied CDATA #IMPLIED
857+ | required CDATA #REQUIRED
858+ | fixed CDATA #FIXED "fixed"
859+ | default CDATA "default"
860+ | enumerated (InStock|Backordered|Discontinued) "InStock"
861+ | >
862+ | <!ENTITY AUTHOR "John Doe">
863+ | <!NOTATION jpg PUBLIC "JPG 1.0">
864+ |]>
865+ |<document>&AUTHOR;</document>
866+ | """ .stripMargin
867+
868+ val document : Document = XML .withSAXParser(parserFactory.newSAXParser).loadStringDocument(xml)
869+
870+ // XMLLoader parses and returns DTD.
871+ // Note: dtd.ContentModel that DTD uses to represent the element content model lacks fidelity:
872+ // occurrence indicators "?" and "+" can not be expressed.
873+ // Note: spurious parentheses come from the dtd.ContentModel's toString() methods...
874+ assertEquals(
875+ """ DTD PUBLIC "-//OASIS//DTD DocBook V5.0//EN" "http://www.oasis-open.org/docbook/xml/5.0/docbook.dtd" [
876+ |<!ELEMENT AnyElement ANY>
877+ |<!ELEMENT EmptyElement EMPTY>
878+ |<!ELEMENT PCDataElement (#PCDATA)>
879+ |<!ELEMENT MixedElement (#PCDATA|(element|complex))*>
880+ |<!ELEMENT ChildrenElement ((element)*,(complex)*)>
881+ |<!ELEMENT element (#PCDATA)>
882+ |<!ELEMENT complex (#PCDATA)>
883+ |<!ATTLIST complex
884+ | implied CDATA #IMPLIED
885+ | required CDATA #REQUIRED
886+ | fixed CDATA #FIXED "fixed"
887+ | default CDATA "default"
888+ | enumerated (InStock|Backordered|Discontinued) "InStock">
889+ |<!ENTITY AUTHOR "John Doe">
890+ |<!NOTATION jpg PUBLIC "JPG 1.0">
891+ |]""" .stripMargin,
892+ document.dtd.toString)
893+
894+ // XMLLoader resolves entities defined in the DTD -
895+ // XML parser parses and uses the DTD internally, so there is no need to install any additional entity resolvers:
896+ assertEquals(""" <document>John Doe</document>""" , document.docElem.toString)
897+
// Same input text, this time parsed by ConstructingParser (whitespace not preserved).
898+ val document2 : Document = ConstructingParser .fromSource(scala.io.Source .fromString(xml), preserveWS = false ).document()
899+
900+ // ConstructingParser
901+ // ignores
902+ // element declarations
903+ // attribute list declarations
904+ // some entity declarations
905+ // notations
906+ // captures
907+ // decls: List[Decl] - for EntityDecl and PEReference
908+ // ent: Map[String, EntityDecl]
909+ // returns only
910+ // decls
911+ assertEquals(
912+ s """ DTD PUBLIC "-//OASIS//DTD DocBook V5.0//EN" "http://www.oasis-open.org/docbook/xml/5.0/docbook.dtd" [
913+ |<!ENTITY AUTHOR "John Doe">
914+ |] """ .stripMargin,
915+ document2.dtd.toString)
916+
917+ // ConstructingParser resolves entities defined in the DTD
918+ assertEquals(""" <document>John Doe</document>""" , document2.docElem.toString)
919+ }
755920
756921 @ UnitTest
757922 def nodeSeqNs (): Unit = {
0 commit comments