@@ -510,8 +510,15 @@ class XMLTestJVM {
510
510
}
511
511
}
512
512
513
+ // With both internal and external Xerces now on the classpath, we explicitly disambiguate which one we want:
514
/**
 * Creates a new `SAXParserFactory` backed by the Xerces implementation that is built into the JDK.
 *
 * The implementation class is named explicitly instead of relying on JAXP provider lookup,
 * so the choice does not depend on what else is on the classpath.
 * This is a `def`: every call creates a new factory instance.
 */
def xercesInternal: javax.xml.parsers.SAXParserFactory =
  javax.xml.parsers.SAXParserFactory.newInstance(
    // The class name must be exact: any stray whitespace makes the class impossible to load.
    "com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl",
    null // no explicit class loader is supplied
  )
516
+
517
/**
 * Creates a new `SAXParserFactory` backed by the external (Apache-distributed) Xerces.
 *
 * The implementation class is named explicitly instead of relying on JAXP provider lookup;
 * the class must therefore be available on the classpath.
 * This is a `def`: every call creates a new factory instance.
 */
def xercesExternal: javax.xml.parsers.SAXParserFactory =
  javax.xml.parsers.SAXParserFactory.newInstance(
    // The class name must be exact: any stray whitespace makes the class impossible to load.
    "org.apache.xerces.jaxp.SAXParserFactoryImpl",
    null // no explicit class loader is supplied
  )
519
+
513
520
/** Default SAXParserFactory for these tests: the JDK's built-in Xerces, selected explicitly via `xercesInternal`. */
514
- val defaultParserFactory : javax.xml.parsers.SAXParserFactory = javax.xml.parsers. SAXParserFactory .newInstance
521
+ val defaultParserFactory : javax.xml.parsers.SAXParserFactory = xercesInternal
515
522
516
523
@ throws(classOf [org.xml.sax.SAXNotRecognizedException ])
517
524
def issue17UnrecognizedFeature (): Unit = {
@@ -629,7 +636,7 @@ class XMLTestJVM {
629
636
// using namespace-aware parser, this works with FactoryAdapter enhanced to handle startPrefixMapping() events;
630
637
// see https://github.com/scala/scala-xml/issues/506
631
638
def roundtrip (namespaceAware : Boolean , xml : String ): Unit = {
632
- val parserFactory : javax.xml.parsers.SAXParserFactory = javax.xml.parsers. SAXParserFactory .newInstance()
639
+ val parserFactory : javax.xml.parsers.SAXParserFactory = xercesInternal
633
640
parserFactory.setFeature(" http://javax.xml.XMLConstants/feature/secure-processing" , true )
634
641
parserFactory.setFeature(" http://apache.org/xml/features/nonvalidating/load-external-dtd" , false )
635
642
parserFactory.setFeature(" http://apache.org/xml/features/disallow-doctype-decl" , true )
@@ -656,7 +663,7 @@ class XMLTestJVM {
656
663
657
664
@ UnitTest
658
665
def useXMLReaderWithXMLFilter (): Unit = {
659
- val parent : org.xml.sax.XMLReader = javax.xml.parsers. SAXParserFactory .newInstance .newSAXParser.getXMLReader
666
+ val parent : org.xml.sax.XMLReader = xercesInternal .newSAXParser.getXMLReader
660
667
val filter : org.xml.sax.XMLFilter = new org.xml.sax.helpers.XMLFilterImpl (parent) {
661
668
override def characters (ch : Array [Char ], start : Int , length : Int ): Unit = {
662
669
for (i <- 0 until length) if (ch(start+ i) == 'a' ) ch(start+ i) = 'b'
@@ -682,6 +689,67 @@ class XMLTestJVM {
682
689
assertTrue(gotAnError)
683
690
}
684
691
692
+ // Now that we can use an XML parser configured to be namespace-aware,
693
+ // we can also configure it to be XInclude-aware and process XML Includes:
694
/**
 * Loads the named resource with a namespace-aware, XInclude-aware SAX parser created by
 * `parserFactory`, and asserts that the string form of the loaded document equals `expected`.
 *
 * @param parserFactory factory used to create the parser; it is reconfigured in place
 *                      (namespace awareness and XInclude awareness are switched on)
 * @param resourceName  name of the resource, resolved with `getClass.getResource`
 * @param expected      exact expected result of calling `toString` on the loaded document
 */
def check(
  parserFactory: javax.xml.parsers.SAXParserFactory,
  resourceName: String,
  expected: String
): Unit = {
  // `getResource` returns null when the resource is missing; fail with the resource name
  // instead of letting a bare NullPointerException surface from `.toString`.
  val resource: java.net.URL = Option(getClass.getResource(resourceName))
    .getOrElse(throw new AssertionError(s"Test resource not found: $resourceName"))

  parserFactory.setNamespaceAware(true)
  parserFactory.setXIncludeAware(true)

  val actual: String = XML
    .withSAXParser(parserFactory.newSAXParser)
    .load(resource.toString)
    .toString

  assertEquals(expected, actual)
}
708
+
709
+ // Here we demonstrate that XInclude works with both the external and the built-in Xerces:
710
+
711
+ // Expected string form of the document loaded from `includer.xml`; `check` compares it to the actual result with `assertEquals`.
+ // NOTE(review): whitespace inside this literal is part of the comparison - confirm it against the resource files.
+ val includerExpected : String =
712
+ s """ <includer>
713
+ | <includee xml:base="includee.xml">
714
+ | <content>Blah!</content>
715
+ |</includee>
716
+ |</includer> """ .stripMargin
717
+
718
/** Checks XInclude processing of `includer.xml` when parsing with the external Xerces. */
@UnitTest
def xIncludeWithExternalXerces(): Unit =
  check(xercesExternal, "includer.xml", includerExpected)
719
/** Checks XInclude processing of `includer.xml` when parsing with the JDK's built-in Xerces. */
@UnitTest
def xIncludeWithInternalXerces(): Unit =
  check(xercesInternal, "includer.xml", includerExpected)
720
+
721
+ // And here we demonstrate that both external and built-in Xerces report incorrect `xml:base`
722
+ // when the XML file included contains its own include, and included files are not in the same directory:
723
+ // `xml:base` on the `<collection>` element is incorrect
724
+ // books/book/author/volume/1.xml instead of the correct
725
+ // archive/books/book/author/volume/1.xml!
726
+ // Expected string form of the document loaded from `site.xml`; `check` compares it to the actual result with `assertEquals`.
+ // NOTE(review): whitespace inside this literal is part of the comparison - confirm it against the resource files.
+ val siteUnfortunatelyExpected : String =
727
+ s """ <site xmlns:xi="http://www.w3.org/2001/XInclude">
728
+ | <store xml:base="archive/books.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
729
+ | <store xml:base="archive/books/book/author.xml" xmlns:xi="http://www.w3.org/2001/XInclude">
730
+ | <collection n="1" xml:base="books/book/author/volume/1.xml"/>
731
+ |</store>
732
+ |</store>
733
+ |</site> """ .stripMargin
734
+
735
+ // Turns out, this is a known Xerces bug https://issues.apache.org/jira/browse/XERCESJ-1102:
736
+ // - the bug was reported in October 2005 - more than seventeen years ago;
737
+ // - a patch fixing it (that I have not verified personally) was submitted many years ago;
738
+ // - the bug is still not fixed in the 2023 release of Xerces;
739
+ // - the bug was discussed by the Saxon users in https://saxonica.plan.io/issues/4664,
740
+ // and is allegedly fixed in SaxonC 11.1 - although it is not clear how that can be, since Saxon does not ship its own Xerces.
741
+ //
742
+ // In my own application, I had to "fix up" incorrect values produced by Xerces, taking into account
743
+ // the specific directory layout being used. I can only speculate what others do, but none of the alternatives sound great:
744
+ // - avoid using nested includes altogether or flatten the directory hierarchy to appease the bug;
745
+ // - use a privately patched version of Xerces;
746
+ // - use Saxon DOM parsing instead of Xerces' SAX.
747
+ //
748
+ // I find it utterly incomprehensible that a foundational library shipped with the JDK and used everywhere
749
+ // has had a bug in its core functionality for years that never gets fixed, but sadly, that is the state of affairs:
750
/** Pins the result, including the incorrect `xml:base`, that the external Xerces produces for the nested includes in `site.xml`. */
@UnitTest
def xIncludeFailWithExternalXerces(): Unit =
  check(xercesExternal, "site.xml", siteUnfortunatelyExpected)
751
/** Pins the result, including the incorrect `xml:base`, that the JDK's built-in Xerces produces for the nested includes in `site.xml`. */
@UnitTest
def xIncludeFailWithInternalXerces(): Unit =
  check(xercesInternal, "site.xml", siteUnfortunatelyExpected)
752
+
685
753
@ UnitTest
686
754
def nodeSeqNs (): Unit = {
687
755
val x : NodeBuffer = {
0 commit comments