diff --git a/src/HTML5/Parser/Scanner.php b/src/HTML5/Parser/Scanner.php index 1b25888..5b96025 100644 --- a/src/HTML5/Parser/Scanner.php +++ b/src/HTML5/Parser/Scanner.php @@ -7,6 +7,7 @@ /** * The scanner scans over a given data input to react appropriately to characters. */ +#[\AllowDynamicProperties] class Scanner { const CHARS_HEX = 'abcdefABCDEF01234567890'; diff --git a/src/HTML5/Serializer/OutputRules.php b/src/HTML5/Serializer/OutputRules.php index ec467f2..0d712f6 100644 --- a/src/HTML5/Serializer/OutputRules.php +++ b/src/HTML5/Serializer/OutputRules.php @@ -9,6 +9,12 @@ namespace Masterminds\HTML5\Serializer; +use Dom\Attr; +use Dom\CharacterData; +use Dom\Document; +use Dom\Element; +use Dom\Node; +use Dom\XPath; use Masterminds\HTML5\Elements; /** @@ -229,9 +235,9 @@ public function element($ele) $this->openTag($ele); if (Elements::isA($name, Elements::TEXT_RAW)) { foreach ($ele->childNodes as $child) { - if ($child instanceof \DOMCharacterData) { + if ($child instanceof \DOMCharacterData || $child instanceof CharacterData) { $this->wr($child->data); - } elseif ($child instanceof \DOMElement) { + } elseif ($child instanceof \DOMElement || $child instanceof Element) { $this->element($child); } } @@ -299,13 +305,21 @@ public function processorInstruction($ele) */ protected function namespaceAttrs($ele) { - if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) { - $this->xpath = new \DOMXPath($ele->ownerDocument); - } + $isLegacyDocument = static::isLegacyDocument($ele); + + // Finding namespace in new \Dom\Document will cause error message: + // DOMException: The namespace axis is not well-defined in the living DOM specification. + // Use Dom\Element::getInScopeNamespaces() or Dom\Element::getDescendantNamespaces() instead. + if ($isLegacyDocument) { + // TODO: Fix the namespace attrs writing. + if (!$this->xpath || $this->xpath->document !== $ele->ownerDocument) { + $this->xpath = new \DOMXPath($ele->ownerDocument); + } - foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) { - if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) { - $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"'); + foreach ($this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) { + if (!in_array($nsNode->nodeValue, $this->implicitNamespaces)) { + $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"'); + } } } } @@ -375,8 +389,14 @@ protected function attrs($ele) } } - protected function nonBooleanAttribute(\DOMAttr $attr) + protected function nonBooleanAttribute($attr) { + if (!$attr instanceof \DOMAttr && !$attr instanceof Attr) { + throw new \InvalidArgumentException( + __METHOD__ . '() argument 1 should be \DOMAttr or \Dom\Attr' + ); + } + $ele = $attr->ownerElement; foreach ($this->nonBooleanAttributes as $rule) { if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) { @@ -415,10 +435,25 @@ protected function nonBooleanAttribute(\DOMAttr $attr) return false; } - private function getXPath(\DOMNode $node) + /** + * @param Node|\DOMNode $node + * + * @return XPath|\DOMXPath + */ + private function getXPath($node) { + $isLegacyDocument = static::isLegacyDocument($node); + + if ($isLegacyDocument) { + if (!$this->xpath) { + $this->xpath = new \DOMXPath($node->ownerDocument); + } + + return $this->xpath; + } + if (!$this->xpath) { - $this->xpath = new \DOMXPath($node->ownerDocument); + $this->xpath = new XPath($node->ownerDocument); } return $this->xpath; @@ -430,7 +465,7 @@ private function getXPath(\DOMNode $node) * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the * qualified name (8.3). * - * @param \DOMNode $ele The element being written. + * @param Node|\DOMNode $ele The element being written. */ protected function closeTag($ele) { @@ -550,4 +585,22 @@ protected function escape($text, $attribute = false) return strtr($text, $replace); } + + /** + * @param Node|\DOMNode $node + * + * @return bool + */ + protected static function isLegacyDocument($node) + { + if ($node instanceof Document) { + return false; + } + + if ($node instanceof \DOMDocument) { + return true; + } + + return $node->ownerDocument instanceof \DOMDocument; + } } diff --git a/src/HTML5/Serializer/Traverser.php b/src/HTML5/Serializer/Traverser.php index 1e8d792..6814452 100644 --- a/src/HTML5/Serializer/Traverser.php +++ b/src/HTML5/Serializer/Traverser.php @@ -2,6 +2,10 @@ namespace Masterminds\HTML5\Serializer; +use Dom\Document; +use Dom\DocumentFragment; +use Dom\NodeList; + /** * Traverser for walking a DOM tree. * @@ -60,16 +64,16 @@ public function __construct($dom, $out, RulesInterface $rules, $options = array( */ public function walk() { - if ($this->dom instanceof \DOMDocument) { + if ($this->dom instanceof \DOMDocument || $this->dom instanceof Document) { $this->rules->document($this->dom); - } elseif ($this->dom instanceof \DOMDocumentFragment) { + } elseif ($this->dom instanceof \DOMDocumentFragment || $this->dom instanceof DocumentFragment) { // Document fragments are a special case. Only the children need to // be serialized. if ($this->dom->hasChildNodes()) { $this->children($this->dom->childNodes); } } // If NodeList, loop - elseif ($this->dom instanceof \DOMNodeList) { + elseif ($this->dom instanceof \DOMNodeList || $this->dom instanceof NodeList) { // If this is a NodeList of DOMDocuments this will not work. $this->children($this->dom); } // Else assume this is a DOMNode-like datastructure. diff --git a/test/HTML5/Serializer/OutputRulesNewDomTest.php b/test/HTML5/Serializer/OutputRulesNewDomTest.php new file mode 100644 index 0000000..858c057 --- /dev/null +++ b/test/HTML5/Serializer/OutputRulesNewDomTest.php @@ -0,0 +1,642 @@ + + +
+ +This is a test.
+ + '; + + /** + * @var HTML5 + */ + protected $html5; + + public static function setUpBeforeClass(): void + { + if (PHP_VERSION_ID < 80400) { + self::markTestSkipped('New DOM only supports PHP 8.4+'); + } + + parent::setUpBeforeClass(); + } + + /** + * @before + */ + public function before() + { + $this->html5 = $this->getInstance(); + } + + public function loadHTML($html) + { + return HTMLDocument::createFromString( + $html, + LIBXML_HTML_NOIMPLIED + ); + } + + /** + * Using reflection we make a protected method accessible for testing. + * + * @param string $name + * The name of the method on the Traverser class to test + * + * @return \ReflectionMethod for the specified method + */ + public function getProtectedMethod($name) + { + $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\OutputRules'); + $method = $class->getMethod($name); + $method->setAccessible(true); + + return $method; + } + + public function getTraverserProtectedProperty($name) + { + $class = new \ReflectionClass('\Masterminds\HTML5\Serializer\Traverser'); + $property = $class->getProperty($name); + $property->setAccessible(true); + + return $property; + } + + public function getOutputRules($options = array()) + { + $options = $options + $this->html5->getOptions(); + $stream = fopen('php://temp', 'w'); + $dom = $this->loadHTML($this->markup); + $r = new OutputRules($stream, $options); + $t = new Traverser($dom, $stream, $r, $options); + + return array( + $r, + $stream, + ); + } + + public function testDocument() + { + $dom = $this->loadHTML('foo'); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $r->document($dom); + $expected = '' . PHP_EOL . 'foo' . PHP_EOL; + $this->assertEquals($expected, stream_get_contents($stream, -1, 0)); + } + + public function testEmptyDocument() + { + $dom = $this->loadHTML(''); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $r->document($dom); + $expected = '' . PHP_EOL; + $this->assertEquals($expected, stream_get_contents($stream, -1, 0)); + } + + public function testDoctype() + { + $dom = $this->loadHTML('foo'); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $m = $this->getProtectedMethod('doctype'); + $m->invoke($r, 'foo'); + $this->assertEquals('' . PHP_EOL, stream_get_contents($stream, -1, 0)); + } + + public function testElement() + { + $dom = $this->loadHTML( + ' + + + + + + '); + + $stream = fopen('php://temp', 'w'); + $r = new OutputRules($stream, $this->html5->getOptions()); + $t = new Traverser($dom, $stream, $r, $this->html5->getOptions()); + + $list = $dom->getElementsByTagName('div'); + $r->element($list->item(0)); + $this->assertEquals('', stream_get_contents($stream, -1, 0)); + } + + public function testSerializeWithNamespaces() + { + $this->html5 = $this->getInstance(array( + 'xmlNamespaces' => true, + )); + + $source = ' + + + +