From d8ac3610365caf54b86ccb75cf9b3390c0e49c2f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 12 Sep 2024 17:53:03 +0200 Subject: [PATCH 01/15] Add spawn_fragment_parser method --- .../html-api/class-wp-html-processor.php | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ed6ac0299b3c3..ad20c36e6205b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -424,6 +424,55 @@ function ( WP_HTML_Token $token ): void { }; } + /** + * Creates a fragment processor with the current node as its context element. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm + * + * @param string $html Input HTML fragment to process. + * @return static|null The created processor if successful, otherwise null. + */ + private function spawn_fragment_parser( string $html ): ?self { + if ( $this->get_token_type() !== '#tag' ) { + return null; + } + + /* + * Prevent creating fragments at "self-contained" nodes. + * + * @see https://github.com/WordPress/wordpress-develop/pull/7141 + * @see https://github.com/WordPress/wordpress-develop/pull/7198 + */ + if ( + 'html' === $this->get_namespace() && + in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) + ) { + return null; + } + + $fragment_processor = self::create_fragment( $html ); + $fragment_processor->compat_mode = $this->compat_mode; + + // @todo The context element probably needs a namespace{ + $context_element = array( $this->get_tag(), array() ); + foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { + $context_element[1][ $name ] = $value; + } + $fragment_processor->state->context_node = $context_element; + + if ( 'TEMPLATE' === $context_element[0] ) { + $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + } + + $fragment_processor->reset_insertion_mode_appropriately(); + + // @todo Set the parser's form element pointer. + + $fragment_processor->state->encoding_confidence = 'irrelevant'; + + return $fragment_processor; + } + /** * Stops the parser and terminates its execution when encountering unsupported markup. * From ad8f8db5589d3d88061dd714e8cf17a994fc9d55 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 12 Sep 2024 18:13:05 +0200 Subject: [PATCH 02/15] Fix the processor context_node --- src/wp-includes/html-api/class-wp-html-processor.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ad20c36e6205b..07a7b31450cd4 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -453,11 +453,18 @@ private function spawn_fragment_parser( string $html ): ?self { $fragment_processor = self::create_fragment( $html ); $fragment_processor->compat_mode = $this->compat_mode; - // @todo The context element probably needs a namespace{ $context_element = array( $this->get_tag(), array() ); foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { $context_element[1][ $name ] = $value; } + + $fragment_processor->context_node = new WP_HTML_Token( + 'context-node', + $context_element[0], + $this->has_self_closing_flag() + ); + $fragment_processor->context_node->namespace = $this->get_namespace(); + $fragment_processor->state->context_node = $context_element; if ( 'TEMPLATE' === $context_element[0] ) { From e2efee4d844eef86cd8146427f3b55466f6e3abb Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:36:57 +0200 Subject: [PATCH 03/15] Make it public --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 07a7b31450cd4..073e23285d40b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -432,7 +432,7 @@ function ( WP_HTML_Token $token ): void { * @param string $html Input HTML fragment to process. * @return static|null The created processor if successful, otherwise null. */ - private function spawn_fragment_parser( string $html ): ?self { + public function spawn_fragment_parser( string $html ): ?self { if ( $this->get_token_type() !== '#tag' ) { return null; } From a473d5bfcd271a211e6e6b3e704af2ad7997fe8e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 9 Oct 2024 19:24:04 +0200 Subject: [PATCH 04/15] Implement naive set_inner_html --- .../html-api/class-wp-html-processor.php | 106 +++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ed6ac0299b3c3..033ea86b0b56f 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -355,6 +355,109 @@ public static function create_full_parser( $html, $known_definite_encoding = 'UT return $processor; } + public function set_inner_html( $html ) { + if ( $this->is_virtual() ) { + return false; + } + + if ( $this->get_token_type() !== '#tag' ) { + return false; + } + + if ( $this->is_tag_closer() ) { + return false; + } + + if ( ! $this->expects_closer() ) { + return false; + } + + if ( + 'html' !== $this->state->current_token->namespace && + $this->state->current_token->has_self_closing_flag + ) { + return false; + } + + $html_for_replacement = $this->normalize( $html ); + if ( empty( $html_for_replacement ) ) { + return false; + } + + // @todo apply modifications if there are any??? + + if ( ! parent::set_bookmark( 'SET_INNER_HTML: opener' ) ) { + return false; + } + + if ( ! $this->seek_to_matching_closer() ) { + parent::seek( 'SET_INNER_HTML: opener' ); + return false; + } + + if ( ! parent::set_bookmark( 'SET_INNER_HTML: closer' ) ) { + return false; + } + + $inner_html_start = $this->bookmarks['SET_INNER_HTML: opener']->start + $this->bookmarks['SET_INNER_HTML: opener']->length; + $inner_html_length = $this->bookmarks['SET_INNER_HTML: closer']->start - $inner_html_start; + + echo 'INNER HTML: ' . substr( $this->html, $inner_html_start, $inner_html_length ) . "\n"; + + echo "BEFORE:\n"; + var_dump( $this->get_updated_html() ); + + $this->lexical_updates['innerHTML'] = new WP_HTML_Text_Replacement( + $inner_html_start, + $inner_html_length, + $html_for_replacement + ); + + parent::seek( 'SET_INNER_HTML: opener' ); + parent::release_bookmark( 'SET_INNER_HTML: opener' ); + parent::release_bookmark( 'SET_INNER_HTML: closer' ); + echo "AFTER:\n"; + var_dump( $this->get_updated_html() ); + + // @todo check for whether that html will make a mess! + // Will it break out of tags? + + return true; + } + + public function seek_to_matching_closer(): bool { + $tag_name = $this->get_tag(); + + if ( null === $tag_name ) { + return false; + } + + if ( $this->is_tag_closer() ) { + return false; + } + + if ( ! $this->expects_closer() ) { + return false; + } + + $breadcrumbs = $this->breadcrumbs; + array_pop( $breadcrumbs ); + + // @todo Can't use these queries together + while ( $this->next_tag( + array( + 'tag_name' => $this->get_tag(), + 'tag_closers' => 'visit', + ) + ) ) { + if ( $this->get_breadcrumbs() === $breadcrumbs ) { + return true; + } + } + return false; + } + + /** * Constructor. * @@ -522,6 +625,7 @@ public function get_unsupported_exception() { * 1 for "first" tag, 3 for "third," etc. * Defaults to first tag. * @type string|null $class_name Tag must contain this whole class name to match. + * @type string $tag_name Tag name to match. * @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. * } @@ -545,7 +649,7 @@ public function next_tag( $query = null ): bool { } if ( is_string( $query ) ) { - $query = array( 'breadcrumbs' => array( $query ) ); + $query = array( 'tag_name' => $query ); } if ( ! is_array( $query ) ) { From 4f5249c82ad100c38ccca30ccc8ebe9999fd91d6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:38:49 +0200 Subject: [PATCH 05/15] Fix spawn_fragment_parser method --- .../html-api/class-wp-html-processor.php | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 073e23285d40b..24a02a26819a7 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -437,6 +437,8 @@ public function spawn_fragment_parser( string $html ): ?self { return null; } + $namespace = $this->get_namespace(); + /* * Prevent creating fragments at "self-contained" nodes. * @@ -444,7 +446,7 @@ public function spawn_fragment_parser( string $html ): ?self { * @see https://github.com/WordPress/wordpress-develop/pull/7198 */ if ( - 'html' === $this->get_namespace() && + 'html' === $namespace && in_array( $this->get_tag(), array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { return null; @@ -453,19 +455,17 @@ public function spawn_fragment_parser( string $html ): ?self { $fragment_processor = self::create_fragment( $html ); $fragment_processor->compat_mode = $this->compat_mode; - $context_element = array( $this->get_tag(), array() ); + + $fragment_processor->context_node = clone $this->state->current_token; + $fragment_processor->context_node->bookmark_name = 'context-node'; + $fragment_processor->context_node->on_destroy = null; + + $context_element = array( $fragment_processor->context_node->node_name, array() ); foreach ( $this->get_attribute_names_with_prefix( '' ) as $name => $value ) { $context_element[1][ $name ] = $value; } - $fragment_processor->context_node = new WP_HTML_Token( - 'context-node', - $context_element[0], - $this->has_self_closing_flag() - ); - $fragment_processor->context_node->namespace = $this->get_namespace(); - - $fragment_processor->state->context_node = $context_element; + $fragment_processor->breadcrumbs = array(); if ( 'TEMPLATE' === $context_element[0] ) { $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; From 57279ff11927592a2e9f2017c38d046e9a2deefc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 10 Oct 2024 16:15:34 +0200 Subject: [PATCH 06/15] Remove debug code --- src/wp-includes/html-api/class-wp-html-processor.php | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 033ea86b0b56f..3d43d3c76bbb9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -402,11 +402,6 @@ public function set_inner_html( $html ) { $inner_html_start = $this->bookmarks['SET_INNER_HTML: opener']->start + $this->bookmarks['SET_INNER_HTML: opener']->length; $inner_html_length = $this->bookmarks['SET_INNER_HTML: closer']->start - $inner_html_start; - echo 'INNER HTML: ' . substr( $this->html, $inner_html_start, $inner_html_length ) . "\n"; - - echo "BEFORE:\n"; - var_dump( $this->get_updated_html() ); - $this->lexical_updates['innerHTML'] = new WP_HTML_Text_Replacement( $inner_html_start, $inner_html_length, @@ -416,8 +411,6 @@ public function set_inner_html( $html ) { parent::seek( 'SET_INNER_HTML: opener' ); parent::release_bookmark( 'SET_INNER_HTML: opener' ); parent::release_bookmark( 'SET_INNER_HTML: closer' ); - echo "AFTER:\n"; - var_dump( $this->get_updated_html() ); // @todo check for whether that html will make a mess! // Will it break out of tags? From eaed8634dee6e084d63d597969a72d077c5c382f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:39:28 +0200 Subject: [PATCH 07/15] Process non-body context tests --- tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 808fa39d17f26..7f607ad63ebfc 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -138,10 +138,6 @@ public function data_external_html5lib_tests() { * @return bool True if the test case should be skipped. False otherwise. */ private static function should_skip_test( ?string $test_context_element, string $test_name ): bool { - if ( null !== $test_context_element && 'body' !== $test_context_element ) { - return true; - } - if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) { return true; } From 097f4eb0d58ca83c3545b4b66f3f5e0b750821ea Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 10 Oct 2024 16:15:46 +0200 Subject: [PATCH 08/15] Add set_inner_html happy path tests --- .../tests/html-api/wpHtmlProcessor.php | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index ba3407ff84cbf..a2c50c4df3990 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -863,4 +863,30 @@ public function test_adjusts_for_mathml_integration_points() { 'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.' ); } + + /** + * @ticket TBD + * + * @dataProvider data_set_inner_html + */ + public function test_set_inner_html( string $html, string $replacement, string $expected ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + while ( $processor->next_tag() ) { + if ( $processor->get_attribute( 'target' ) ) { + break; + } + } + $processor->set_inner_html( $replacement ); + $this->assertSame( $expected, $processor->get_updated_html() ); + } + + public static function data_set_inner_html() { + return array( + 'image in mathml' => array( + '
replace me
', + 'with me!', + '
with me!
', + ), + ); + } } From 25b18fa88d860b83ff7b126a12f37b205bfc13c3 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 12:42:30 +0200 Subject: [PATCH 09/15] Handle all the different document context in html5lib tests --- .../html-api/wpHtmlProcessorHtml5lib.php | 76 +++++++++++++++++-- 1 file changed, 71 insertions(+), 5 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 7f607ad63ebfc..041132ed50c20 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -153,11 +153,77 @@ private static function should_skip_test( ?string $test_context_element, string * @return string|null Tree structure of parsed HTML, if supported, else null. */ private static function build_tree_representation( ?string $fragment_context, string $html ) { - $processor = $fragment_context - ? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" ) - : WP_HTML_Processor::create_full_parser( $html ); - if ( null === $processor ) { - throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + $processor = null; + if ( $fragment_context ) { + if ( 'body' === $fragment_context ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + } else { + + /* + * If the string of characters starts with "svg ", the context + * element is in the SVG namespace and the substring after + * "svg " is the local name. If the string of characters starts + * with "math ", the context element is in the MathML namespace + * and the substring after "math " is the local name. + * Otherwise, the context element is in the HTML namespace and + * the string is the local name. + */ + if ( str_starts_with( $fragment_context, 'svg ' ) ) { + $tag_name = substr( $fragment_context, 4 ); + if ( 'svg' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { + $tag_name = substr( $fragment_context, 5 ); + if ( 'math' === $tag_name ) { + $parent_processor = WP_HTML_Processor::create_full_parser( '' ); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$tag_name}>" ); + } + $parent_processor->next_tag( $tag_name ); + } else { + if ( in_array( + $fragment_context, + array( + 'caption', + 'col', + 'colgroup', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', + ), + true + ) ) { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); + $parent_processor->next_tag(); + } else { + $parent_processor = WP_HTML_Processor::create_full_parser( "<{$fragment_context}>" ); + } + $parent_processor->next_tag( $fragment_context ); + } + if ( null !== $parent_processor->get_unsupported_exception() ) { + throw $parent_processor->get_unsupported_exception(); + } + if ( null !== $parent_processor->get_last_error() ) { + throw new Exception( $parent_processor->get_last_error() ); + } + $processor = $parent_processor->spawn_fragment_parser( $html ); + } + + if ( null === $processor ) { + throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); + } + } else { + $processor = WP_HTML_Processor::create_full_parser( $html ); + if ( null === $processor ) { + throw new Exception( 'Could not create a full parser.' ); + } } /* From 796e0d7d1c51a6c564d8496b7391144e0699666e Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 10 Oct 2024 16:26:23 +0200 Subject: [PATCH 10/15] Enable clearing inner HTML --- src/wp-includes/html-api/class-wp-html-processor.php | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 3d43d3c76bbb9..f787e5c00a4e2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -355,7 +355,7 @@ public static function create_full_parser( $html, $known_definite_encoding = 'UT return $processor; } - public function set_inner_html( $html ) { + public function set_inner_html( ?string $html ) { if ( $this->is_virtual() ) { return false; } @@ -379,10 +379,7 @@ public function set_inner_html( $html ) { return false; } - $html_for_replacement = $this->normalize( $html ); - if ( empty( $html_for_replacement ) ) { - return false; - } + $html = $html ? $this->normalize( $html ) : ''; // @todo apply modifications if there are any??? @@ -405,7 +402,7 @@ public function set_inner_html( $html ) { $this->lexical_updates['innerHTML'] = new WP_HTML_Text_Replacement( $inner_html_start, $inner_html_length, - $html_for_replacement + $html ); parent::seek( 'SET_INNER_HTML: opener' ); From 9ac142f67a2a91a43a2daa669d467b5acf8efcfc Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 13 Sep 2024 13:45:28 +0200 Subject: [PATCH 11/15] lints --- src/wp-includes/html-api/class-wp-html-processor.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 24a02a26819a7..bce9949d286b0 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -455,7 +455,6 @@ public function spawn_fragment_parser( string $html ): ?self { $fragment_processor = self::create_fragment( $html ); $fragment_processor->compat_mode = $this->compat_mode; - $fragment_processor->context_node = clone $this->state->current_token; $fragment_processor->context_node->bookmark_name = 'context-node'; $fragment_processor->context_node->on_destroy = null; From 2aadea5a49878022949fdd0e150cb32db7ab04f6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 10 Oct 2024 17:06:43 +0200 Subject: [PATCH 12/15] Add more tests --- .../tests/html-api/wpHtmlProcessor.php | 77 ++++++++++++++++++- 1 file changed, 74 insertions(+), 3 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index a2c50c4df3990..b7d3d71f692fd 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -869,24 +869,95 @@ public function test_adjusts_for_mathml_integration_points() { * * @dataProvider data_set_inner_html */ - public function test_set_inner_html( string $html, string $replacement, string $expected ) { + public function test_set_inner_html( string $html, ?string $replacement, string $expected ) { $processor = WP_HTML_Processor::create_fragment( $html ); while ( $processor->next_tag() ) { if ( $processor->get_attribute( 'target' ) ) { break; } } - $processor->set_inner_html( $replacement ); + + $this->assertTrue( $processor->set_inner_html( $replacement ) ); $this->assertSame( $expected, $processor->get_updated_html() ); } public static function data_set_inner_html() { return array( - 'image in mathml' => array( + array( '
replace me
', 'with me!', '
with me!
', ), + array( + '', + 'with me!', + '
with me!
', + ), + array( + '
replace me
', + 'with me!', + '
with me!
', + ), + ); + } + + /** + * @ticket TBD + * + * @dataProvider data_set_inner_html_not_allowed + */ + public function test_set_inner_html_not_allowed( string $html, string $replacement ) { + $processor = WP_HTML_Processor::create_fragment( $html ); + while ( $processor->next_tag() ) { + if ( $processor->get_attribute( 'target' ) ) { + break; + } + } + $this->assertFalse( $processor->set_inner_html( $replacement ) ); + $this->assertSame( $html, $processor->get_updated_html() ); + } + + /** + * Data provider. + * + * @return array[] + */ + public static function data_set_inner_html_not_allowed(): array { + return array( + 'not allowed in void tags' => array( + '
', + 'anything', + ), + 'not allowed in self-closing tags' => array( + '', + 'anything', + ), + 'must have closing tag' => array( + '
', + 'anything', + ), + + 'a in a' => array( + '', + '', + ), + 'a nested in a' => array( + '', + 'A cannot nest inside a', + ), + + 'text in table' => array( + '
hello
', + 'text triggers forstering - not allowed', + ), + 'text in thead' => array( + '', + 'text triggers forstering - not allowed', + ), + 'text in tr' => array( + '
hello
hello', + 'text triggers forstering - not allowed', + ), ); } } From b21ed570882bdd8993bb012cf59b50caa76e7709 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 10 Oct 2024 17:21:47 +0200 Subject: [PATCH 13/15] Use spawn_fragment_parser + serialize to normalize --- .../html-api/class-wp-html-processor.php | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 635bc4cc7893c..86caf5a43e08e 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -379,7 +379,23 @@ public function set_inner_html( ?string $html ) { return false; } - $html = $html ? $this->normalize( $html ) : ''; + if ( null === $html ) { + $html = ''; + } + if ( '' !== $html ) { + $fragment_parser = $this->spawn_fragment_parser( $html ); + if ( + null === $fragment_parser + ) { + return false; + } + + try { + $html = $fragment_parser->serialize(); + } catch ( Exception $e ) { + return false; + } + } // @todo apply modifications if there are any??? From 854f5e0dbec6c04761f0a9f8e065b6f776f8ead0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 10 Oct 2024 17:35:47 +0200 Subject: [PATCH 14/15] Fix expected table test result --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index b7d3d71f692fd..48efd292d383f 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -896,7 +896,7 @@ public static function data_set_inner_html() { array( '
replace me
', 'with me!', - '
with me!
', + '
with me!
', ), ); } From 823659cc6aa2c3df8b131956fefd6d7d6b66cd54 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Thu, 10 Oct 2024 17:36:04 +0200 Subject: [PATCH 15/15] Improve failed test message --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 48efd292d383f..e04f8cfd7d46d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -913,7 +913,7 @@ public function test_set_inner_html_not_allowed( string $html, string $replaceme break; } } - $this->assertFalse( $processor->set_inner_html( $replacement ) ); + $this->assertFalse( $processor->set_inner_html( $replacement ), "Should have failed but produced: {$processor->get_updated_html()}" ); $this->assertSame( $html, $processor->get_updated_html() ); }