1212namespace Symfony \Component \DomCrawler \Tests ;
1313
1414use PHPUnit \Framework \Attributes \DataProvider ;
15+ use PHPUnit \Framework \Attributes \Group ;
16+ use PHPUnit \Framework \Attributes \IgnoreDeprecations ;
1517use PHPUnit \Framework \Attributes \RequiresPhpExtension ;
1618use PHPUnit \Framework \Error \Notice ;
1719use PHPUnit \Framework \TestCase ;
2022use Symfony \Component \DomCrawler \Image ;
2123use Symfony \Component \DomCrawler \Link ;
2224
23- abstract class AbstractCrawlerTestCase extends TestCase
25+ class CrawlerTestCase extends TestCase
2426{
25- abstract public static function getDoctype (): string ;
/**
 * Returns the doctype string that this test case prepends to its HTML fixtures.
 *
 * Previously abstract; it now has a concrete implementation in this class.
 */
27+ public static function getDoctype (): string
28+ {
29+ return '<!DOCTYPE html> ' ;
30+ }
2631
27- protected function createCrawler ($ node = null , ?string $ uri = null , ?string $ baseHref = null , bool $ useHtml5Parser = true )
/**
 * Builds the Crawler instance used by the tests from an optional node, URI and base href.
 *
 * The fourth constructor argument (formerly the caller-supplied $useHtml5Parser flag) is
 * now derived from the running PHP version: true on PHP 8.4 or newer, false otherwise.
 */
32+ protected function createCrawler ($ node = null , ?string $ uri = null , ?string $ baseHref = null )
2833 {
29- return new Crawler ($ node , $ uri , $ baseHref , $ useHtml5Parser );
34+ return new Crawler ($ node , $ uri , $ baseHref , \ PHP_VERSION_ID >= 80400 );
3035 }
3136
3237 public function testConstructor ()
3338 {
3439 $ crawler = $ this ->createCrawler ();
3540 $ this ->assertCount (0 , $ crawler , '__construct() returns an empty crawler ' );
3641
37- $ doc = new \ DOMDocument ();
42+ $ doc = $ this -> createDomDocument ();
3843 $ node = $ doc ->createElement ('test ' );
3944
4045 $ crawler = $ this ->createCrawler ($ node );
@@ -236,7 +241,7 @@ public function testAddNode()
236241
237242 public function testClear ()
238243 {
239- $ doc = new \ DOMDocument ();
244+ $ doc = $ this -> createDomDocument ();
240245 $ node = $ doc ->createElement ('test ' );
241246
242247 $ crawler = $ this ->createCrawler ($ node );
@@ -407,7 +412,7 @@ public function testInnerText(
407412 public function testHtml ()
408413 {
409414 $ this ->assertEquals ('<img alt="Bar"> ' , $ this ->createTestCrawler ()->filterXPath ('//a[5] ' )->html ());
410- $ this ->assertEquals ('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"></button> ' , trim (preg_replace ('~>\s+<~ ' , '>< ' , $ this ->createTestCrawler ()->filterXPath ('//form[@id="FooFormId"] ' )->html ())));
415+ $ this ->assertEquals ('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"><input type="submit" value="FooBarValue" name="FooBarName" form="FooFormId"><input type="text" value="FooTextValue" name="FooTextName" form="FooFormId"><input type="image" alt="ImageAlt" form="FooFormId">< /button> ' , trim (preg_replace ('~>\s+<~ ' , '>< ' , $ this ->createTestCrawler ()->filterXPath ('//form[@id="FooFormId"] ' )->html ())));
411416
412417 try {
413418 $ this ->createTestCrawler ()->filterXPath ('//ol ' )->html ();
@@ -421,9 +426,9 @@ public function testHtml()
421426
/**
 * Checks that markup containing an emoji is returned unchanged by html().
 *
 * The string handed to createCrawler() and the string expected from html() are identical.
 */
422427 public function testEmojis ()
423428 {
424- $ crawler = $ this ->createCrawler ('<body><p>Hey 👋</p></body> ' );
429+ $ crawler = $ this ->createCrawler ('<head></head>< body><p>Hey 👋</p></body> ' );
425430
426- $ this ->assertSame ('<body><p>Hey 👋</p></body> ' , $ crawler ->html ());
431+ $ this ->assertSame ('<head></head>< body><p>Hey 👋</p></body> ' , $ crawler ->html ());
427432 }
428433
429434 public function testExtract ()
@@ -448,7 +453,7 @@ public function testFilterXpathComplexQueries()
448453 $ this ->assertCount (1 , $ crawler ->filterXPath ('./body ' ));
449454 $ this ->assertCount (1 , $ crawler ->filterXPath ('.//body ' ));
450455 $ this ->assertCount (6 , $ crawler ->filterXPath ('.//input ' ));
451- $ this ->assertCount (4 , $ crawler ->filterXPath ('//form ' )->filterXPath ('//button | //input ' ));
456+ $ this ->assertCount (7 , $ crawler ->filterXPath ('//form ' )->filterXPath ('//button | //input ' ));
452457 $ this ->assertCount (1 , $ crawler ->filterXPath ('body ' ));
453458 $ this ->assertCount (8 , $ crawler ->filterXPath ('//button | //input ' ));
454459 $ this ->assertCount (1 , $ crawler ->filterXPath ('//body ' ));
@@ -530,6 +535,16 @@ public function testFilterXPathWithAnUrl()
530535 $ this ->assertSame ('Music ' , $ crawler ->text ());
531536 }
532537
/**
 * Checks that a mixed-case element name in the XML test document keeps its casing.
 *
 * The element is selected through local-name() and must match exactly once; its text
 * and its nodeName() are then compared against the expected literals.
 *
 * NOTE(review): the method name looks like a typo for "testCaseSensitivity" - confirm before renaming.
 */
538+ public function testCaseSentivity ()
539+ {
540+ $ crawler = $ this ->createTestXmlCrawler ();
541+
// local-name() compares the element name without any namespace prefix.
542+ $ crawler = $ crawler ->filterXPath ('//*[local-name() = "CaseSensitiveTag"] ' );
543+ $ this ->assertCount (1 , $ crawler );
544+ $ this ->assertSame ('Some Content ' , $ crawler ->text ());
545+ $ this ->assertSame ('CaseSensitiveTag ' , $ crawler ->nodeName ());
546+ }
547+
533548 public function testFilterXPathWithFakeRoot ()
534549 {
535550 $ crawler = $ this ->createTestCrawler ();
@@ -1290,10 +1305,82 @@ public function testAddHtmlContentUnsupportedCharset()
12901305 $ this ->assertEquals ('Žťčýů ' , $ crawler ->filterXPath ('//p ' )->text ());
12911306 }
12921307
/**
 * Ensures that adding malformed XML content records more than one libxml error.
 *
 * The fixture contains two <a> elements that are never closed, so parsing it
 * with internal libxml error handling enabled must leave several errors in
 * the libxml error buffer.
 */
public function testAddXmlContentWithErrors()
{
    // Collect libxml errors internally instead of letting them surface as PHP warnings.
    $internalErrors = libxml_use_internal_errors(true);

    try {
        $crawler = $this->createCrawler();
        $crawler->addXmlContent(<<<'EOF'
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
            <html>
                <head>
                </head>
                <body>
                    <nav><a href="#"><a href="#"></nav>
                </body>
            </html>
            EOF,
            'UTF-8'
        );

        // Compare the NUMBER of errors. Comparing the array itself with an integer
        // always evaluates as "greater" in PHP, which made the assertion vacuous.
        $this->assertGreaterThan(1, \count(libxml_get_errors()));
    } finally {
        // Restore the global libxml state even when the assertion above fails,
        // so later tests are not affected.
        libxml_clear_errors();
        libxml_use_internal_errors($internalErrors);
    }
}
1331+
/**
 * Parses the same markup with two Crawler instances that differ only in the fourth
 * constructor argument (true vs. false) and asserts that the text extracted from
 * //h1 is not the same for both.
 *
 * Tagged as a legacy test; deprecations raised while it runs are ignored.
 */
1332+ #[IgnoreDeprecations]
1333+ #[Group('legacy ' )]
1334+ public function testHtml5ParserNotSameAsNativeParserForSpecificHtml ()
1335+ {
1336+ // HTML that triggers a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
1337+ $ html = '<!DOCTYPE html><html><body><h1><p>Foo</p></h1></body></html> ' ;
1338+
1339+ $ html5Crawler = new Crawler (null , null , null , true );
1340+ $ html5Crawler ->add ($ html );
1341+
1342+ $ nativeCrawler = new Crawler (null , null , null , false );
1343+ $ nativeCrawler ->add ($ html );
1344+
1345+ $ this ->assertNotEquals ($ nativeCrawler ->filterXPath ('//h1 ' )->text (), $ html5Crawler ->filterXPath ('//h1 ' )->text (), 'Native parser and Html5 parser must be different ' );
1346+ }
1347+
/**
 * Adds a document in which a <p> is nested inside an <h1> and checks the text
 * extracted from //h1 against the expected literal.
 *
 * The document is prefixed with the value returned by getDoctype().
 */
1348+ public function testAddHtml5 ()
1349+ {
1350+ // Ensure a bug specific to the DOM extension is fixed (see https://github.com/symfony/symfony/issues/28596)
1351+ $ crawler = $ this ->createCrawler ();
1352+ $ crawler ->add ($ this ->getDoctype ().'<html><body><h1><p>Foo</p></h1></body></html> ' );
1353+ $ this ->assertEquals ('Foo ' , $ crawler ->filterXPath ('//h1 ' )->text (), '->add() adds nodes from a string ' );
1354+ }
1355+
/**
 * Feeds each dataset from html5Provider() to addHtmlContent() and checks the text
 * extracted from //h1 against the expected literal.
 *
 * @param string $content HTML document supplied by the data provider
 */
1356+ #[DataProvider('html5Provider ' )]
1357+ public function testHtml5ParserParseContentStartingWithValidHeading (string $ content )
1358+ {
1359+ $ crawler = $ this ->createCrawler ();
1360+ $ crawler ->addHtmlContent ($ content );
1361+ self ::assertEquals (
1362+ 'Foo ' ,
1363+ $ crawler ->filterXPath ('//h1 ' )->text (),
1364+ '->addHtmlContent() parses valid HTML with comment before doctype '
1365+ );
1366+ }
1367+
/**
 * Data provider yielding named variants of one HTML document.
 *
 * Every dataset is the same document (doctype from getDoctype() followed by a fixed
 * body) with something placed in front of it: a BOM, one or more comments,
 * whitespace, or a combination of these.
 *
 * @return iterable each item is a single-element array holding the HTML string
 */
1368+ public static function html5Provider (): iterable
1369+ {
1370+ $ html = self ::getDoctype ().'<html><body><h1><p>Foo</p></h1></body></html> ' ;
// The three bytes EF BB BF form the UTF-8 byte order mark.
1371+ $ BOM = \chr (0xEF ).\chr (0xBB ).\chr (0xBF );
1372+
1373+ yield 'BOM first ' => [$ BOM .$ html ];
1374+ yield 'Single comment ' => ['<!-- comment --> ' .$ html ];
1375+ yield 'Multiline comment ' => ["<!-- \n multiline comment \n --> " .$ html ];
1376+ yield 'Several comments ' => ['<!--c--> <!--cc--> ' .$ html ];
1377+ yield 'Whitespaces ' => [' ' .$ html ];
1378+ yield 'All together ' => [$ BOM .' <!--c--> ' .$ html ];
1379+ }
1380+
1381+ protected function createTestCrawler ($ uri = null )
1382+ {
1383+ $ html = $ this ->getDoctype ().'
12971384 <html>
12981385 <body>
12991386 <a href="foo">Foo</a>
@@ -1352,9 +1439,9 @@ public function createTestCrawler($uri = null)
13521439 </div>
13531440 </body>
13541441 </html>
1355- ') ;
1442+ ' ;
13561443
1357- return $ this ->createCrawler ($ dom , $ uri );
1444+ return $ this ->createCrawler ($ html , $ uri );
13581445 }
13591446
13601447 protected function createTestXmlCrawler ($ uri = null )
@@ -1369,6 +1456,7 @@ protected function createTestXmlCrawler($uri = null)
13691456 <yt:aspectRatio>widescreen</yt:aspectRatio>
13701457 </media:group>
13711458 <media:category label="Music" scheme="http://gdata.youtube.com/schemas/2007/categories.cat">Music</media:category>
1459+ <CaseSensitiveTag>Some Content</CaseSensitiveTag>
13721460 </entry> ' ;
13731461
13741462 return $ this ->createCrawler ($ xml , $ uri );
0 commit comments